Merge
author duke
Wed, 05 Jul 2017 16:43:15 +0200
changeset 1488 ec72429f79c4
parent 1487 628049ac53ed (current diff)
parent 1465 27ac920c7b50 (diff)
child 1489 126f365cec6c
Merge
--- a/.hgtags-top-repo	Thu Oct 23 21:56:41 2008 -0700
+++ b/.hgtags-top-repo	Wed Jul 05 16:43:15 2017 +0200
@@ -12,3 +12,4 @@
 143c1abedb7d3095eff0f9ee5fec9bf48e3490fc jdk7-b35
 4b4f5fea8d7d0743f0c30d91fcd9bf9d96e5d2ad jdk7-b36
 744554f5a3290e11c71cd2ddb1aff49e431f9ed0 jdk7-b37
+cc47a76899ed33a2c513cb688348244c9b5a1288 jdk7-b38
--- a/corba/.hgtags	Thu Oct 23 21:56:41 2008 -0700
+++ b/corba/.hgtags	Wed Jul 05 16:43:15 2017 +0200
@@ -12,3 +12,4 @@
 3867c4d14a5bfdbb37c97b4874ccb0ee5343111c jdk7-b35
 0723891eb8d1c27e67c54163af0b4cea05a4e036 jdk7-b36
 59d5848bdedebe91cc2753acce78911bcb4a66db jdk7-b37
+08be802754b0296c91a7713b6d85a015dbcd5349 jdk7-b38
--- a/hotspot/.hgtags	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/.hgtags	Wed Jul 05 16:43:15 2017 +0200
@@ -12,3 +12,4 @@
 5fa96a5a7e76da7c8dad12486293a0456c2c116c jdk7-b35
 e91159f921a58af3698e6479ea1fc5818da66d09 jdk7-b36
 9ee9cf798b59e7d51f8c0a686959f313867a55d6 jdk7-b37
+d9bc824aa078573829bb66572af847e26e1bd12e jdk7-b38
--- a/hotspot/make/hotspot_distro	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/make/hotspot_distro	Wed Jul 05 16:43:15 2017 +0200
@@ -1,4 +1,4 @@
-#
+# 
 # Copyright 2006-2008 Sun Microsystems, Inc.  All Rights Reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
@@ -19,7 +19,7 @@
 # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 # CA 95054 USA or visit www.sun.com if you need additional information or
 # have any questions.
-#
+# 
 
 #
 # This file format must remain compatible with both
--- a/hotspot/make/hotspot_version	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/make/hotspot_version	Wed Jul 05 16:43:15 2017 +0200
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=14
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=05
+HS_BUILD_NUMBER=06
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/hotspot/make/linux/makefiles/top.make	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/make/linux/makefiles/top.make	Wed Jul 05 16:43:15 2017 +0200
@@ -64,6 +64,7 @@
                           $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
                           $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
                           $(VM)/gc_implementation/includeDB_gc_parNew \
+                          $(VM)/gc_implementation/includeDB_gc_g1     \
                           $(VM)/gc_implementation/includeDB_gc_serial \
                           $(VM)/gc_implementation/includeDB_gc_shared
 
--- a/hotspot/make/solaris/makefiles/top.make	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/make/solaris/makefiles/top.make	Wed Jul 05 16:43:15 2017 +0200
@@ -54,6 +54,7 @@
                      $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
                      $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
                      $(VM)/gc_implementation/includeDB_gc_parNew \
+                     $(VM)/gc_implementation/includeDB_gc_g1 \
                      $(VM)/gc_implementation/includeDB_gc_serial \
                      $(VM)/gc_implementation/includeDB_gc_shared
 
--- a/hotspot/make/windows/makefiles/generated.make	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/make/windows/makefiles/generated.make	Wed Jul 05 16:43:15 2017 +0200
@@ -50,7 +50,8 @@
            $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \
            $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \
            $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \
-           $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
+           $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \
+           $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1
 
 IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \
                 $(WorkSpace)/src/share/vm/includeDB_features
--- a/hotspot/make/windows/makefiles/makedeps.make	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/make/windows/makefiles/makedeps.make	Wed Jul 05 16:43:15 2017 +0200
@@ -64,6 +64,7 @@
         -relativeInclude src\share\vm\gc_implementation\shared \
         -relativeInclude src\share\vm\gc_implementation\parNew \
         -relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \
+        -relativeInclude src\share\vm\gc_implementation\g1 \
         -relativeInclude src\share\vm\gc_interface \
         -relativeInclude src\share\vm\asm \
         -relativeInclude src\share\vm\memory \
@@ -115,6 +116,7 @@
         -additionalFile includeDB_gc_parallel \
         -additionalFile includeDB_gc_parallelScavenge \
         -additionalFile includeDB_gc_concurrentMarkSweep \
+        -additionalFile includeDB_gc_g1 \
         -additionalFile includeDB_gc_parNew \
         -additionalFile includeDB_gc_shared \
         -additionalFile includeDB_gc_serial \
--- a/hotspot/make/windows/makefiles/vm.make	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/make/windows/makefiles/vm.make	Wed Jul 05 16:43:15 2017 +0200
@@ -117,6 +117,7 @@
   /I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\
+  /I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\
   /I "$(WorkSpace)\src\share\vm\gc_interface"\
   /I "$(WorkSpace)\src\share\vm\asm"         \
   /I "$(WorkSpace)\src\share\vm\memory"      \
@@ -146,6 +147,7 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep
+VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory
@@ -222,6 +224,9 @@
 {$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
         $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
 
+{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj::
+        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+
 {$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj::
         $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
 
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -130,6 +130,20 @@
   return 0x00;                  // illegal instruction 0x00000000
 }
 
+Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
+  switch (in) {
+  case rc_z:   return equal;
+  case rc_lez: return lessEqual;
+  case rc_lz:  return less;
+  case rc_nz:  return notEqual;
+  case rc_gz:  return greater;
+  case rc_gez: return greaterEqual;
+  default:
+    ShouldNotReachHere();
+  }
+  return equal;
+}
+
 // Generate a bunch 'o stuff (including v9's
 #ifndef PRODUCT
 void Assembler::test_v9() {
@@ -1213,31 +1227,19 @@
 }
 
 
-void MacroAssembler::store_check(Register tmp, Register obj) {
-  // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
-
-  /* $$$ This stuff needs to go into one of the BarrierSet generator
-     functions.  (The particular barrier sets will have to be friends of
-     MacroAssembler, I guess.) */
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+void MacroAssembler::card_table_write(jbyte* byte_map_base,
+                                      Register tmp, Register obj) {
 #ifdef _LP64
   srlx(obj, CardTableModRefBS::card_shift, obj);
 #else
   srl(obj, CardTableModRefBS::card_shift, obj);
 #endif
   assert( tmp != obj, "need separate temp reg");
-  Address rs(tmp, (address)ct->byte_map_base);
+  Address rs(tmp, (address)byte_map_base);
   load_address(rs);
   stb(G0, rs.base(), obj);
 }
 
-void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
-  store_check(tmp, obj);
-}
-
 // %%% Note:  The following six instructions have been moved,
 //            unchanged, from assembler_sparc.inline.hpp.
 //            They will be refactored at a later date.
@@ -1663,11 +1665,21 @@
 
   if (reg == G0)  return;       // always NULL, which is always an oop
 
-  char buffer[16];
+  char buffer[64];
+#ifdef COMPILER1
+  if (CommentedAssembly) {
+    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+    block_comment(buffer);
+  }
+#endif
+
+  int len = strlen(file) + strlen(msg) + 1 + 4;
   sprintf(buffer, "%d", line);
-  int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
+  len += strlen(buffer);
+  sprintf(buffer, " at offset %d ", offset());
+  len += strlen(buffer);
   char * real_msg = new char[len];
-  sprintf(real_msg, "%s (%s:%d)", msg, file, line);
+  sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
 
   // Call indirectly to solve generation ordering problem
   Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
@@ -2059,6 +2071,27 @@
 #endif
 }
 
+void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+                                     Register s1, address d,
+                                     relocInfo::relocType rt ) {
+  if (VM_Version::v9_instructions_work()) {
+    bpr(rc, a, p, s1, d, rt);
+  } else {
+    tst(s1);
+    br(reg_cond_to_cc_cond(rc), a, p, d, rt);
+  }
+}
+
+void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+                                     Register s1, Label& L ) {
+  if (VM_Version::v9_instructions_work()) {
+    bpr(rc, a, p, s1, L);
+  } else {
+    tst(s1);
+    br(reg_cond_to_cc_cond(rc), a, p, L);
+  }
+}
+
 
 // instruction sequences factored across compiler & interpreter
 
@@ -3241,68 +3274,74 @@
   assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
 
-  // get eden boundaries
-  // note: we need both top & top_addr!
-  const Register top_addr = t1;
-  const Register end      = t2;
-
-  CollectedHeap* ch = Universe::heap();
-  set((intx)ch->top_addr(), top_addr);
-  intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
-  ld_ptr(top_addr, delta, end);
-  ld_ptr(top_addr, 0, obj);
-
-  // try to allocate
-  Label retry;
-  bind(retry);
-#ifdef ASSERT
-  // make sure eden top is properly aligned
-  {
-    Label L;
-    btst(MinObjAlignmentInBytesMask, obj);
-    br(Assembler::zero, false, Assembler::pt, L);
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    // No allocation in the shared eden.
+    br(Assembler::always, false, Assembler::pt, slow_case);
     delayed()->nop();
-    stop("eden top is not properly aligned");
-    bind(L);
-  }
+  } else {
+    // get eden boundaries
+    // note: we need both top & top_addr!
+    const Register top_addr = t1;
+    const Register end      = t2;
+
+    CollectedHeap* ch = Universe::heap();
+    set((intx)ch->top_addr(), top_addr);
+    intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
+    ld_ptr(top_addr, delta, end);
+    ld_ptr(top_addr, 0, obj);
+
+    // try to allocate
+    Label retry;
+    bind(retry);
+#ifdef ASSERT
+    // make sure eden top is properly aligned
+    {
+      Label L;
+      btst(MinObjAlignmentInBytesMask, obj);
+      br(Assembler::zero, false, Assembler::pt, L);
+      delayed()->nop();
+      stop("eden top is not properly aligned");
+      bind(L);
+    }
 #endif // ASSERT
-  const Register free = end;
-  sub(end, obj, free);                                   // compute amount of free space
-  if (var_size_in_bytes->is_valid()) {
-    // size is unknown at compile time
-    cmp(free, var_size_in_bytes);
-    br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
-    delayed()->add(obj, var_size_in_bytes, end);
-  } else {
-    // size is known at compile time
-    cmp(free, con_size_in_bytes);
-    br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
-    delayed()->add(obj, con_size_in_bytes, end);
-  }
-  // Compare obj with the value at top_addr; if still equal, swap the value of
-  // end with the value at top_addr. If not equal, read the value at top_addr
-  // into end.
-  casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
-  // if someone beat us on the allocation, try again, otherwise continue
-  cmp(obj, end);
-  brx(Assembler::notEqual, false, Assembler::pn, retry);
-  delayed()->mov(end, obj);                              // nop if successfull since obj == end
+    const Register free = end;
+    sub(end, obj, free);                                   // compute amount of free space
+    if (var_size_in_bytes->is_valid()) {
+      // size is unknown at compile time
+      cmp(free, var_size_in_bytes);
+      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+      delayed()->add(obj, var_size_in_bytes, end);
+    } else {
+      // size is known at compile time
+      cmp(free, con_size_in_bytes);
+      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+      delayed()->add(obj, con_size_in_bytes, end);
+    }
+    // Compare obj with the value at top_addr; if still equal, swap the value of
+    // end with the value at top_addr. If not equal, read the value at top_addr
+    // into end.
+    casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+    // if someone beat us on the allocation, try again, otherwise continue
+    cmp(obj, end);
+    brx(Assembler::notEqual, false, Assembler::pn, retry);
+    delayed()->mov(end, obj);                              // nop if successful since obj == end
 
 #ifdef ASSERT
-  // make sure eden top is properly aligned
-  {
-    Label L;
-    const Register top_addr = t1;
-
-    set((intx)ch->top_addr(), top_addr);
-    ld_ptr(top_addr, 0, top_addr);
-    btst(MinObjAlignmentInBytesMask, top_addr);
-    br(Assembler::zero, false, Assembler::pt, L);
-    delayed()->nop();
-    stop("eden top is not properly aligned");
-    bind(L);
+    // make sure eden top is properly aligned
+    {
+      Label L;
+      const Register top_addr = t1;
+
+      set((intx)ch->top_addr(), top_addr);
+      ld_ptr(top_addr, 0, top_addr);
+      btst(MinObjAlignmentInBytesMask, top_addr);
+      br(Assembler::zero, false, Assembler::pt, L);
+      delayed()->nop();
+      stop("eden top is not properly aligned");
+      bind(L);
+    }
+#endif // ASSERT
   }
-#endif // ASSERT
 }
 
 
@@ -3554,6 +3593,468 @@
   }
 }
 
+///////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+static uint num_stores = 0;
+static uint num_null_pre_stores = 0;
+
+static void count_null_pre_vals(void* pre_val) {
+  num_stores++;
+  if (pre_val == NULL) num_null_pre_stores++;
+  if ((num_stores % 1000000) == 0) {
+    tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
+                  num_stores, num_null_pre_stores,
+                  100.0*(float)num_null_pre_stores/(float)num_stores);
+  }
+}
+
+static address satb_log_enqueue_with_frame = 0;
+static u_char* satb_log_enqueue_with_frame_end = 0;
+
+static address satb_log_enqueue_frameless = 0;
+static u_char* satb_log_enqueue_frameless_end = 0;
+
+static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
+
+// The calls to this don't work.  We'd need to do a fair amount of work to
+// make it work.
+static void check_index(int ind) {
+  assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
+         "Invariants.")
+}
+
+static void generate_satb_log_enqueue(bool with_frame) {
+  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
+  CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
+  MacroAssembler masm(&buf);
+  address start = masm.pc();
+  Register pre_val;
+
+  Label refill, restart;
+  if (with_frame) {
+    masm.save_frame(0);
+    pre_val = I0;  // Was O0 before the save.
+  } else {
+    pre_val = O0;
+  }
+  int satb_q_index_byte_offset =
+    in_bytes(JavaThread::satb_mark_queue_offset() +
+             PtrQueue::byte_offset_of_index());
+  int satb_q_buf_byte_offset =
+    in_bytes(JavaThread::satb_mark_queue_offset() +
+             PtrQueue::byte_offset_of_buf());
+  assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
+         in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
+         "check sizes in assembly below");
+
+  masm.bind(restart);
+  masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
+
+  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
+  // If the branch is taken, no harm in executing this in the delay slot.
+  masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
+  masm.sub(L0, oopSize, L0);
+
+  masm.st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
+  if (!with_frame) {
+    // Use return-from-leaf
+    masm.retl();
+    masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  } else {
+    // Not delayed.
+    masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  }
+  if (with_frame) {
+    masm.ret();
+    masm.delayed()->restore();
+  }
+  masm.bind(refill);
+
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &SATBMarkQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  masm.mov(G1_scratch, L0);
+  masm.mov(G3_scratch, L1);
+  masm.mov(G4, L2);
+  // We need the value of O0 above (for the write into the buffer), so we
+  // save and restore it.
+  masm.mov(O0, L3);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  masm.mov(O7, L4);
+  masm.call_VM_leaf(L5, handle_zero, G2_thread);
+  masm.mov(L0, G1_scratch);
+  masm.mov(L1, G3_scratch);
+  masm.mov(L2, G4);
+  masm.mov(L3, O0);
+  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  masm.delayed()->mov(L4, O7);
+
+  if (with_frame) {
+    satb_log_enqueue_with_frame = start;
+    satb_log_enqueue_with_frame_end = masm.pc();
+  } else {
+    satb_log_enqueue_frameless = start;
+    satb_log_enqueue_frameless_end = masm.pc();
+  }
+}
+
+static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
+  if (with_frame) {
+    if (satb_log_enqueue_with_frame == 0) {
+      generate_satb_log_enqueue(with_frame);
+      assert(satb_log_enqueue_with_frame != 0, "postcondition.");
+      if (G1SATBPrintStubs) {
+        tty->print_cr("Generated with-frame satb enqueue:");
+        Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
+                             satb_log_enqueue_with_frame_end,
+                             tty);
+      }
+    }
+  } else {
+    if (satb_log_enqueue_frameless == 0) {
+      generate_satb_log_enqueue(with_frame);
+      assert(satb_log_enqueue_frameless != 0, "postcondition.");
+      if (G1SATBPrintStubs) {
+        tty->print_cr("Generated frameless satb enqueue:");
+        Disassembler::decode((u_char*)satb_log_enqueue_frameless,
+                             satb_log_enqueue_frameless_end,
+                             tty);
+      }
+    }
+  }
+}
+
+void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
+  assert(offset == 0 || index == noreg, "choose one");
+
+  if (G1DisablePreBarrier) return;
+  // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
+  Label filtered;
+  // satb_log_barrier_work0(tmp, filtered);
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    ld(G2,
+       in_bytes(JavaThread::satb_mark_queue_offset() +
+                PtrQueue::byte_offset_of_active()),
+       tmp);
+  } else {
+    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
+              "Assumption");
+    ldsb(G2,
+         in_bytes(JavaThread::satb_mark_queue_offset() +
+                  PtrQueue::byte_offset_of_active()),
+         tmp);
+  }
+  // Check on whether to annul.
+  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
+  delayed() -> nop();
+
+  // satb_log_barrier_work1(tmp, offset);
+  if (index == noreg) {
+    if (Assembler::is_simm13(offset)) {
+      ld_ptr(obj, offset, tmp);
+    } else {
+      set(offset, tmp);
+      ld_ptr(obj, tmp, tmp);
+    }
+  } else {
+    ld_ptr(obj, index, tmp);
+  }
+
+  // satb_log_barrier_work2(obj, tmp, offset);
+
+  // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
+
+  const Register pre_val = tmp;
+
+  if (G1SATBBarrierPrintNullPreVals) {
+    save_frame(0);
+    mov(pre_val, O0);
+    // Save G-regs that target may use.
+    mov(G1, L1);
+    mov(G2, L2);
+    mov(G3, L3);
+    mov(G4, L4);
+    mov(G5, L5);
+    call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
+    delayed()->nop();
+    // Restore G-regs that target may have used.
+    mov(L1, G1);
+    mov(L2, G2);
+    mov(L3, G3);
+    mov(L4, G4);
+    mov(L5, G5);
+    restore(G0, G0, G0);
+  }
+
+  // Check on whether to annul.
+  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
+  delayed() -> nop();
+
+  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
+  // case, pre_val will be a scratch G-reg, but there's some cases in which
+  // it's an O-reg.  In the first case, do a normal call.  In the latter,
+  // do a save here and call the frameless version.
+
+  guarantee(pre_val->is_global() || pre_val->is_out(),
+            "Or we need to think harder.");
+  if (pre_val->is_global() && !preserve_o_regs) {
+    generate_satb_log_enqueue_if_necessary(true); // with frame.
+    call(satb_log_enqueue_with_frame);
+    delayed()->mov(pre_val, O0);
+  } else {
+    generate_satb_log_enqueue_if_necessary(false); // with frameless.
+    save_frame(0);
+    call(satb_log_enqueue_frameless);
+    delayed()->mov(pre_val->after_save(), O0);
+    restore();
+  }
+
+  bind(filtered);
+}
+
+static jint num_ct_writes = 0;
+static jint num_ct_writes_filtered_in_hr = 0;
+static jint num_ct_writes_filtered_null = 0;
+static jint num_ct_writes_filtered_pop = 0;
+static G1CollectedHeap* g1 = NULL;
+
+static Thread* count_ct_writes(void* filter_val, void* new_val) {
+  Atomic::inc(&num_ct_writes);
+  if (filter_val == NULL) {
+    Atomic::inc(&num_ct_writes_filtered_in_hr);
+  } else if (new_val == NULL) {
+    Atomic::inc(&num_ct_writes_filtered_null);
+  } else {
+    if (g1 == NULL) {
+      g1 = G1CollectedHeap::heap();
+    }
+    if ((HeapWord*)new_val < g1->popular_object_boundary()) {
+      Atomic::inc(&num_ct_writes_filtered_pop);
+    }
+  }
+  if ((num_ct_writes % 1000000) == 0) {
+    jint num_ct_writes_filtered =
+      num_ct_writes_filtered_in_hr +
+      num_ct_writes_filtered_null +
+      num_ct_writes_filtered_pop;
+
+    tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
+                  "   (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
+                  num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered_in_hr/
+                  (float)num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered_null/
+                  (float)num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered_pop/
+                  (float)num_ct_writes);
+  }
+  return Thread::current();
+}
+
+static address dirty_card_log_enqueue = 0;
+static u_char* dirty_card_log_enqueue_end = 0;
+
+// This gets to assume that o0 contains the object address.
+static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
+  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
+  CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
+  MacroAssembler masm(&buf);
+  address start = masm.pc();
+
+  Label not_already_dirty, restart, refill;
+
+#ifdef _LP64
+  masm.srlx(O0, CardTableModRefBS::card_shift, O0);
+#else
+  masm.srl(O0, CardTableModRefBS::card_shift, O0);
+#endif
+  Address rs(O1, (address)byte_map_base);
+  masm.load_address(rs); // O1 := <card table base>
+  masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
+
+  masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
+                      O2, not_already_dirty);
+  // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
+  // case, harmless if not.
+  masm.delayed()->add(O0, O1, O3);
+
+  // We didn't take the branch, so we're already dirty: return.
+  // Use return-from-leaf
+  masm.retl();
+  masm.delayed()->nop();
+
+  // Not dirty.
+  masm.bind(not_already_dirty);
+  // First, dirty it.
+  masm.stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
+  int dirty_card_q_index_byte_offset =
+    in_bytes(JavaThread::dirty_card_queue_offset() +
+             PtrQueue::byte_offset_of_index());
+  int dirty_card_q_buf_byte_offset =
+    in_bytes(JavaThread::dirty_card_queue_offset() +
+             PtrQueue::byte_offset_of_buf());
+  masm.bind(restart);
+  masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
+
+  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
+                      L0, refill);
+  // If the branch is taken, no harm in executing this in the delay slot.
+  masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
+  masm.sub(L0, oopSize, L0);
+
+  masm.st_ptr(O3, L1, L0);  // [_buf + index] := I0
+  // Use return-from-leaf
+  masm.retl();
+  masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
+
+  masm.bind(refill);
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &DirtyCardQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  masm.mov(G1_scratch, L3);
+  masm.mov(G3_scratch, L5);
+  // We need the value of O3 above (for the write into the buffer), so we
+  // save and restore it.
+  masm.mov(O3, L6);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  masm.mov(O7, L4);
+
+  masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
+  masm.mov(L3, G1_scratch);
+  masm.mov(L5, G3_scratch);
+  masm.mov(L6, O3);
+  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  masm.delayed()->mov(L4, O7);
+
+  dirty_card_log_enqueue = start;
+  dirty_card_log_enqueue_end = masm.pc();
+  // XXX Should have a guarantee here about not going off the end!
+  // Does it already do so?  Do an experiment...
+}
+
+static inline void
+generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
+  if (dirty_card_log_enqueue == 0) {
+    generate_dirty_card_log_enqueue(byte_map_base);
+    assert(dirty_card_log_enqueue != 0, "postcondition.");
+    if (G1SATBPrintStubs) {
+      tty->print_cr("Generated dirty_card enqueue:");
+      Disassembler::decode((u_char*)dirty_card_log_enqueue,
+                           dirty_card_log_enqueue_end,
+                           tty);
+    }
+  }
+}
+
+
+void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+
+  Label filtered;
+  MacroAssembler* post_filter_masm = this;
+
+  if (new_val == G0) return;
+  if (G1DisablePostBarrier) return;
+
+  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::G1SATBCT ||
+         bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
+  if (G1RSBarrierRegionFilter) {
+    xor3(store_addr, new_val, tmp);
+#ifdef _LP64
+    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#else
+    srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#endif
+    if (G1PrintCTFilterStats) {
+      guarantee(tmp->is_global(), "Or stats won't work...");
+      // This is a sleazy hack: I'm temporarily hijacking G2, which I
+      // promise to restore.
+      mov(new_val, G2);
+      save_frame(0);
+      mov(tmp, O0);
+      mov(G2, O1);
+      // Save G-regs that target may use.
+      mov(G1, L1);
+      mov(G2, L2);
+      mov(G3, L3);
+      mov(G4, L4);
+      mov(G5, L5);
+      call(CAST_FROM_FN_PTR(address, &count_ct_writes));
+      delayed()->nop();
+      mov(O0, G2);
+      // Restore G-regs that target may have used.
+      mov(L1, G1);
+      mov(L3, G3);
+      mov(L4, G4);
+      mov(L5, G5);
+      restore(G0, G0, G0);
+    }
+    // XXX Should I predict this taken or not?  Does it matter?
+    br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
+    delayed()->nop();
+  }
+
+  // Now we decide how to generate the card table write.  If we're
+  // enqueueing, we call out to a generated function.  Otherwise, we do it
+  // inline here.
+
+  if (G1RSBarrierUseQueue) {
+    // If the "store_addr" register is an "in" or "local" register, move it to
+    // a scratch reg so we can pass it as an argument.
+    bool use_scr = !(store_addr->is_global() || store_addr->is_out());
+    // Pick a scratch register different from "tmp".
+    Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
+    // Make sure we use up the delay slot!
+    if (use_scr) {
+      post_filter_masm->mov(store_addr, scr);
+    } else {
+      post_filter_masm->nop();
+    }
+    generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
+    save_frame(0);
+    call(dirty_card_log_enqueue);
+    if (use_scr) {
+      delayed()->mov(scr, O0);
+    } else {
+      delayed()->mov(store_addr->after_save(), O0);
+    }
+    restore();
+
+  } else {
+
+#ifdef _LP64
+    post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
+#else
+    post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
+#endif
+    assert( tmp != store_addr, "need separate temp reg");
+    Address rs(tmp, (address)bs->byte_map_base);
+    load_address(rs);
+    stb(G0, rs.base(), store_addr);
+  }
+
+  bind(filtered);
+
+}
+
+#endif  // SERIALGC
+///////////////////////////////////////////////////////////////////////////////////
+
+void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+  // If we're writing constant NULL, we can skip the write barrier.
+  if (new_val == G0) return;
+  CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef ||
+         bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
+  card_table_write(bs->byte_map_base, tmp, store_addr);
+}
+
 void MacroAssembler::load_klass(Register src_oop, Register klass) {
   // The number of bytes in this code is used by
   // MachCallDynamicJavaNode::ret_addr_offset()
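
The new MacroAssembler::g1_write_barrier_pre / g1_write_barrier_post routines above emit the SPARC code for G1's SATB pre-barrier and dirty-card post-barrier. As an orientation aid only, here is a minimal self-contained C++ sketch of the filtering those generated sequences perform around an oop store; it is not HotSpot code, and the names and constants (FakeThreadQueues, kLogHRGrainBytes, kCardShift) are invented stand-ins for PtrQueue, HeapRegion::LogOfHRGrainBytes and CardTableModRefBS::card_shift.

// Minimal, self-contained sketch (not HotSpot code) of the filtering the
// generated G1 barriers perform around an oop store  *field = new_val.
// All types, names and constants below are invented for illustration.
#include <cstddef>
#include <cstdint>
#include <vector>

struct FakeThreadQueues {
  bool satb_active = false;            // "concurrent marking in progress" flag
  std::vector<void*> satb_buf;         // stands in for the per-thread SATB PtrQueue
  std::vector<uint8_t*> dirty_cards;   // stands in for the dirty-card queue
};

const uint8_t kDirtyCard       = 0;    // G1 writes 0 to mark a card dirty
const size_t  kLogHRGrainBytes = 20;   // illustrative region size (1 MB)
const size_t  kCardShift       = 9;    // 512-byte cards

inline uint8_t* card_for(void* addr, uint8_t* byte_map_base) {
  return byte_map_base + (reinterpret_cast<uintptr_t>(addr) >> kCardShift);
}

// Pre-barrier: remember the value being overwritten (the SATB invariant).
void g1_pre_barrier_sketch(void** field, FakeThreadQueues& t) {
  if (!t.satb_active) return;          // marking not active: filtered out
  void* pre_val = *field;              // old value about to be overwritten
  if (pre_val == NULL) return;         // nothing worth remembering
  t.satb_buf.push_back(pre_val);       // enqueue for the marking threads
}

// Post-barrier: record cross-region pointers for remembered-set refinement.
void g1_post_barrier_sketch(void** field, void* new_val,
                            uint8_t* byte_map_base, FakeThreadQueues& t) {
  if (new_val == NULL) return;         // storing NULL: no remembered-set entry
  uintptr_t diff = reinterpret_cast<uintptr_t>(field) ^
                   reinterpret_cast<uintptr_t>(new_val);
  if ((diff >> kLogHRGrainBytes) == 0) return;   // same heap region: filtered
  uint8_t* card = card_for(field, byte_map_base);
  if (*card != kDirtyCard) {           // only the first store dirties the card
    *card = kDirtyCard;
    t.dirty_cards.push_back(card);     // hand the card to concurrent refinement
  }
}

The assembly above performs the same checks with branch-on-register-condition instructions and jumps to the generated enqueue stubs when the per-thread buffer needs attention.
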
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1439,7 +1439,11 @@
   // pp 214
 
   void save(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
-  void save(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void save(    Register s1, int simm13a, Register d ) {
+    // make sure frame is at least large enough for the register save area
+    assert(-simm13a >= 16 * wordSize, "frame too small");
+    emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
+  }
 
   void restore( Register s1 = G0,  Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
   void restore( Register s1,       int simm13a,      Register d      ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
@@ -1594,6 +1598,11 @@
   inline void wrasi(  Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
   inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
 
+  // For a given register condition, return the appropriate condition code
+  // Condition (the one you would use to get the same effect after "tst" on
+  // the target register.)
+  Assembler::Condition reg_cond_to_cc_cond(RCondition in);
+
 
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
@@ -1630,6 +1639,8 @@
 
   // restore global registers in case C code disturbed them
   static void restore_registers(MacroAssembler* a, Register r);
+
+
 };
 
 
@@ -1722,6 +1733,12 @@
   void br_null   ( Register s1, bool a, Predict p, Label& L );
   void br_notnull( Register s1, bool a, Predict p, Label& L );
 
+  // These versions will do the most efficient thing on v8 and v9.  Perhaps
+  // this is what the routine above was meant to do, but it didn't (and
+  // didn't cover both target address kinds.)
+  void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
+  void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L);
+
   inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
 
@@ -2056,9 +2073,23 @@
 #endif // ASSERT
 
  public:
-  // Stores
-  void store_check(Register tmp, Register obj);                // store check for obj - register is destroyed afterwards
-  void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards
+
+  // Write to card table for obj - register is destroyed afterwards.
+  void card_table_write(jbyte* byte_map_base, Register tmp, Register obj);
+
+  void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
+
+#ifndef SERIALGC
+  // Array store and offset
+  void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs);
+
+  void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
+
+  // May do filtering, depending on the boolean arguments.
+  void g1_card_table_write(jbyte* byte_map_base,
+                           Register tmp, Register obj, Register new_val,
+                           bool region_filter, bool null_filter);
+#endif // SERIALGC
 
   // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
   void push_fTOS();
--- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -404,4 +404,55 @@
 }
 
 
+///////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
+  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                    pre_val_reg, _continuation);
+  __ delayed()->nop();
+
+  __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id));
+  __ delayed()->mov(pre_val_reg, G4);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->nop();
+
+}
+
+jbyte* G1PostBarrierStub::_byte_map_base = NULL;
+
+jbyte* G1PostBarrierStub::byte_map_base_slow() {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+         "Must be if we're using this.");
+  return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register addr_reg = addr()->as_pointer_register();
+  Register new_val_reg = new_val()->as_register();
+  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                    new_val_reg, _continuation);
+  __ delayed()->nop();
+
+  __ call(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id));
+  __ delayed()->mov(addr_reg, G4);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->nop();
+}
+
+#endif // SERIALGC
+///////////////////////////////////////////////////////////////////////////////////
+
 #undef __
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -2093,7 +2093,11 @@
   // the known type isn't loaded since the code sanity checks
   // in debug mode and the type isn't required when we know the exact type
   // also check that the type is an array type.
-  if (op->expected_type() == NULL) {
+  // We also, for now, always call the stub if the barrier set requires a
+  // write_ref_pre barrier (which the stub does, but none of the optimized
+  // cases currently does).
+  if (op->expected_type() == NULL ||
+      Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) {
     __ mov(src,     O0);
     __ mov(src_pos, O1);
     __ mov(dst,     O2);
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -365,6 +365,10 @@
     __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info);
   }
 
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
+  }
   __ move(value.result(), array_addr, null_check_info);
   if (obj_store) {
     // Is this precise?
@@ -663,6 +667,10 @@
 
   __ add(obj.result(), offset.result(), addr);
 
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    pre_barrier(obj.result(), false, NULL);
+  }
+
   if (type == objectType)
     __ cas_obj(addr, cmp.result(), val.result(), t1, t2);
   else if (type == intType)
@@ -677,7 +685,11 @@
   LIR_Opr result = rlock_result(x);
   __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result);
   if (type == objectType) {  // Write-barrier needed for Object fields.
+#ifdef PRECISE_CARDMARK
+    post_barrier(addr, val.result());
+#else
     post_barrier(obj.result(), val.result());
+#endif // PRECISE_CARDMARK
   }
 }
 
@@ -1154,6 +1166,10 @@
         addr = new LIR_Address(base_op, index_op, type);
       }
 
+      if (is_obj) {
+        pre_barrier(LIR_OprFact::address(addr), false, NULL);
+        // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr));
+      }
       __ move(data, addr);
       if (is_obj) {
         // This address is precise
--- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -832,6 +832,163 @@
       }
       break;
 
+#ifndef SERIALGC
+    case g1_pre_barrier_slow_id:
+      { // G4: previous value of memory
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ save_frame(0);
+          __ set((int)id, O1);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
+          __ should_not_reach_here();
+          break;
+        }
+
+        __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
+
+        Register pre_val = G4;
+        Register tmp  = G1_scratch;
+        Register tmp2 = G3_scratch;
+
+        Label refill, restart;
+        bool with_frame = false; // I don't know if we can do with-frame.
+        int satb_q_index_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   PtrQueue::byte_offset_of_index());
+        int satb_q_buf_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   PtrQueue::byte_offset_of_buf());
+        __ bind(restart);
+        __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);
+
+        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false,
+                          Assembler::pn, tmp, refill);
+
+        // If the branch is taken, no harm in executing this in the delay slot.
+        __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
+        __ sub(tmp, oopSize, tmp);
+
+        __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := <address_of_card>
+        // Use return-from-leaf
+        __ retl();
+        __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);
+
+        __ bind(refill);
+        __ save_frame(0);
+
+        __ mov(pre_val, L0);
+        __ mov(tmp,     L1);
+        __ mov(tmp2,    L2);
+
+        __ call_VM_leaf(L7_thread_cache,
+                        CAST_FROM_FN_PTR(address,
+                                         SATBMarkQueueSet::handle_zero_index_for_thread),
+                                         G2_thread);
+
+        __ mov(L0, pre_val);
+        __ mov(L1, tmp);
+        __ mov(L2, tmp2);
+
+        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+        __ delayed()->restore();
+      }
+      break;
+
+    case g1_post_barrier_slow_id:
+      {
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ save_frame(0);
+          __ set((int)id, O1);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
+          __ should_not_reach_here();
+          break;
+        }
+
+        __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
+
+        Register addr = G4;
+        Register cardtable = G5;
+        Register tmp  = G1_scratch;
+        Register tmp2 = G3_scratch;
+        jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
+
+        Label not_already_dirty, restart, refill;
+
+#ifdef _LP64
+        __ srlx(addr, CardTableModRefBS::card_shift, addr);
+#else
+        __ srl(addr, CardTableModRefBS::card_shift, addr);
+#endif
+
+        Address rs(cardtable, (address)byte_map_base);
+        __ load_address(rs); // cardtable := <card table base>
+        __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
+
+        __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
+                          tmp, not_already_dirty);
+        // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch
+        // case, harmless if not.
+        __ delayed()->add(addr, cardtable, tmp2);
+
+        // We didn't take the branch, so we're already dirty: return.
+        // Use return-from-leaf
+        __ retl();
+        __ delayed()->nop();
+
+        // Not dirty.
+        __ bind(not_already_dirty);
+        // First, dirty it.
+        __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).
+
+        Register tmp3 = cardtable;
+        Register tmp4 = tmp;
+
+        // these registers are now dead
+        addr = cardtable = tmp = noreg;
+
+        int dirty_card_q_index_byte_offset =
+          in_bytes(JavaThread::dirty_card_queue_offset() +
+                   PtrQueue::byte_offset_of_index());
+        int dirty_card_q_buf_byte_offset =
+          in_bytes(JavaThread::dirty_card_queue_offset() +
+                   PtrQueue::byte_offset_of_buf());
+        __ bind(restart);
+        __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);
+
+        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
+                          tmp3, refill);
+        // If the branch is taken, no harm in executing this in the delay slot.
+        __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
+        __ sub(tmp3, oopSize, tmp3);
+
+        __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
+        // Use return-from-leaf
+        __ retl();
+        __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);
+
+        __ bind(refill);
+        __ save_frame(0);
+
+        __ mov(tmp2, L0);
+        __ mov(tmp3, L1);
+        __ mov(tmp4, L2);
+
+        __ call_VM_leaf(L7_thread_cache,
+                        CAST_FROM_FN_PTR(address,
+                                         DirtyCardQueueSet::handle_zero_index_for_thread),
+                                         G2_thread);
+
+        __ mov(L0, tmp2);
+        __ mov(L1, tmp3);
+        __ mov(L2, tmp4);
+
+        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+        __ delayed()->restore();
+      }
+      break;
+#endif // !SERIALGC
+
     default:
       { __ set_info("unimplemented entry", dont_gc_arguments);
         __ save_frame(0);
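
Both the SATB and dirty-card slow-path stubs above, and the generated enqueue blobs added to assembler_sparc.cpp, follow the same pattern: decrement a per-thread byte index into a buffer, store the value, and call handle_zero_index_for_thread when the index reaches zero. A rough self-contained C++ sketch of that pattern follows; SketchQueue and handle_zero_index are invented names standing in for the PtrQueue machinery, not the HotSpot API.

// Sketch only -- not the HotSpot PtrQueue API. The index counts down in
// bytes from the buffer capacity; 0 means "buffer is full".
#include <cstddef>

struct SketchQueue {
  static const size_t kCapacityBytes = 1024;     // illustrative buffer size
  void*  buf[kCapacityBytes / sizeof(void*)];
  size_t index = kCapacityBytes;                 // next free slot, as a byte offset
};

// Stand-in for SATBMarkQueueSet/DirtyCardQueueSet::handle_zero_index_for_thread:
// HotSpot hands the full buffer to the collector and installs a fresh one;
// here we simply pretend the buffer was drained.
void handle_zero_index(SketchQueue& q) { q.index = SketchQueue::kCapacityBytes; }

void enqueue_sketch(SketchQueue& q, void* val) {
  for (;;) {                             // the "restart" label in the stubs
    if (q.index == 0) {                  // br_on_reg_cond(rc_z, ..., refill)
      handle_zero_index(q);              // runtime call, then branch to restart
      continue;
    }
    q.index -= sizeof(void*);            // sub(tmp, oopSize, tmp)
    q.buf[q.index / sizeof(void*)] = val;  // st_ptr(val, buf, index), store index back
    return;
  }
}
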
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1110,30 +1110,31 @@
   //  The input registers are overwritten.
   //
   void gen_write_ref_array_pre_barrier(Register addr, Register count) {
-#if 0 // G1 only
     BarrierSet* bs = Universe::heap()->barrier_set();
     if (bs->has_write_ref_pre_barrier()) {
       assert(bs->has_write_ref_array_pre_opt(),
              "Else unsupported barrier set.");
 
-      assert(addr->is_global() && count->is_global(),
-             "If not, then we have to fix this code to handle more "
-             "general cases.");
-      // Get some new fresh output registers.
       __ save_frame(0);
       // Save the necessary global regs... will be used after.
-      __ mov(addr, L0);
-      __ mov(count, L1);
-
-      __ mov(addr, O0);
+      if (addr->is_global()) {
+        __ mov(addr, L0);
+      }
+      if (count->is_global()) {
+        __ mov(count, L1);
+      }
+      __ mov(addr->after_save(), O0);
       // Get the count into O1
       __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
-      __ delayed()->mov(count, O1);
-      __ mov(L0, addr);
-      __ mov(L1, count);
+      __ delayed()->mov(count->after_save(), O1);
+      if (addr->is_global()) {
+        __ mov(L0, addr);
+      }
+      if (count->is_global()) {
+        __ mov(L1, count);
+      }
       __ restore();
     }
-#endif // 0
   }
   //
   //  Generate post-write barrier for array.
@@ -1150,22 +1151,17 @@
     BarrierSet* bs = Universe::heap()->barrier_set();
 
     switch (bs->kind()) {
-#if 0 // G1 - only
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
         {
-          assert(addr->is_global() && count->is_global(),
-                 "If not, then we have to fix this code to handle more "
-                 "general cases.");
           // Get some new fresh output registers.
           __ save_frame(0);
-          __ mov(addr, O0);
+          __ mov(addr->after_save(), O0);
           __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
-          __ delayed()->mov(count, O1);
+          __ delayed()->mov(count->after_save(), O1);
           __ restore();
         }
         break;
-#endif // 0 G1 - only
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
         {
@@ -2412,8 +2408,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    gen_write_ref_array_pre_barrier(G1, G5);
-
+    gen_write_ref_array_pre_barrier(O1, O2);
 
 #ifdef ASSERT
     // We sometimes save a frame (see partial_subtype_check below).
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -28,6 +28,79 @@
 #ifndef CC_INTERP
 #define __ _masm->
 
+// Misc helpers
+
+// Do an oop store like *(base + index + offset) = val
+// index can be noreg,
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Register base,
+                         Register index,
+                         int offset,
+                         Register val,
+                         Register tmp,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(tmp != val && tmp != base && tmp != index, "register collision");
+  assert(index == noreg || offset == 0, "only one offset");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true);
+        if (index == noreg ) {
+          assert(Assembler::is_simm13(offset), "fix this code");
+          __ store_heap_oop(val, base, offset);
+        } else {
+          __ store_heap_oop(val, base, index);
+        }
+
+        // No need for post barrier if storing NULL
+        if (val != G0) {
+          if (precise) {
+            if (index == noreg) {
+              __ add(base, offset, base);
+            } else {
+              __ add(base, index, base);
+            }
+          }
+          __ g1_write_barrier_post(base, val, tmp);
+        }
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (index == noreg ) {
+          assert(Assembler::is_simm13(offset), "fix this code");
+          __ store_heap_oop(val, base, offset);
+        } else {
+          __ store_heap_oop(val, base, index);
+        }
+        // No need for post barrier if storing NULL
+        if (val != G0) {
+          if (precise) {
+            if (index == noreg) {
+              __ add(base, offset, base);
+            } else {
+              __ add(base, index, base);
+            }
+          }
+          __ card_write_barrier_post(base, val, tmp);
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      ShouldNotReachHere();
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+
 
 //----------------------------------------------------------------------------------------------------
 // Platform-dependent initialization
@@ -758,6 +831,8 @@
   // O4:        array element klass
   // O5:        value klass
 
+  // Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
   // Generate a fast subtype check.  Branch to store_ok if no
   // failure.  Throw if failure.
   __ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok );
@@ -767,18 +842,14 @@
 
   // Store is OK.
   __ bind(store_ok);
-  __ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-  // Quote from rememberedSet.hpp: For objArrays, the precise card
-  // corresponding to the pointer store is dirtied so we don't need to
-  // scavenge the entire array.
-  Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-  __ add(element, O1);              // address the element precisely
-  __ store_check(G3_scratch, O1);
+  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true);
+
   __ ba(false,done);
   __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value)
 
   __ bind(is_null);
-  __ store_heap_oop(Otos_i, element);
+  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true);
+
   __ profile_null_seen(G3_scratch);
   __ inc(Lesp, 3* Interpreter::stackElementSize());     // adj sp (pops array, index and value)
   __ bind(done);
@@ -2449,8 +2520,9 @@
     // atos
     __ pop_ptr();
     __ verify_oop(Otos_i);
-    __ store_heap_oop(Otos_i, Rclass, Roffset);
-    __ store_check(G1_scratch, Rclass, Roffset);
+
+    do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+
     __ ba(false, checkVolatile);
     __ delayed()->tst(Lscratch);
 
@@ -2491,8 +2563,9 @@
     __ pop_ptr();
     pop_and_check_object(Rclass);
     __ verify_oop(Otos_i);
-    __ store_heap_oop(Otos_i, Rclass, Roffset);
-    __ store_check(G1_scratch, Rclass, Roffset);
+
+    do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+
     patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch);
     __ ba(false, checkVolatile);
     __ delayed()->tst(Lscratch);
@@ -2646,8 +2719,7 @@
       __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
       break;
     case Bytecodes::_fast_aputfield:
-      __ store_heap_oop(Otos_i, Rclass, Roffset);
-      __ store_check(G1_scratch, Rclass, Roffset);
+      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
       break;
     default:
       ShouldNotReachHere();
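
The MacroAssembler::eden_allocate changes (the SPARC hunk in assembler_sparc.cpp above and the x86 hunk in assembler_x86.cpp below) add a guard: when CMS incremental mode is enabled or the heap does not support inline contiguous allocation, as is the case for G1, compiled code branches straight to the slow case instead of bumping a shared eden top pointer. A minimal C++ sketch of that control flow, assuming a simplified heap with an atomic top pointer; SketchHeap and eden_allocate_sketch are invented names, not HotSpot code.

#include <atomic>
#include <cstddef>

struct SketchHeap {
  bool  supports_inline_contig_alloc;   // false for G1 in this changeset
  std::atomic<char*> top;               // shared eden allocation pointer
  char* end;                            // end of the contiguous eden
};

// Returns the new object address, or NULL to mean "take the slow case".
char* eden_allocate_sketch(SketchHeap& heap, size_t size_in_bytes) {
  if (!heap.supports_inline_contig_alloc) {
    return NULL;                        // br ... slow_case / jmp(slow_case)
  }
  for (;;) {                            // the "retry" label
    char* obj = heap.top.load();        // ld_ptr(top_addr, 0, obj) / movptr(obj, heap_top)
    char* new_top = obj + size_in_bytes;
    if (new_top < obj || new_top > heap.end) {
      return NULL;                      // wrapped around or not enough space: slow case
    }
    // casx_under_lock / locked cmpxchg: install new_top only if top is still obj.
    if (heap.top.compare_exchange_strong(obj, new_top)) {
      return obj;                       // we won the race; obj is the new object
    }
    // Someone beat us to the allocation: retry.
  }
}
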
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1575,6 +1575,35 @@
   emit_operand(src, dst);
 }
 
+void Assembler::movdqu(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_byte(0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::movdqu(Address dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(dst, src);
+  emit_byte(0x0F);
+  emit_byte(0x7F);
+  emit_operand(src, dst);
+}
+
 // Uses zero extension on 64bit
 
 void Assembler::movl(Register dst, int32_t imm32) {
@@ -5935,26 +5964,30 @@
                                    Label& slow_case) {
   assert(obj == rax, "obj must be in rax, for cmpxchg");
   assert_different_registers(obj, var_size_in_bytes, t1);
-  Register end = t1;
-  Label retry;
-  bind(retry);
-  ExternalAddress heap_top((address) Universe::heap()->top_addr());
-  movptr(obj, heap_top);
-  if (var_size_in_bytes == noreg) {
-    lea(end, Address(obj, con_size_in_bytes));
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    jmp(slow_case);
   } else {
-    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
-  }
-  // if end < obj then we wrapped around => object too long => slow case
-  cmpptr(end, obj);
-  jcc(Assembler::below, slow_case);
-  cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
-  jcc(Assembler::above, slow_case);
-  // Compare obj with the top addr, and if still equal, store the new top addr in
-  // end at the address of the top addr pointer. Sets ZF if was equal, and clears
-  // it otherwise. Use lock prefix for atomicity on MPs.
-  locked_cmpxchgptr(end, heap_top);
-  jcc(Assembler::notEqual, retry);
+    Register end = t1;
+    Label retry;
+    bind(retry);
+    ExternalAddress heap_top((address) Universe::heap()->top_addr());
+    movptr(obj, heap_top);
+    if (var_size_in_bytes == noreg) {
+      lea(end, Address(obj, con_size_in_bytes));
+    } else {
+      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
+    }
+    // if end < obj then we wrapped around => object too long => slow case
+    cmpptr(end, obj);
+    jcc(Assembler::below, slow_case);
+    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
+    jcc(Assembler::above, slow_case);
+    // Compare obj with the top addr, and if still equal, store the new top addr in
+    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
+    // it otherwise. Use lock prefix for atomicity on MPs.
+    locked_cmpxchgptr(end, heap_top);
+    jcc(Assembler::notEqual, retry);
+  }
 }
 
 void MacroAssembler::enter() {
@@ -6491,6 +6524,179 @@
   }
 }
 
+//////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+#ifndef _LP64
+                                          Register thread,
+#endif
+                                          Register tmp,
+                                          Register tmp2,
+                                          bool tosca_live) {
+  LP64_ONLY(Register thread = r15_thread;)
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+
+  Label done;
+  Label runtime;
+
+  // if (!marking_in_progress) goto done;
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    cmpb(in_progress, 0);
+  }
+  jcc(Assembler::equal, done);
+
+  // if (x.f == NULL) goto done;
+  cmpptr(Address(obj, 0), NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // Can we store original value in the thread's buffer?
+
+  LP64_ONLY(movslq(tmp, index);)
+  movptr(tmp2, Address(obj, 0));
+#ifdef _LP64
+  cmpq(tmp, 0);
+#else
+  cmpl(index, 0);
+#endif
+  jcc(Assembler::equal, runtime);
+#ifdef _LP64
+  subq(tmp, wordSize);
+  movl(index, tmp);
+  addq(tmp, buffer);
+#else
+  subl(index, wordSize);
+  movl(tmp, buffer);
+  addl(tmp, index);
+#endif
+  movptr(Address(tmp, 0), tmp2);
+  jmp(done);
+  bind(runtime);
+  // save the live input values
+  if(tosca_live) push(rax);
+  push(obj);
+#ifdef _LP64
+  movq(c_rarg0, Address(obj, 0));
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
+  pop(thread);
+#endif
+  pop(obj);
+  if(tosca_live) pop(rax);
+  bind(done);
+
+}
+
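The pre-barrier above is easier to follow as pseudo-C++: when concurrent marking is active and the field's previous value is non-NULL, the old value is pushed into a per-thread SATB buffer whose index counts down in bytes, and the runtime leaf call is taken only when the buffer is full. A hypothetical model (the names and the flush behaviour are illustrative, not HotSpot's):

#include <cstddef>
#include <cstdio>

struct SatbQueue {
  bool   active;   // "marking in progress"
  size_t index;    // byte offset of the next free slot, counting down to 0
  void** buffer;   // thread-local buffer base
};

static void satb_runtime_slow_path(void* pre_val) {
  // Stand-in for the SharedRuntime::g1_wb_pre leaf call taken on a full buffer.
  printf("runtime: record %p and hand off the buffer\n", pre_val);
}

static void g1_pre_barrier(SatbQueue& q, void** field) {
  if (!q.active) return;            // no concurrent marking: nothing to log
  void* pre_val = *field;
  if (pre_val == nullptr) return;   // overwriting NULL needs no SATB entry
  if (q.index == 0) {               // buffer full: slow path
    satb_runtime_slow_path(pre_val);
    return;
  }
  q.index -= sizeof(void*);
  q.buffer[q.index / sizeof(void*)] = pre_val;  // log the previous value
}

int main() {
  void* slots[4] = {};
  SatbQueue q{true, sizeof(slots), slots};
  void* field = reinterpret_cast<void*>(0x1234);  // fake "previous value"
  g1_pre_barrier(q, &field);
  printf("logged %p, index now %zu\n", slots[q.index / sizeof(void*)], q.index);
  return 0;
}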
+void MacroAssembler::g1_write_barrier_post(Register store_addr,
+                                           Register new_val,
+#ifndef _LP64
+                                           Register thread,
+#endif
+                                           Register tmp,
+                                           Register tmp2) {
+
+  LP64_ONLY(Register thread = r15_thread;)
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  movptr(tmp, store_addr);
+  xorptr(tmp, new_val);
+  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
+  jcc(Assembler::equal, done);
+
+  // crosses regions, storing NULL?
+
+  cmpptr(new_val, (int32_t) NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  ExternalAddress cardtable((address) ct->byte_map_base);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+#ifdef _LP64
+  const Register card_addr = tmp;
+
+  movq(card_addr, store_addr);
+  shrq(card_addr, CardTableModRefBS::card_shift);
+
+  lea(tmp2, cardtable);
+
+  // get the address of the card
+  addq(card_addr, tmp2);
+#else
+  const Register card_index = tmp;
+
+  movl(card_index, store_addr);
+  shrl(card_index, CardTableModRefBS::card_shift);
+
+  Address index(noreg, card_index, Address::times_1);
+  const Register card_addr = tmp;
+  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
+#endif
+  cmpb(Address(card_addr, 0), 0);
+  jcc(Assembler::equal, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  movb(Address(card_addr, 0), 0);
+
+  cmpl(queue_index, 0);
+  jcc(Assembler::equal, runtime);
+  subl(queue_index, wordSize);
+  movptr(tmp2, buffer);
+#ifdef _LP64
+  movslq(rscratch1, queue_index);
+  addq(tmp2, rscratch1);
+  movq(Address(tmp2, 0), card_addr);
+#else
+  addl(tmp2, queue_index);
+  movl(Address(tmp2, 0), card_index);
+#endif
+  jmp(done);
+
+  bind(runtime);
+  // save the live input values
+  push(store_addr);
+  push(new_val);
+#ifdef _LP64
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  pop(thread);
+#endif
+  pop(new_val);
+  pop(store_addr);
+
+  bind(done);
+
+}
+
+#endif // SERIALGC
+//////////////////////////////////////////////////////////////////////////////////
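Likewise, a hypothetical C++ model of the post-barrier above: stores that stay within one heap region or that store NULL are filtered out; otherwise the covering card is dirtied (0 meaning dirty, matching the cmpb/movb against 0) and its address is logged in the thread's dirty-card queue, with the runtime call as the fallback when the queue is full. The region size, card size and queue layout below are made-up constants, not HotSpot's values:

#include <cstdint>
#include <cstdio>
#include <cstring>

static const int   kLogRegionBytes = 20;     // stand-in for HeapRegion::LogOfHRGrainBytes
static const int   kCardShift      = 9;      // stand-in for CardTableModRefBS::card_shift
static uint8_t     card_table[1 << 12];      // byte map for a 2 MB fake heap
static uintptr_t   heap_base;
static uint8_t*    dirty_card_queue[16];
static size_t      queue_index = sizeof(dirty_card_queue);  // counts down in bytes

static void g1_post_barrier(void* store_addr, void* new_val) {
  uintptr_t a = reinterpret_cast<uintptr_t>(store_addr);
  uintptr_t v = reinterpret_cast<uintptr_t>(new_val);
  if (((a ^ v) >> kLogRegionBytes) == 0) return;   // same region: nothing to remember
  if (new_val == nullptr) return;                  // storing NULL: nothing to remember
  uint8_t* card = &card_table[(a - heap_base) >> kCardShift];
  if (*card == 0) return;                          // card already dirty
  *card = 0;                                       // dirty it
  if (queue_index == 0) {                          // queue full: slow path
    printf("runtime: g1_wb_post(%p)\n", static_cast<void*>(card));
    return;
  }
  queue_index -= sizeof(void*);
  dirty_card_queue[queue_index / sizeof(void*)] = card;   // log the card address
}

int main() {
  static char fake_heap[2 << 20];                  // two 1 MB "regions"
  heap_base = reinterpret_cast<uintptr_t>(fake_heap);
  memset(card_table, 1, sizeof(card_table));       // all cards start clean
  g1_post_barrier(fake_heap + 64, fake_heap + (2 << 20) - 64);
  printf("card dirtied, queue index now %zu\n", queue_index);
  return 0;
}

The xor-and-shift at the top is the cheap "does this store cross regions?" test: two addresses share all bits above the region-size shift exactly when they lie in the same region.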
+
+
 void MacroAssembler::store_check(Register obj) {
   // Does a store check for the oop in register obj. The content of
   // register obj is destroyed afterwards.
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -227,9 +227,11 @@
 #endif // ASSERT
 
   // accessors
-  bool uses(Register reg) const {
-    return _base == reg || _index == reg;
-  }
+  bool        uses(Register reg) const { return _base == reg || _index == reg; }
+  Register    base()             const { return _base;  }
+  Register    index()            const { return _index; }
+  ScaleFactor scale()            const { return _scale; }
+  int         disp()             const { return _disp;  }
 
   // Convert the raw encoding form into the form expected by the constructor for
   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
@@ -1053,6 +1055,11 @@
   void movdqa(XMMRegister dst, Address src);
   void movdqa(XMMRegister dst, XMMRegister src);
 
+  // Move Unaligned Double Quadword
+  void movdqu(Address     dst, XMMRegister src);
+  void movdqu(XMMRegister dst, Address src);
+  void movdqu(XMMRegister dst, XMMRegister src);
+
   void movl(Register dst, int32_t imm32);
   void movl(Address dst, int32_t imm32);
   void movl(Register dst, Register src);
@@ -1310,7 +1317,8 @@
 // on arguments should also go in here.
 
 class MacroAssembler: public Assembler {
- friend class LIR_Assembler;
+  friend class LIR_Assembler;
+  friend class Runtime1;      // as_Address()
  protected:
 
   Address as_Address(AddressLiteral adr);
@@ -1453,6 +1461,7 @@
   // The pointer will be loaded into the thread register.
   void get_thread(Register thread);
 
+
   // Support for VM calls
   //
   // It is imperative that all calls into the VM are handled via the call_VM macros.
@@ -1527,6 +1536,22 @@
   void store_check(Register obj);                // store check for obj - register is destroyed afterwards
   void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
 
+  void g1_write_barrier_pre(Register obj,
+#ifndef _LP64
+                            Register thread,
+#endif
+                            Register tmp,
+                            Register tmp2,
+                            bool     tosca_live);
+  void g1_write_barrier_post(Register store_addr,
+                             Register new_val,
+#ifndef _LP64
+                             Register thread,
+#endif
+                             Register tmp,
+                             Register tmp2);
+
+
   // split store_check(Register obj) to enhance instruction interleaving
   void store_check_part_1(Register obj);
   void store_check_part_2(Register obj);
--- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -456,5 +456,50 @@
   __ jmp(_continuation);
 }
 
+/////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+
+  // At this point we know that marking is in progress
+
+  __ bind(_entry);
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
+
+  __ cmpptr(pre_val_reg, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, _continuation);
+  ce->store_parameter(pre_val()->as_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ jmp(_continuation);
+
+}
+
+jbyte* G1PostBarrierStub::_byte_map_base = NULL;
+
+jbyte* G1PostBarrierStub::byte_map_base_slow() {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+         "Must be if we're using this.");
+  return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register new_val_reg = new_val()->as_register();
+  __ cmpptr(new_val_reg, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, _continuation);
+  ce->store_parameter(addr()->as_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
+  __ jmp(_continuation);
+}
+
+#endif // SERIALGC
+/////////////////////////////////////////////////////////////////////////////
 
 #undef __
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -302,6 +302,8 @@
   }
 
   if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
     __ move(value.result(), array_addr, null_check_info);
     // Seems to be a precise
     post_barrier(LIR_OprFact::address(array_addr), value.result());
@@ -756,7 +758,10 @@
   __ move(obj.result(), addr);
   __ add(addr, offset.result(), addr);
 
-
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Do the pre-write barrier, if any.
+    pre_barrier(addr, false, NULL);
+  }
 
   LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
   if (type == objectType)
@@ -1286,6 +1291,8 @@
     LIR_Address* addr = new LIR_Address(src, offset, type);
     bool is_obj = (type == T_ARRAY || type == T_OBJECT);
     if (is_obj) {
+      // Do the pre-write barrier, if any.
+      pre_barrier(LIR_OprFact::address(addr), false, NULL);
       __ move(data, addr);
       assert(src->is_register(), "must be register");
       // Seems to be a precise address
--- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1583,6 +1583,166 @@
       }
       break;
 
+#ifndef SERIALGC
+    case g1_pre_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
+        // arg0 : previous value of memory
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ movptr(rax, (int)id);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax);
+          __ should_not_reach_here();
+          break;
+        }
+
+        __ push(rax);
+        __ push(rdx);
+
+        const Register pre_val = rax;
+        const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
+        const Register tmp = rdx;
+
+        NOT_LP64(__ get_thread(thread);)
+
+        Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_active()));
+
+        Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+
+        Label done;
+        Label runtime;
+
+        // Can we store original value in the thread's buffer?
+
+        LP64_ONLY(__ movslq(tmp, queue_index);)
+#ifdef _LP64
+        __ cmpq(tmp, 0);
+#else
+        __ cmpl(queue_index, 0);
+#endif
+        __ jcc(Assembler::equal, runtime);
+#ifdef _LP64
+        __ subq(tmp, wordSize);
+        __ movl(queue_index, tmp);
+        __ addq(tmp, buffer);
+#else
+        __ subl(queue_index, wordSize);
+        __ movl(tmp, buffer);
+        __ addl(tmp, queue_index);
+#endif
+
+        // prev_val (rax)
+        f.load_argument(0, pre_val);
+        __ movptr(Address(tmp, 0), pre_val);
+        __ jmp(done);
+
+        __ bind(runtime);
+        // load the pre-value
+        __ push(rcx);
+        f.load_argument(0, rcx);
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread);
+        __ pop(rcx);
+
+        __ bind(done);
+        __ pop(rdx);
+        __ pop(rax);
+      }
+      break;
+
+    case g1_post_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
+
+
+        // arg0: store_address
+        Address store_addr(rbp, 2*BytesPerWord);
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+        Label done;
+        Label runtime;
+
+        // At this point we know new_value is non-NULL and the store crosses regions.
+        // Must check to see if card is already dirty
+
+        const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
+
+        Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        __ push(rax);
+        __ push(rdx);
+
+        NOT_LP64(__ get_thread(thread);)
+        ExternalAddress cardtable((address)ct->byte_map_base);
+        assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+        const Register card_addr = rdx;
+#ifdef _LP64
+        const Register tmp = rscratch1;
+        f.load_argument(0, card_addr);
+        __ shrq(card_addr, CardTableModRefBS::card_shift);
+        __ lea(tmp, cardtable);
+        // get the address of the card
+        __ addq(card_addr, tmp);
+#else
+        const Register card_index = rdx;
+        f.load_argument(0, card_index);
+        __ shrl(card_index, CardTableModRefBS::card_shift);
+
+        Address index(noreg, card_index, Address::times_1);
+        __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index)));
+#endif
+
+        __ cmpb(Address(card_addr, 0), 0);
+        __ jcc(Assembler::equal, done);
+
+        // storing region crossing non-NULL, card is clean.
+        // dirty card and log.
+
+        __ movb(Address(card_addr, 0), 0);
+
+        __ cmpl(queue_index, 0);
+        __ jcc(Assembler::equal, runtime);
+        __ subl(queue_index, wordSize);
+
+        const Register buffer_addr = rbx;
+        __ push(rbx);
+
+        __ movptr(buffer_addr, buffer);
+
+#ifdef _LP64
+        __ movslq(rscratch1, queue_index);
+        __ addptr(buffer_addr, rscratch1);
+#else
+        __ addptr(buffer_addr, queue_index);
+#endif
+        __ movptr(Address(buffer_addr, 0), card_addr);
+
+        __ pop(rbx);
+        __ jmp(done);
+
+        __ bind(runtime);
+        NOT_LP64(__ push(rcx);)
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+        NOT_LP64(__ pop(rcx);)
+
+        __ bind(done);
+        __ pop(rdx);
+        __ pop(rax);
+
+      }
+      break;
+#endif // !SERIALGC
+
     default:
       { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
         __ movptr(rax, (int)id);
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -44,8 +44,13 @@
   // Note: No need to save/restore bcp & locals (r13 & r14) pointer
   //       since these are callee saved registers and no blocking/
   //       GC can happen in leaf calls.
+  // Further Note: DO NOT save/restore bcp/locals. If a caller has
+  // already saved them so that it can use esi/edi as temporaries
+  // then a save/restore here will DESTROY the copy the caller
+  // saved! There used to be a save_bcp() that only happened in
+  // the ASSERT path (with no restore_bcp), which caused bizarre
+  // failures when the JVM was built with ASSERTs.
 #ifdef ASSERT
-  save_bcp();
   {
     Label L;
     cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
@@ -58,24 +63,9 @@
   // super call
   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
   // interpreter specific
-#ifdef ASSERT
-  {
-    Label L;
-    cmpptr(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize));
-    jcc(Assembler::equal, L);
-    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
-         " r13 not callee saved?");
-    bind(L);
-  }
-  {
-    Label L;
-    cmpptr(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize));
-    jcc(Assembler::equal, L);
-    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
-         " r14 not callee saved?");
-    bind(L);
-  }
-#endif
+  // Used to ASSERT that r13/r14 were equal to frame's bcp/locals
+  // but since they may not have been saved (and we don't want to
+  // save them here; see the note above) the assert is invalid.
 }
 
 void InterpreterMacroAssembler::call_VM_base(Register oop_result,
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -712,7 +712,6 @@
   //     end     -  element count
   void  gen_write_ref_array_pre_barrier(Register start, Register count) {
     assert_different_registers(start, count);
-#if 0 // G1 only
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
@@ -721,8 +720,8 @@
           __ pusha();                      // push registers
           __ push(count);
           __ push(start);
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
-          __ addl(esp, wordSize * 2);
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
+          __ addptr(rsp, 2*wordSize);
           __ popa();
         }
         break;
@@ -734,7 +733,6 @@
         ShouldNotReachHere();
 
     }
-#endif // 0 - G1 only
   }
 
 
@@ -750,20 +748,18 @@
     BarrierSet* bs = Universe::heap()->barrier_set();
     assert_different_registers(start, count);
     switch (bs->kind()) {
-#if 0 // G1 only
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
         {
           __ pusha();                      // push registers
           __ push(count);
           __ push(start);
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
-          __ addl(esp, wordSize * 2);
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
+          __ addptr(rsp, 2*wordSize);
           __ popa();
 
         }
         break;
-#endif // 0 G1 only
 
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
@@ -795,6 +791,69 @@
     }
   }
 
+
+  // Copy 64 bytes chunks
+  //
+  // Inputs:
+  //   from        - source array address
+  //   to_from     - destination array address - from
+  //   qword_count - 8-bytes element count, negative
+  //
+  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( UseSSE >= 2, "supported cpu only" );
+    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
+    // Copy 64-byte chunks
+    __ jmpb(L_copy_64_bytes);
+    __ align(16);
+  __ BIND(L_copy_64_bytes_loop);
+
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, 0));
+      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movdqu(xmm1, Address(from, 16));
+      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
+      __ movdqu(xmm2, Address(from, 32));
+      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
+      __ movdqu(xmm3, Address(from, 48));
+      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
+
+    } else {
+      __ movq(xmm0, Address(from, 0));
+      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movq(xmm1, Address(from, 8));
+      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
+      __ movq(xmm2, Address(from, 16));
+      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
+      __ movq(xmm3, Address(from, 24));
+      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
+      __ movq(xmm4, Address(from, 32));
+      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
+      __ movq(xmm5, Address(from, 40));
+      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
+      __ movq(xmm6, Address(from, 48));
+      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
+      __ movq(xmm7, Address(from, 56));
+      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
+    }
+
+    __ addl(from, 64);
+  __ BIND(L_copy_64_bytes);
+    __ subl(qword_count, 8);
+    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
+    __ addl(qword_count, 8);
+    __ jccb(Assembler::zero, L_exit);
+    //
+    // length is too short, just copy qwords
+    //
+  __ BIND(L_copy_8_bytes);
+    __ movq(xmm0, Address(from, 0));
+    __ movq(Address(from, to_from, Address::times_1), xmm0);
+    __ addl(from, 8);
+    __ decrement(qword_count);
+    __ jcc(Assembler::greater, L_copy_8_bytes);
+  __ BIND(L_exit);
+  }
+
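The shape of xmm_copy_forward -- a 64-byte main loop built from unaligned 16-byte moves on the UseUnalignedLoadStores path, followed by an 8-byte tail loop driven by the remaining qword count -- can be sketched with SSE2 intrinsics as below. This is an approximate model, not the stub itself (the stub addresses the destination as from + to_from, which is elided here):

#include <emmintrin.h>   // SSE2: _mm_loadu_si128 / _mm_storeu_si128 (movdqu)
#include <cstdint>
#include <cstdio>
#include <cstring>

static void xmm_copy_forward(const uint8_t* from, uint8_t* to, long qword_count) {
  while (qword_count >= 8) {                       // whole 64-byte chunks
    for (int off = 0; off < 64; off += 16) {
      __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from + off));
      _mm_storeu_si128(reinterpret_cast<__m128i*>(to + off), v);
    }
    from += 64; to += 64; qword_count -= 8;
  }
  while (qword_count > 0) {                        // leftover qwords
    memcpy(to, from, 8);
    from += 8; to += 8; qword_count -= 1;
  }
}

int main() {
  uint8_t src[104], dst[104];
  for (int i = 0; i < 104; i++) src[i] = static_cast<uint8_t>(i);
  xmm_copy_forward(src, dst, 104 / 8);
  printf("copies match: %d\n", memcmp(src, dst, 104) == 0);
  return 0;
}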
   // Copy 64 bytes chunks
   //
   // Inputs:
@@ -803,6 +862,7 @@
   //   qword_count - 8-bytes element count, negative
   //
   void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( VM_Version::supports_mmx(), "supported cpu only" );
     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
     // Copy 64-byte chunks
     __ jmpb(L_copy_64_bytes);
@@ -880,7 +940,7 @@
     __ subptr(to, from); // to --> to_from
     __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
     __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
+    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
       // align source address at 4 bytes address boundary
       if (t == T_BYTE) {
         // One byte misalignment happens only for byte arrays
@@ -910,20 +970,26 @@
       __ mov(count, rax);      // restore 'count'
       __ jmpb(L_copy_2_bytes); // all dwords were copied
     } else {
-      // align to 8 bytes, we know we are 4 byte aligned to start
-      __ testptr(from, 4);
-      __ jccb(Assembler::zero, L_copy_64_bytes);
-      __ movl(rax, Address(from, 0));
-      __ movl(Address(from, to_from, Address::times_1, 0), rax);
-      __ addptr(from, 4);
-      __ subl(count, 1<<shift);
+      if (!UseUnalignedLoadStores) {
+        // align to 8 bytes, we know we are 4 byte aligned to start
+        __ testptr(from, 4);
+        __ jccb(Assembler::zero, L_copy_64_bytes);
+        __ movl(rax, Address(from, 0));
+        __ movl(Address(from, to_from, Address::times_1, 0), rax);
+        __ addptr(from, 4);
+        __ subl(count, 1<<shift);
+      }
     __ BIND(L_copy_64_bytes);
       __ mov(rax, count);
       __ shrl(rax, shift+1);  // 8 bytes chunk count
       //
       // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
       //
-      mmx_copy_forward(from, to_from, rax);
+      if (UseXMMForArrayCopy) {
+        xmm_copy_forward(from, to_from, rax);
+      } else {
+        mmx_copy_forward(from, to_from, rax);
+      }
     }
     // copy tailing dword
   __ BIND(L_copy_4_bytes);
@@ -1073,13 +1139,20 @@
       __ align(16);
       // Move 8 bytes
     __ BIND(L_copy_8_bytes_loop);
-      __ movq(mmx0, Address(from, count, sf, 0));
-      __ movq(Address(to, count, sf, 0), mmx0);
+      if (UseXMMForArrayCopy) {
+        __ movq(xmm0, Address(from, count, sf, 0));
+        __ movq(Address(to, count, sf, 0), xmm0);
+      } else {
+        __ movq(mmx0, Address(from, count, sf, 0));
+        __ movq(Address(to, count, sf, 0), mmx0);
+      }
     __ BIND(L_copy_8_bytes);
       __ subl(count, 2<<shift);
       __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
       __ addl(count, 2<<shift);
-      __ emms();
+      if (!UseXMMForArrayCopy) {
+        __ emms();
+      }
     }
   __ BIND(L_copy_4_bytes);
     // copy prefix qword
@@ -1147,7 +1220,11 @@
 
     __ subptr(to, from); // to --> to_from
     if (VM_Version::supports_mmx()) {
-      mmx_copy_forward(from, to_from, count);
+      if (UseXMMForArrayCopy) {
+        xmm_copy_forward(from, to_from, count);
+      } else {
+        mmx_copy_forward(from, to_from, count);
+      }
     } else {
       __ jmpb(L_copy_8_bytes);
       __ align(16);
@@ -1200,8 +1277,13 @@
     __ align(16);
   __ BIND(L_copy_8_bytes_loop);
     if (VM_Version::supports_mmx()) {
-      __ movq(mmx0, Address(from, count, Address::times_8));
-      __ movq(Address(to, count, Address::times_8), mmx0);
+      if (UseXMMForArrayCopy) {
+        __ movq(xmm0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), xmm0);
+      } else {
+        __ movq(mmx0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), mmx0);
+      }
     } else {
       __ fild_d(Address(from, count, Address::times_8));
       __ fistp_d(Address(to, count, Address::times_8));
@@ -1210,7 +1292,7 @@
     __ decrement(count);
     __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
 
-    if (VM_Version::supports_mmx()) {
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
       __ emms();
     }
     inc_copy_counter_np(T_LONG);
@@ -1378,9 +1460,9 @@
     Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
 
     // Copy from low to high addresses, indexed from the end of each array.
+    gen_write_ref_array_pre_barrier(to, count);
     __ lea(end_from, end_from_addr);
     __ lea(end_to,   end_to_addr);
-    gen_write_ref_array_pre_barrier(to, count);
     assert(length == count, "");        // else fix next line:
     __ negptr(count);                   // negate and test the length
     __ jccb(Assembler::notZero, L_load_element);
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1153,18 +1153,26 @@
   //     Destroy no registers!
   //
   void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
-#if 0 // G1 - only
-    assert_different_registers(addr, c_rarg1);
-    assert_different_registers(count, c_rarg0);
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
         {
           __ pusha();                      // push registers
-          __ movptr(c_rarg0, addr);
-          __ movptr(c_rarg1, count);
-          __ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre));
+          if (count == c_rarg0) {
+            if (addr == c_rarg1) {
+              // exactly backwards!!
+              __ xchgptr(c_rarg1, c_rarg0);
+            } else {
+              __ movptr(c_rarg1, count);
+              __ movptr(c_rarg0, addr);
+            }
+
+          } else {
+            __ movptr(c_rarg0, addr);
+            __ movptr(c_rarg1, count);
+          }
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
           __ popa();
         }
         break;
@@ -1172,11 +1180,10 @@
       case BarrierSet::CardTableExtension:
       case BarrierSet::ModRef:
         break;
-      default      :
+      default:
         ShouldNotReachHere();
 
     }
-#endif // 0 G1 - only
   }
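The register shuffle added above has to place (addr, count) into (c_rarg0, c_rarg1) even when the inputs already sit in those registers, including the "exactly backwards" case that needs an xchg rather than two moves. A small illustration of that decision tree (the Reg enum and the Shuffle type are invented for the example):

#include <cstdio>

enum Reg { RDI, RSI, RDX, RCX };  // RDI/RSI stand in for c_rarg0/c_rarg1

struct Shuffle {
  void mov(Reg dst, Reg src) { printf("mov  %d <- %d\n", dst, src); }
  void xchg(Reg a, Reg b)    { printf("xchg %d <-> %d\n", a, b); }
};

static void set_args(Shuffle& s, Reg addr, Reg count) {
  const Reg arg0 = RDI, arg1 = RSI;
  if (count == arg0) {
    if (addr == arg1) {
      s.xchg(arg1, arg0);      // exactly backwards: a swap, not two moves
    } else {
      s.mov(arg1, count);      // free arg0 first, then fill it
      s.mov(arg0, addr);
    }
  } else {
    s.mov(arg0, addr);         // count is not in arg0, safe to clobber arg0 first
    s.mov(arg1, count);
  }
}

int main() {
  Shuffle s;
  set_args(s, RSI, RDI);  // the backwards case from the stub
  set_args(s, RDX, RCX);  // the easy case
  return 0;
}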
 
   //
@@ -1193,7 +1200,6 @@
     assert_different_registers(start, end, scratch);
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
-#if 0 // G1 - only
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
 
@@ -1206,11 +1212,10 @@
           __ shrptr(scratch, LogBytesPerWord);
           __ mov(c_rarg0, start);
           __ mov(c_rarg1, scratch);
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
           __ popa();
         }
         break;
-#endif // 0 G1 - only
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
         {
@@ -1239,8 +1244,13 @@
           __ decrement(count);
           __ jcc(Assembler::greaterEqual, L_loop);
         }
-      }
-   }
+        break;
+      default:
+        ShouldNotReachHere();
+
+    }
+  }
+
 
   // Copy big chunks forward
   //
@@ -1259,14 +1269,22 @@
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
-    __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
-    __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+
+    } else {
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
+      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
+      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    }
   __ BIND(L_copy_32_bytes);
     __ addptr(qword_count, 4);
     __ jcc(Assembler::lessEqual, L_loop);
@@ -1292,14 +1310,22 @@
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(from, qword_count, Address::times_8, 24));
-    __ movq(Address(dest, qword_count, Address::times_8, 24), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 16));
-    __ movq(Address(dest, qword_count, Address::times_8, 16), to);
-    __ movq(to, Address(from, qword_count, Address::times_8,  8));
-    __ movq(Address(dest, qword_count, Address::times_8,  8), to);
-    __ movq(to, Address(from, qword_count, Address::times_8,  0));
-    __ movq(Address(dest, qword_count, Address::times_8,  0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+      __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
+      __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+
+    } else {
+      __ movq(to, Address(from, qword_count, Address::times_8, 24));
+      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 16));
+      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
+      __ movq(to, Address(from, qword_count, Address::times_8,  8));
+      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
+      __ movq(to, Address(from, qword_count, Address::times_8,  0));
+      __ movq(Address(dest, qword_count, Address::times_8,  0), to);
+    }
   __ BIND(L_copy_32_bytes);
     __ subptr(qword_count, 4);
     __ jcc(Assembler::greaterEqual, L_loop);
@@ -2282,7 +2308,7 @@
     // and report their number to the caller.
     assert_different_registers(rax, r14_length, count, to, end_to, rcx);
     __ lea(end_to, to_element_addr);
-    gen_write_ref_array_post_barrier(to, end_to, rcx);
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
     __ movptr(rax, r14_length);           // original oops
     __ addptr(rax, count);                // K = (original - remaining) oops
     __ notptr(rax);                       // report (-1^K) to caller
@@ -2291,7 +2317,7 @@
     // Come here on success only.
     __ BIND(L_do_card_marks);
     __ addptr(end_to, -wordSize);         // make an inclusive end pointer
-    gen_write_ref_array_post_barrier(to, end_to, rcx);
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
     __ xorptr(rax, rax);                  // return 0 on success
 
     // Common exit point (success or failure).
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -107,6 +107,78 @@
 //----------------------------------------------------------------------------------------------------
 // Miscellaneous helper routines
 
+// Store an oop (or NULL) at the address described by obj.
+// If val == noreg this means store a NULL
+
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == rax, "parameter is just for looks");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        // We do it regardless of precise because we need the registers
+        if (obj.index() == noreg && obj.disp() == 0) {
+          if (obj.base() != rdx) {
+            __ movl(rdx, obj.base());
+          }
+        } else {
+          __ leal(rdx, obj);
+        }
+        __ get_thread(rcx);
+        __ save_bcp();
+        __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg);
+
+        // Do the actual store
+        // noreg means NULL
+        if (val == noreg) {
+          __ movl(Address(rdx, 0), NULL_WORD);
+          // No post barrier for NULL
+        } else {
+          __ movl(Address(rdx, 0), val);
+          __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi);
+        }
+        __ restore_bcp();
+
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (val == noreg) {
+          __ movl(obj, NULL_WORD);
+        } else {
+          __ movl(obj, val);
+          // flatten object address if needed
+          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
+            __ store_check(obj.base());
+          } else {
+            __ leal(rdx, obj);
+            __ store_check(rdx);
+          }
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      if (val == noreg) {
+        __ movl(obj, NULL_WORD);
+      } else {
+        __ movl(obj, val);
+      }
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+
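A hypothetical C++ model of the do_oop_store dispatch added above: G1 issues a pre-barrier before the store and a post-barrier only for non-NULL values, the card-table kinds store and then mark the card, and plain ModRef just stores. The barrier hooks are stubs; only the ordering is meant to mirror the interpreter code:

#include <cstdio>

enum BarrierKind { G1SATBCTLogging, CardTableModRef, ModRef };

static void g1_pre(void** field)             { printf("g1 pre-barrier  %p\n", (void*)field); }
static void g1_post(void** field, void* val) { printf("g1 post-barrier %p\n", (void*)field); (void)val; }
static void store_check(void** field)        { printf("card mark       %p\n", (void*)field); }

static void do_oop_store(BarrierKind kind, void** field, void* val) {
  switch (kind) {
    case G1SATBCTLogging:
      g1_pre(field);                            // log the previous value for SATB
      *field = val;
      if (val != nullptr) g1_post(field, val);  // no post-barrier for NULL
      break;
    case CardTableModRef:
      *field = val;
      if (val != nullptr) store_check(field);   // dirty the card after the store
      break;
    case ModRef:
      *field = val;                             // no barrier needed
      break;
  }
}

int main() {
  void* slot = nullptr;
  int dummy;
  do_oop_store(G1SATBCTLogging, &slot, &dummy);
  do_oop_store(CardTableModRef, &slot, nullptr);
  return 0;
}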
 Address TemplateTable::at_bcp(int offset) {
   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   return Address(rsi, offset);
@@ -876,6 +948,8 @@
   __ movptr(rax, at_tos());     // Value
   __ movl(rcx, at_tos_p1());  // Index
   __ movptr(rdx, at_tos_p2());  // Array
+
+  Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   index_check_without_pop(rdx, rcx);      // kills rbx,
   // do array store check - check for NULL value first
   __ testptr(rax, rax);
@@ -887,7 +961,7 @@
   __ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
   __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
   // Compress array+index*wordSize+12 into a single register.  Frees ECX.
-  __ lea(rdx, Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ lea(rdx, element_address);
 
   // Generate subtype check.  Blows ECX.  Resets EDI to locals.
   // Superklass in EAX.  Subklass in EBX.
@@ -899,15 +973,20 @@
 
   // Come here on success
   __ bind(ok_is_subtype);
-  __ movptr(rax, at_rsp());     // Value
-  __ movptr(Address(rdx, 0), rax);
-  __ store_check(rdx);
-  __ jmpb(done);
+
+  // Get the value to store
+  __ movptr(rax, at_rsp());
+  // and store it with appropriate barrier
+  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
+
+  __ jmp(done);
 
   // Have a NULL in EAX, EDX=array, ECX=index.  Store NULL at ary[idx]
   __ bind(is_null);
   __ profile_null_seen(rbx);
-  __ movptr(Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax);
+
+  // Store NULL, (noreg means NULL to do_oop_store)
+  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
 
   // Pop stack arguments
   __ bind(done);
@@ -1515,7 +1594,7 @@
     // compute return address as bci in rax,
     __ lea(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
     __ subptr(rax, Address(rcx, methodOopDesc::const_offset()));
-    // Adjust the bcp in ESI by the displacement in EDX
+    // Adjust the bcp in RSI by the displacement in EDX
     __ addptr(rsi, rdx);
     // Push return address
     __ push_i(rax);
@@ -1526,7 +1605,7 @@
 
   // Normal (non-jsr) branch handling
 
-  // Adjust the bcp in ESI by the displacement in EDX
+  // Adjust the bcp in RSI by the displacement in EDX
   __ addptr(rsi, rdx);
 
   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
@@ -2439,11 +2518,12 @@
   __ pop(atos);
   if (!is_static) pop_and_check_object(obj);
 
-  __ movptr(lo, rax );
-  __ store_check(obj, lo);  // Need to mark card
+  do_oop_store(_masm, lo, rax, _bs->kind(), false);
+
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
   }
+
   __ jmp(Done);
 
   __ bind(notObj);
@@ -2664,7 +2744,10 @@
       break;
     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
-    case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
+    case Bytecodes::_fast_aputfield: {
+      do_oop_store(_masm, lo, rax, _bs->kind(), false);
+      break;
+    }
     default:
       ShouldNotReachHere();
   }
@@ -2672,7 +2755,8 @@
   Label done;
   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
                                                Assembler::StoreStore));
-  __ jmpb(done);
+  // Barriers are so large that short branch doesn't reach!
+  __ jmp(done);
 
   // Same code as above, but don't need rdx to test for volatile.
   __ bind(notVolatile);
@@ -2694,7 +2778,10 @@
       break;
     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
-    case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
+    case Bytecodes::_fast_aputfield: {
+      do_oop_store(_masm, lo, rax, _bs->kind(), false);
+      break;
+    }
     default:
       ShouldNotReachHere();
   }
@@ -3054,8 +3141,6 @@
   Label initialize_object;  // including clearing the fields
   Label allocate_shared;
 
-  ExternalAddress heap_top((address)Universe::heap()->top_addr());
-
   __ get_cpool_and_tags(rcx, rax);
   // get instanceKlass
   __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(constantPoolOopDesc)));
@@ -3112,6 +3197,8 @@
   if (allow_shared_alloc) {
     __ bind(allocate_shared);
 
+    ExternalAddress heap_top((address)Universe::heap()->top_addr());
+
     Label retry;
     __ bind(retry);
     __ movptr(rax, heap_top);
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -115,6 +115,69 @@
 
 
 // Miscellaneous helper routines
+// Store an oop (or NULL) at the address described by obj.
+// If val == noreg this means store a NULL
+
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == rax, "parameter is just for looks");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        if (obj.index() == noreg && obj.disp() == 0) {
+          if (obj.base() != rdx) {
+            __ movq(rdx, obj.base());
+          }
+        } else {
+          __ leaq(rdx, obj);
+        }
+        __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
+        if (val == noreg) {
+          __ store_heap_oop(Address(rdx, 0), NULL_WORD);
+        } else {
+          __ store_heap_oop(Address(rdx, 0), val);
+          __ g1_write_barrier_post(rdx, val, r8, rbx);
+        }
+
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (val == noreg) {
+          __ store_heap_oop(obj, NULL_WORD);
+        } else {
+          __ store_heap_oop(obj, val);
+          // flatten object address if needed
+          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
+            __ store_check(obj.base());
+          } else {
+            __ leaq(rdx, obj);
+            __ store_check(rdx);
+          }
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      if (val == noreg) {
+        __ store_heap_oop(obj, NULL_WORD);
+      } else {
+        __ store_heap_oop(obj, val);
+      }
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
 
 Address TemplateTable::at_bcp(int offset) {
   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
@@ -560,8 +623,8 @@
   // rdx: array
   index_check(rdx, rax); // kills rbx
   __ load_heap_oop(rax, Address(rdx, rax,
-                       UseCompressedOops ? Address::times_4 : Address::times_8,
-                       arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+                                UseCompressedOops ? Address::times_4 : Address::times_8,
+                                arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 }
 
 void TemplateTable::baload() {
@@ -866,6 +929,11 @@
   __ movptr(rax, at_tos());    // value
   __ movl(rcx, at_tos_p1()); // index
   __ movptr(rdx, at_tos_p2()); // array
+
+  Address element_address(rdx, rcx,
+                          UseCompressedOops? Address::times_4 : Address::times_8,
+                          arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
   index_check(rdx, rcx);     // kills rbx
   // do array store check - check for NULL value first
   __ testptr(rax, rax);
@@ -879,9 +947,7 @@
                          sizeof(oopDesc) +
                          objArrayKlass::element_klass_offset_in_bytes()));
   // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
-  __ lea(rdx, Address(rdx, rcx,
-                      UseCompressedOops ? Address::times_4 : Address::times_8,
-                      arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ lea(rdx, element_address);
 
   // Generate subtype check.  Blows rcx, rdi
   // Superklass in rax.  Subklass in rbx.
@@ -893,18 +959,19 @@
 
   // Come here on success
   __ bind(ok_is_subtype);
-  __ movptr(rax, at_tos()); // Value
-  __ store_heap_oop(Address(rdx, 0), rax);
-  __ store_check(rdx);
+
+  // Get the value we will store
+  __ movptr(rax, at_tos());
+  // Now store using the appropriate barrier
+  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
   __ jmp(done);
 
   // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
   __ bind(is_null);
   __ profile_null_seen(rbx);
-  __ store_heap_oop(Address(rdx, rcx,
-                            UseCompressedOops ? Address::times_4 : Address::times_8,
-                            arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
-                    rax);
+
+  // Store a NULL
+  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
 
   // Pop stack arguments
   __ bind(done);
@@ -2396,8 +2463,10 @@
   // atos
   __ pop(atos);
   if (!is_static) pop_and_check_object(obj);
-  __ store_heap_oop(field, rax);
-  __ store_check(obj, field); // Need to mark card
+
+  // Store into the field
+  do_oop_store(_masm, field, rax, _bs->kind(), false);
+
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
   }
@@ -2584,8 +2653,7 @@
   // access field
   switch (bytecode()) {
   case Bytecodes::_fast_aputfield:
-    __ store_heap_oop(field, rax);
-    __ store_check(rcx, field);
+    do_oop_store(_masm, field, rax, _bs->kind(), false);
     break;
   case Bytecodes::_fast_lputfield:
     __ movq(field, rax);
@@ -3044,8 +3112,6 @@
   Label initialize_header;
   Label initialize_object; // including clearing the fields
   Label allocate_shared;
-  ExternalAddress top((address)Universe::heap()->top_addr());
-  ExternalAddress end((address)Universe::heap()->end_addr());
 
   __ get_cpool_and_tags(rsi, rax);
   // get instanceKlass
@@ -3106,6 +3172,9 @@
   if (allow_shared_alloc) {
     __ bind(allocate_shared);
 
+    ExternalAddress top((address)Universe::heap()->top_addr());
+    ExternalAddress end((address)Universe::heap()->end_addr());
+
     const Register RtopAddr = rscratch1;
     const Register RendAddr = rscratch2;
 
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -242,9 +242,11 @@
   _supports_cx8 = supports_cmpxchg8();
   // if the OS doesn't support SSE, we can't use this feature even if the HW does
   if( !os::supports_sse())
-    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A);
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -261,7 +263,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -272,7 +274,8 @@
                (supports_sse2() ? ", sse2" : ""),
                (supports_sse3() ? ", sse3" : ""),
                (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4() ? ", sse4" : ""),
+               (supports_sse4_1() ? ", sse4.1" : ""),
+               (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow()   ? ", 3dnow"  : ""),
                (supports_3dnow2()  ? ", 3dnowext" : ""),
@@ -285,7 +288,7 @@
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -375,6 +378,14 @@
         MaxLoopPad = 11;
       }
 #endif // COMPILER2
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+      }
+      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
     }
   }
 
@@ -413,7 +424,7 @@
 
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -68,9 +68,9 @@
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
     } bits;
   };
 
@@ -177,8 +177,9 @@
      CPU_SSE2 = (1 << 7),
      CPU_SSE3 = (1 << 8), // sse3  comes from cpuid 1 (ECX)
      CPU_SSSE3= (1 << 9),
-     CPU_SSE4 = (1 <<10),
-     CPU_SSE4A= (1 <<11)
+     CPU_SSE4A= (1 <<10),
+     CPU_SSE4_1 = (1 << 11),
+     CPU_SSE4_2 = (1 << 12)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -240,22 +241,14 @@
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
       result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
 
@@ -380,7 +377,8 @@
   static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()     { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
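The new sse4_1/sse4_2 bit fields and the supports_sse4_1()/supports_sse4_2() predicates correspond to CPUID leaf 1, ECX bits 19 and 20. A standalone probe using GCC/Clang's __get_cpuid (x86 only) would look roughly like this:

#include <cpuid.h>    // GCC/Clang: __get_cpuid
#include <cstdio>

int main() {
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    printf("cpuid leaf 1 not supported\n");
    return 1;
  }
  bool sse4_1 = (ecx & (1u << 19)) != 0;   // CPUID.1:ECX.SSE4_1
  bool sse4_2 = (ecx & (1u << 20)) != 0;   // CPUID.1:ECX.SSE4_2
  printf("sse4.1: %s, sse4.2: %s\n", sse4_1 ? "yes" : "no",
                                     sse4_2 ? "yes" : "no");
  return 0;
}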
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -186,8 +186,10 @@
   if (!VM_Version::supports_sse2()) {
     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
   }
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -204,7 +206,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -215,7 +217,8 @@
                (supports_sse2() ? ", sse2" : ""),
                (supports_sse3() ? ", sse3" : ""),
                (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4() ? ", sse4" : ""),
+               (supports_sse4_1() ? ", sse4.1" : ""),
+               (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow()   ? ", 3dnow"  : ""),
                (supports_3dnow2()  ? ", 3dnowext" : ""),
@@ -228,7 +231,7 @@
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -314,6 +317,14 @@
         MaxLoopPad = 11;
       }
 #endif // COMPILER2
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+      }
+      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
     }
   }
 
@@ -355,7 +366,7 @@
 
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -68,9 +68,9 @@
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
     } bits;
   };
 
@@ -177,8 +177,9 @@
      CPU_SSE2 = (1 << 7),
      CPU_SSE3 = (1 << 8),
      CPU_SSSE3= (1 << 9),
-     CPU_SSE4 = (1 <<10),
-     CPU_SSE4A= (1 <<11)
+     CPU_SSE4A= (1 <<10),
+     CPU_SSE4_1 = (1 << 11),
+     CPU_SSE4_2 = (1 << 12)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -240,22 +241,14 @@
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_eax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
       result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
 
@@ -380,7 +377,8 @@
   static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()     { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
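The new CPU_SSE4_1 and CPU_SSE4_2 flags above are taken from CPUID leaf 1, ECX bits 19 and 20, which is what the widened std_cpuid1_ecx bitfield now exposes. A small standalone sketch of that bit-to-flag mapping (the constants simply mirror the enum values in this header; sse4_feature_flags is an illustrative helper, not HotSpot code):

#include <cstdint>

static const uint32_t CPU_SSE4_1 = 1u << 11;   // mirrors the enum above
static const uint32_t CPU_SSE4_2 = 1u << 12;

// ecx is the value CPUID leaf 1 returns in ECX; bits 19/20 advertise SSE4.1/SSE4.2.
static uint32_t sse4_feature_flags(uint32_t ecx) {
  uint32_t result = 0;
  if (ecx & (1u << 19)) result |= CPU_SSE4_1;
  if (ecx & (1u << 20)) result |= CPU_SSE4_2;
  return result;
}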
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Jul 05 16:43:15 2017 +0200
@@ -4810,6 +4810,16 @@
   interface(CONST_INTER);
 %}
 
+// Long Immediate zero
+operand immL_M1() %{
+  predicate( n->get_long() == -1L );
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Long immediate from 0 to 127.
 // Used for a shorter form of long mul by 10.
 operand immL_127() %{
@@ -8621,6 +8631,18 @@
   ins_pipe( ialu_reg_reg );
 %}
 
+// Xor Register with Immediate -1
+instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
+  match(Set dst (XorI dst imm));  
+
+  size(2);
+  format %{ "NOT    $dst" %}  
+  ins_encode %{
+     __ notl($dst$$Register);
+  %}
+  ins_pipe( ialu_reg );
+%}
+
 // Xor Register with Immediate
 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (XorI dst src));
@@ -8938,6 +8960,18 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
+// Xor Long Register with Immediate -1
+instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
+  match(Set dst (XorL dst imm));  
+  format %{ "NOT    $dst.lo\n\t"
+            "NOT    $dst.hi" %}
+  ins_encode %{
+     __ notl($dst$$Register);
+     __ notl(HIGH_FROM_LOW($dst$$Register));
+  %}
+  ins_pipe( ialu_reg_long );
+%}
+
 // Xor Long Register with Immediate
 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
   match(Set dst (XorL dst src));
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Jul 05 16:43:15 2017 +0200
@@ -9309,6 +9309,17 @@
   ins_pipe(ialu_reg_reg);
 %}
 
+// Xor Register with Immediate -1
+instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
+  match(Set dst (XorI dst imm));  
+
+  format %{ "not    $dst" %}  
+  ins_encode %{
+     __ notl($dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
 // Xor Register with Immediate
 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 %{
@@ -9529,6 +9540,17 @@
   ins_pipe(ialu_reg_reg);
 %}
 
+// Xor Register with Immediate -1
+instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
+  match(Set dst (XorL dst imm));  
+
+  format %{ "notq   $dst" %}  
+  ins_encode %{
+     __ notq($dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
 // Xor Register with Immediate
 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 %{
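The four new matcher rules above (xorI_eReg_im1, xorl_eReg_im1, xorI_rReg_im1, xorL_rReg_im1) rest on the identity x ^ -1 == ~x, which lets an XOR against an all-ones immediate be emitted as a single NOT with no flags-register input. A minimal standalone C++ check of that identity (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// XOR with all-ones flips every bit, exactly what the one-operand NOT does.
static int32_t xor_int_m1(int32_t x)  { return x ^ -1;   }  // what xorI_*_im1 matches
static int64_t xor_long_m1(int64_t x) { return x ^ -1LL; }  // what xorl_eReg_im1 / xorL_rReg_im1 match

int main() {
  assert(xor_int_m1(0x12345678) == ~int32_t(0x12345678));
  assert(xor_long_m1(-7) == ~int64_t(-7));
  assert(xor_long_m1(0x0123456789abcdefLL) == ~int64_t(0x0123456789abcdefLL));
  return 0;
}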
--- a/hotspot/src/os/linux/launcher/java.c	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/linux/launcher/java.c	Wed Jul 05 16:43:15 2017 +0200
@@ -1110,7 +1110,7 @@
         if (propname) {
             jclass cls;
             jmethodID mid;
-            NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System"));
+            NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System"));
             NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                                    env, cls,
                                    "getProperty",
@@ -1125,7 +1125,7 @@
 static jboolean isEncodingSupported(JNIEnv *env, jstring enc) {
     jclass cls;
     jmethodID mid;
-    NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset"));
+    NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset"));
     NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                            env, cls,
                            "isSupported",
@@ -1161,7 +1161,7 @@
 #else
             if (isEncodingSupported(env, enc) == JNI_TRUE) {
 #endif
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([BLjava/lang/String;)V"));
                 str = (*env)->NewObject(env, cls, mid, ary, enc);
@@ -1172,7 +1172,7 @@
                   the encoding name, in which the StringCoding class will
                   pickup the iso-8859-1 as the fallback converter for us.
                 */
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([B)V"));
                 str = (*env)->NewObject(env, cls, mid, ary);
@@ -1195,7 +1195,7 @@
     jarray ary;
     int i;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
     NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0));
     for (i = 0; i < strc; i++) {
         jstring str = NewPlatformString(env, *strv++);
@@ -1224,6 +1224,7 @@
         c = *t++;
         *s++ = (c == '.') ? '/' : c;
     } while (c != '\0');
+    // use the application class loader for the main-class
     cls = (*env)->FindClass(env, buf);
     free(buf);
 
@@ -1250,7 +1251,7 @@
     jobject jar, man, attr;
     jstring str, result = 0;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile"));
     NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "(Ljava/lang/String;)V"));
     NULL_CHECK0(str = NewPlatformString(env, jarname));
@@ -1471,7 +1472,7 @@
     jclass ver;
     jmethodID print;
 
-    NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version"));
+    NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version"));
     NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V"));
 
     (*env)->CallStaticVoidMethod(env, ver, print);
--- a/hotspot/src/os/linux/launcher/java.h	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/linux/launcher/java.h	Wed Jul 05 16:43:15 2017 +0200
@@ -100,5 +100,15 @@
  * Make launcher spit debug output.
  */
 extern jboolean _launcher_debug;
+/*
+ * This allows for finding classes from the VM's bootstrap class loader
+ * directly. FindClass uses the application class loader internally, which
+ * causes unnecessary searching of the classpath for the required classes.
+ */
+typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env,
+                                                const char *name,
+                                                jboolean throwError));
+
+jclass FindBootStrapClass(JNIEnv *env, const char *classname);
 
 #endif /* _JAVA_H_ */
--- a/hotspot/src/os/linux/launcher/java_md.c	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/linux/launcher/java_md.c	Wed Jul 05 16:43:15 2017 +0200
@@ -1826,3 +1826,23 @@
 {
     return(borrowed_unsetenv(name));
 }
+/*
+ * The implementation for finding classes from the bootstrap
+ * class loader; refer to java.h.
+ */
+static FindClassFromBootLoader_t *findBootClass = NULL;
+
+jclass
+FindBootStrapClass(JNIEnv *env, const char* classname)
+{
+   if (findBootClass == NULL) {
+       findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT,
+          "JVM_FindClassFromBootLoader");
+       if (findBootClass == NULL) {
+           fprintf(stderr, "Error: could not load method JVM_FindClassFromBootLoader");
+           return NULL;
+       }
+   }
+   return findBootClass(env, classname, JNI_FALSE);
+}
+
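FindBootStrapClass above resolves JVM_FindClassFromBootLoader once via dlsym and caches the function pointer, so later calls skip both the symbol search and the application class path. A minimal sketch of that lazy-lookup-and-cache idiom under POSIX dlfcn, with the hypothetical symbol target_fn standing in for the VM entry point:

#include <dlfcn.h>
#include <cstdio>

// Signature of the (hypothetical) entry point we resolve lazily.
typedef int (lookup_fn_t)(const char* name);

static lookup_fn_t* cached_fn = nullptr;

static int call_lookup(const char* name) {
  if (cached_fn == nullptr) {
    // RTLD_DEFAULT searches the images already loaded into the process.
    cached_fn = (lookup_fn_t*) dlsym(RTLD_DEFAULT, "target_fn");
    if (cached_fn == nullptr) {
      fprintf(stderr, "Error: could not resolve target_fn\n");
      return -1;
    }
  }
  return cached_fn(name);
}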
--- a/hotspot/src/os/linux/vm/globals_linux.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/linux/vm/globals_linux.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -38,5 +38,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1261,6 +1261,17 @@
   return (1000 * 1000);
 }
 
+// For now, we say that linux does not support vtime.  I have no idea
+// whether it can actually be made to (DLD, 9/13/05).
+
+bool os::supports_vtime() { return false; }
+bool os::enable_vtime()   { return false; }
+bool os::vtime_enabled()  { return false; }
+double os::elapsedVTime() {
+  // better than nothing, but not much
+  return elapsedTime();
+}
+
 jlong os::javaTimeMillis() {
   timeval time;
   int status = gettimeofday(&time, NULL);
--- a/hotspot/src/os/solaris/launcher/java.c	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/solaris/launcher/java.c	Wed Jul 05 16:43:15 2017 +0200
@@ -1110,7 +1110,7 @@
         if (propname) {
             jclass cls;
             jmethodID mid;
-            NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System"));
+            NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System"));
             NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                                    env, cls,
                                    "getProperty",
@@ -1125,7 +1125,7 @@
 static jboolean isEncodingSupported(JNIEnv *env, jstring enc) {
     jclass cls;
     jmethodID mid;
-    NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset"));
+    NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset"));
     NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                            env, cls,
                            "isSupported",
@@ -1161,7 +1161,7 @@
 #else
             if (isEncodingSupported(env, enc) == JNI_TRUE) {
 #endif
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([BLjava/lang/String;)V"));
                 str = (*env)->NewObject(env, cls, mid, ary, enc);
@@ -1172,7 +1172,7 @@
                   the encoding name, in which the StringCoding class will
                   pickup the iso-8859-1 as the fallback converter for us.
                 */
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([B)V"));
                 str = (*env)->NewObject(env, cls, mid, ary);
@@ -1195,7 +1195,7 @@
     jarray ary;
     int i;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
     NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0));
     for (i = 0; i < strc; i++) {
         jstring str = NewPlatformString(env, *strv++);
@@ -1224,6 +1224,7 @@
         c = *t++;
         *s++ = (c == '.') ? '/' : c;
     } while (c != '\0');
+    // use the application class loader for the main-class
     cls = (*env)->FindClass(env, buf);
     free(buf);
 
@@ -1250,7 +1251,7 @@
     jobject jar, man, attr;
     jstring str, result = 0;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile"));
     NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "(Ljava/lang/String;)V"));
     NULL_CHECK0(str = NewPlatformString(env, jarname));
@@ -1471,7 +1472,7 @@
     jclass ver;
     jmethodID print;
 
-    NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version"));
+    NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version"));
     NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V"));
 
     (*env)->CallStaticVoidMethod(env, ver, print);
--- a/hotspot/src/os/solaris/launcher/java.h	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/solaris/launcher/java.h	Wed Jul 05 16:43:15 2017 +0200
@@ -101,4 +101,15 @@
  */
 extern jboolean _launcher_debug;
 
+/*
+ * This allows for finding classes from the VM's bootstrap class loader
+ * directly. FindClass uses the application class loader internally, which
+ * causes unnecessary searching of the classpath for the required classes.
+ */
+typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env,
+                                                const char *name,
+                                                jboolean throwError));
+
+jclass FindBootStrapClass(JNIEnv *env, const char *classname);
+
 #endif /* _JAVA_H_ */
--- a/hotspot/src/os/solaris/launcher/java_md.c	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/solaris/launcher/java_md.c	Wed Jul 05 16:43:15 2017 +0200
@@ -1826,3 +1826,24 @@
 {
     return(borrowed_unsetenv(name));
 }
+
+/*
+ * The implementation for finding classes from the bootstrap
+ * class loader; refer to java.h.
+ */
+static FindClassFromBootLoader_t *findBootClass = NULL;
+
+jclass
+FindBootStrapClass(JNIEnv *env, const char* classname)
+{
+   if (findBootClass == NULL) {
+       findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT,
+          "JVM_FindClassFromBootLoader");
+       if (findBootClass == NULL) {
+           fprintf(stderr, "Error: could not load method JVM_FindClassFromBootLoader");
+           return NULL;
+       }
+   }
+   return findBootClass(env, classname, JNI_FALSE);
+}
+
--- a/hotspot/src/os/solaris/vm/globals_solaris.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/solaris/vm/globals_solaris.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -44,5 +44,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, true);
+define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, false);
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -462,16 +462,14 @@
   int online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
   pid_t pid = getpid();
   psetid_t pset = PS_NONE;
-  // Are we running in a processor set?
+  // Are we running in a processor set or is there any processor set around?
   if (pset_bind(PS_QUERY, P_PID, pid, &pset) == 0) {
-    if (pset != PS_NONE) {
-      uint_t pset_cpus;
-      // Query number of cpus in processor set
-      if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) {
-        assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check");
-        _processors_online = pset_cpus;
-        return pset_cpus;
-      }
+    uint_t pset_cpus;
+    // Query the number of cpus available to us.
+    if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) {
+      assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check");
+      _processors_online = pset_cpus;
+      return pset_cpus;
     }
   }
   // Otherwise return number of online cpus
@@ -1691,6 +1689,40 @@
   }
 }
 
+bool os::supports_vtime() { return true; }
+
+bool os::enable_vtime() {
+  int fd = open("/proc/self/ctl", O_WRONLY);
+  if (fd == -1)
+    return false;
+
+  long cmd[] = { PCSET, PR_MSACCT };
+  int res = write(fd, cmd, sizeof(long) * 2);
+  close(fd);
+  if (res != sizeof(long) * 2)
+    return false;
+
+  return true;
+}
+
+bool os::vtime_enabled() {
+  int fd = open("/proc/self/status", O_RDONLY);
+  if (fd == -1)
+    return false;
+
+  pstatus_t status;
+  int res = read(fd, (void*) &status, sizeof(pstatus_t));
+  close(fd);
+  if (res != sizeof(pstatus_t))
+    return false;
+
+  return status.pr_flags & PR_MSACCT;
+}
+
+double os::elapsedVTime() {
+  return (double)gethrvtime() / (double)hrtime_hz;
+}
+
 // Used internally for comparisons only
 // getTimeMillis guaranteed to not move backwards on Solaris
 jlong getTimeMillis() {
@@ -2688,7 +2720,7 @@
    return bottom;
 }
 
-// Detect the topology change. Typically happens during CPU pluggin-unplugging.
+// Detect the topology change. Typically happens during CPU plugging-unplugging.
 bool os::numa_topology_changed() {
   int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie());
   if (is_stale != -1 && is_stale) {
--- a/hotspot/src/os/windows/vm/globals_windows.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/windows/vm/globals_windows.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -37,5 +37,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePagesIndividualAllocation, true);
 define_pd_global(bool, UseOSErrorReporting, false);  // for now.
 define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -737,6 +737,17 @@
   return result;
 }
 
+// For now, we say that Windows does not support vtime.  I have no idea
+// whether it can actually be made to (DLD, 9/13/05).
+
+bool os::supports_vtime() { return false; }
+bool os::enable_vtime() { return false; }
+bool os::vtime_enabled() { return false; }
+double os::elapsedVTime() {
+  // better than nothing, but not much
+  return elapsedTime();
+}
+
 jlong os::javaTimeMillis() {
   if (UseFakeTimers) {
     return fake_time++;
@@ -2582,9 +2593,104 @@
 }
 
 char* os::reserve_memory_special(size_t bytes) {
-  DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
-  char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_EXECUTE_READWRITE);
-  return res;
+
+  if (UseLargePagesIndividualAllocation) {
+    if (TracePageSizes && Verbose) {
+       tty->print_cr("Reserving large pages individually.");
+    }
+    char * p_buf;
+    // First reserve enough address space in advance, since we want to be
+    // able to break a single contiguous virtual address range into multiple
+    // large page commits, but WS2003 does not allow reserving large page space.
+    // So we just use 4K pages for the reserve; this gives us a legal contiguous
+    // address space. Then we deallocate that reservation and re-allocate it
+    // using large pages.
+    const size_t size_of_reserve = bytes + _large_page_size;
+    if (bytes > size_of_reserve) {
+      // Overflowed.
+      warning("Individually allocated large pages failed, "
+        "use -XX:-UseLargePagesIndividualAllocation to turn off");
+      return NULL;
+    }
+    p_buf = (char *) VirtualAlloc(NULL,
+                                 size_of_reserve,  // size of Reserve
+                                 MEM_RESERVE,
+                                 PAGE_EXECUTE_READWRITE);
+    // If reservation failed, return NULL
+    if (p_buf == NULL) return NULL;
+
+    release_memory(p_buf, bytes + _large_page_size);
+    // round up to page boundary.  If the size_of_reserve did not
+    // overflow and the reservation did not fail, this align up
+    // should not overflow.
+    p_buf = (char *) align_size_up((size_t)p_buf, _large_page_size);
+
+    // now go through and allocate one page at a time until all bytes are
+    // allocated
+    size_t  bytes_remaining = align_size_up(bytes, _large_page_size);
+    // An overflow of align_size_up() would have been caught above
+    // in the calculation of size_of_reserve.
+    char * next_alloc_addr = p_buf;
+
+#ifdef ASSERT
+    // Variable for the failure injection
+    long ran_num = os::random();
+    size_t fail_after = ran_num % bytes;
+#endif
+
+    while (bytes_remaining) {
+      size_t bytes_to_rq = MIN2(bytes_remaining, _large_page_size);
+      // Note allocate and commit
+      char * p_new;
+
+#ifdef ASSERT
+      bool inject_error = LargePagesIndividualAllocationInjectError &&
+          (bytes_remaining <= fail_after);
+#else
+      const bool inject_error = false;
+#endif
+
+      if (inject_error) {
+        p_new = NULL;
+      } else {
+        p_new = (char *) VirtualAlloc(next_alloc_addr,
+                                    bytes_to_rq,
+                                    MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
+                                    PAGE_EXECUTE_READWRITE);
+      }
+
+      if (p_new == NULL) {
+        // Free any allocated pages
+        if (next_alloc_addr > p_buf) {
+          // Some memory was committed so release it.
+          size_t bytes_to_release = bytes - bytes_remaining;
+          release_memory(p_buf, bytes_to_release);
+        }
+#ifdef ASSERT
+        if (UseLargePagesIndividualAllocation &&
+            LargePagesIndividualAllocationInjectError) {
+          if (TracePageSizes && Verbose) {
+             tty->print_cr("Reserving large pages individually failed.");
+          }
+        }
+#endif
+        return NULL;
+      }
+      bytes_remaining -= bytes_to_rq;
+      next_alloc_addr += bytes_to_rq;
+    }
+
+    return p_buf;
+
+  } else {
+    // normal policy just allocate it all at once
+    DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
+    char * res = (char *)VirtualAlloc(NULL,
+                                      bytes,
+                                      flag,
+                                      PAGE_EXECUTE_READWRITE);
+    return res;
+  }
 }
 
 bool os::release_memory_special(char* base, size_t bytes) {
@@ -2972,6 +3078,7 @@
 volatile intx os::win32::_os_thread_count    = 0;
 
 bool   os::win32::_is_nt              = false;
+bool   os::win32::_is_windows_2003    = false;
 
 
 void os::win32::initialize_system_info() {
@@ -2994,7 +3101,15 @@
   GetVersionEx(&oi);
   switch(oi.dwPlatformId) {
     case VER_PLATFORM_WIN32_WINDOWS: _is_nt = false; break;
-    case VER_PLATFORM_WIN32_NT:      _is_nt = true;  break;
+    case VER_PLATFORM_WIN32_NT:
+      _is_nt = true;
+      {
+        int os_vers = oi.dwMajorVersion * 1000 + oi.dwMinorVersion;
+        if (os_vers == 5002) {
+          _is_windows_2003 = true;
+        }
+      }
+      break;
     default: fatal("Unknown platform");
   }
 
@@ -3092,9 +3207,13 @@
     NoYieldsInMicrolock = true;
   }
 #endif
+  // This may be overridden later when argument processing is done.
+  FLAG_SET_ERGO(bool, UseLargePagesIndividualAllocation,
+    os::win32::is_windows_2003());
+
   // Initialize main_process and main_thread
   main_process = GetCurrentProcess();  // Remember main_process is a pseudo handle
-  if (!DuplicateHandle(main_process, GetCurrentThread(), main_process,
+ if (!DuplicateHandle(main_process, GetCurrentThread(), main_process,
                        &main_thread, THREAD_ALL_ACCESS, false, 0)) {
     fatal("DuplicateHandle failed\n");
   }
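The UseLargePagesIndividualAllocation path in the os_windows.cpp hunk above pre-reserves the range with small pages, releases that reservation, and then re-commits the aligned range one large page at a time so a mid-stream failure can be unwound. A platform-free sketch of just the chunking arithmetic that loop performs (align_up and large_page_chunks are illustrative helpers, not HotSpot code):

#include <cstddef>
#include <vector>

// Round value up to the next multiple of alignment (alignment must be a power of two).
static size_t align_up(size_t value, size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

// Sizes of the individual commits the loop would issue for a request of 'bytes'.
static std::vector<size_t> large_page_chunks(size_t bytes, size_t large_page_size) {
  std::vector<size_t> chunks;
  size_t remaining = align_up(bytes, large_page_size);
  while (remaining > 0) {
    size_t chunk = remaining < large_page_size ? remaining : large_page_size;
    chunks.push_back(chunk);
    remaining -= chunk;
  }
  return chunks;   // e.g. a 5 MB request with 2 MB pages yields {2 MB, 2 MB, 2 MB}
}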
--- a/hotspot/src/os/windows/vm/os_windows.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/os/windows/vm/os_windows.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -34,6 +34,7 @@
   static julong _physical_memory;
   static size_t _default_stack_size;
   static bool   _is_nt;
+  static bool   _is_windows_2003;
 
  public:
   // Windows-specific interface:
@@ -60,6 +61,9 @@
   // Tells whether the platform is NT or Windown95
   static bool is_nt() { return _is_nt; }
 
+  // Tells whether the platform is Windows 2003
+  static bool is_windows_2003() { return _is_windows_2003; }
+
   // Returns the byte size of a virtual memory page
   static int vm_page_size() { return _vm_page_size; }
 
--- a/hotspot/src/share/vm/adlc/formssel.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/adlc/formssel.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -3768,6 +3768,10 @@
 int MatchRule::is_ideal_copy() const {
   if( _rChild ) {
     const char  *opType = _rChild->_opType;
+#if 1
+    if( strcmp(opType,"CastIP")==0 )
+      return 1;
+#else
     if( strcmp(opType,"CastII")==0 )
       return 1;
     // Do not treat *CastPP this way, because it
@@ -3787,6 +3791,7 @@
     //  return 1;
     //if( strcmp(opType,"CastP2X")==0 )
     //  return 1;
+#endif
   }
   if( is_chain_rule(_AD.globalNames()) &&
       _lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 )
--- a/hotspot/src/share/vm/asm/assembler.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/asm/assembler.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -249,8 +249,6 @@
 bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Exception handler checks the nmethod's implicit null checks table
   // only when this method returns false.
-#ifndef SPARC
-  // Sparc does not have based addressing
   if (UseCompressedOops) {
     // The first page after heap_base is unmapped and
     // the 'offset' is equal to [heap_base + offset] for
@@ -261,7 +259,6 @@
       offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
     }
   }
-#endif // SPARC
   return offset < 0 || os::vm_page_size() <= offset;
 }
 
--- a/hotspot/src/share/vm/c1/c1_CodeStubs.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_CodeStubs.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -482,3 +482,81 @@
   virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); }
 #endif // PRODUCT
 };
+
+//////////////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+// Code stubs for Garbage-First barriers.
+class G1PreBarrierStub: public CodeStub {
+ private:
+  LIR_Opr _addr;
+  LIR_Opr _pre_val;
+  LIR_PatchCode _patch_code;
+  CodeEmitInfo* _info;
+
+ public:
+  // pre_val (a temporary register) must be a register;
+  // addr (the address of the field to be read) must be a LIR_Address
+  G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) :
+    _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info)
+  {
+    assert(_pre_val->is_register(), "should be temporary register");
+    assert(_addr->is_address(), "should be the address of the field");
+  }
+
+  LIR_Opr addr() const { return _addr; }
+  LIR_Opr pre_val() const { return _pre_val; }
+  LIR_PatchCode patch_code() const { return _patch_code; }
+  CodeEmitInfo* info() const { return _info; }
+
+  virtual void emit_code(LIR_Assembler* e);
+  virtual void visit(LIR_OpVisitState* visitor) {
+    // don't pass in the code emit info since it's processed in the fast
+    // path
+    if (_info != NULL)
+      visitor->do_slow_case(_info);
+    else
+      visitor->do_slow_case();
+    visitor->do_input(_addr);
+    visitor->do_temp(_pre_val);
+  }
+#ifndef PRODUCT
+  virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); }
+#endif // PRODUCT
+};
+
+class G1PostBarrierStub: public CodeStub {
+ private:
+  LIR_Opr _addr;
+  LIR_Opr _new_val;
+
+  static jbyte* _byte_map_base;
+  static jbyte* byte_map_base_slow();
+  static jbyte* byte_map_base() {
+    if (_byte_map_base == NULL) {
+      _byte_map_base = byte_map_base_slow();
+    }
+    return _byte_map_base;
+  }
+
+ public:
+  // addr (the address of the object head) and new_val must be registers.
+  G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { }
+
+  LIR_Opr addr() const { return _addr; }
+  LIR_Opr new_val() const { return _new_val; }
+
+  virtual void emit_code(LIR_Assembler* e);
+  virtual void visit(LIR_OpVisitState* visitor) {
+    // don't pass in the code emit info since it's processed in the fast path
+    visitor->do_slow_case();
+    visitor->do_input(_addr);
+    visitor->do_input(_new_val);
+  }
+#ifndef PRODUCT
+  virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); }
+#endif // PRODUCT
+};
+
+#endif // SERIALGC
+//////////////////////////////////////////////////////////////////////////////////////////
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -74,6 +74,7 @@
 LIR_Assembler::LIR_Assembler(Compilation* c):
    _compilation(c)
  , _masm(c->masm())
+ , _bs(Universe::heap()->barrier_set())
  , _frame_map(c->frame_map())
  , _current_block(NULL)
  , _pending_non_safepoint(NULL)
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -24,11 +24,13 @@
 
 class Compilation;
 class ScopeValue;
+class BarrierSet;
 
 class LIR_Assembler: public CompilationResourceObj {
  private:
   C1_MacroAssembler* _masm;
   CodeStubList*      _slow_case_stubs;
+  BarrierSet*        _bs;
 
   Compilation*       _compilation;
   FrameMap*          _frame_map;
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -285,16 +285,7 @@
 
 
 void LIRGenerator::init() {
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-
-#ifdef _LP64
-  _card_table_base = new LIR_Const((jlong)ct->byte_map_base);
-#else
-  _card_table_base = new LIR_Const((jint)ct->byte_map_base);
-#endif
+  _bs = Universe::heap()->barrier_set();
 }
 
 
@@ -1239,8 +1230,37 @@
 
 // Various barriers
 
+void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
+  // Do the pre-write barrier, if any.
+  switch (_bs->kind()) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info);
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      // No pre barriers
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      // No pre barriers
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+
 void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
-  switch (Universe::heap()->barrier_set()->kind()) {
+  switch (_bs->kind()) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      G1SATBCardTableModRef_post_barrier(addr,  new_val);
+      break;
+#endif // SERIALGC
     case BarrierSet::CardTableModRef:
     case BarrierSet::CardTableExtension:
       CardTableModRef_post_barrier(addr,  new_val);
@@ -1254,11 +1274,120 @@
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
+  if (G1DisablePreBarrier) return;
+
+  // First we test whether marking is in progress.
+  BasicType flag_type;
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    flag_type = T_INT;
+  } else {
+    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
+              "Assumption");
+    flag_type = T_BYTE;
+  }
+  LIR_Opr thrd = getThreadPointer();
+  LIR_Address* mark_active_flag_addr =
+    new LIR_Address(thrd,
+                    in_bytes(JavaThread::satb_mark_queue_offset() +
+                             PtrQueue::byte_offset_of_active()),
+                    flag_type);
+  // Read the marking-in-progress flag.
+  LIR_Opr flag_val = new_register(T_INT);
+  __ load(mark_active_flag_addr, flag_val);
+
+  LabelObj* start_store = new LabelObj();
+
+  LIR_PatchCode pre_val_patch_code =
+    patch ? lir_patch_normal : lir_patch_none;
+
+  LIR_Opr pre_val = new_register(T_OBJECT);
+
+  __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
+  if (!addr_opr->is_address()) {
+    assert(addr_opr->is_register(), "must be");
+    addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT));
+  }
+  CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code,
+                                        info);
+  __ branch(lir_cond_notEqual, T_INT, slow);
+  __ branch_destination(slow->continuation());
+}
+
+void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
+  if (G1DisablePostBarrier) return;
+
+  // If the "new_val" is a constant NULL, no barrier is necessary.
+  if (new_val->is_constant() &&
+      new_val->as_constant_ptr()->as_jobject() == NULL) return;
+
+  if (!new_val->is_register()) {
+    LIR_Opr new_val_reg = new_pointer_register();
+    if (new_val->is_constant()) {
+      __ move(new_val, new_val_reg);
+    } else {
+      __ leal(new_val, new_val_reg);
+    }
+    new_val = new_val_reg;
+  }
+  assert(new_val->is_register(), "must be a register at this point");
+
+  if (addr->is_address()) {
+    LIR_Address* address = addr->as_address_ptr();
+    LIR_Opr ptr = new_pointer_register();
+    if (!address->index()->is_valid() && address->disp() == 0) {
+      __ move(address->base(), ptr);
+    } else {
+      assert(address->disp() != max_jint, "lea doesn't support patched addresses!");
+      __ leal(addr, ptr);
+    }
+    addr = ptr;
+  }
+  assert(addr->is_register(), "must be a register at this point");
+
+  LIR_Opr xor_res = new_pointer_register();
+  LIR_Opr xor_shift_res = new_pointer_register();
+
+  if (TwoOperandLIRForm ) {
+    __ move(addr, xor_res);
+    __ logical_xor(xor_res, new_val, xor_res);
+    __ move(xor_res, xor_shift_res);
+    __ unsigned_shift_right(xor_shift_res,
+                            LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
+                            xor_shift_res,
+                            LIR_OprDesc::illegalOpr());
+  } else {
+    __ logical_xor(addr, new_val, xor_res);
+    __ unsigned_shift_right(xor_res,
+                            LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
+                            xor_shift_res,
+                            LIR_OprDesc::illegalOpr());
+  }
+
+  if (!new_val->is_register()) {
+    LIR_Opr new_val_reg = new_pointer_register();
+    __ leal(new_val, new_val_reg);
+    new_val = new_val_reg;
+  }
+  assert(new_val->is_register(), "must be a register at this point");
+
+  __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
+
+  CodeStub* slow = new G1PostBarrierStub(addr, new_val);
+  __ branch(lir_cond_notEqual, T_INT, slow);
+  __ branch_destination(slow->continuation());
+}
+
+#endif // SERIALGC
+////////////////////////////////////////////////////////////////////////
+
 void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
 
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
-  LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base);
+  assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
+  LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base);
   if (addr->is_address()) {
     LIR_Address* address = addr->as_address_ptr();
     LIR_Opr ptr = new_register(T_OBJECT);
@@ -1388,6 +1517,13 @@
     __ membar_release();
   }
 
+  if (is_oop) {
+    // Do the pre-write barrier, if any.
+    pre_barrier(LIR_OprFact::address(address),
+                needs_patching,
+                (info ? new CodeEmitInfo(info) : NULL));
+  }
+
   if (is_volatile) {
     assert(!needs_patching && x->is_loaded(),
            "how do we know it's volatile if it's not loaded");
@@ -1398,7 +1534,12 @@
   }
 
   if (is_oop) {
+#ifdef PRECISE_CARDMARK
+    // Precise cardmarks don't work
+    post_barrier(LIR_OprFact::address(address), value.result());
+#else
     post_barrier(object.result(), value.result());
+#endif // PRECISE_CARDMARK
   }
 
   if (is_volatile && os::is_MP()) {
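The G1 post barrier generated above XORs the field address with the new value and shifts the result right by HeapRegion::LogOfHRGrainBytes, so the slow-path stub only runs for stores that cross a region boundary, and constant NULL stores are filtered out entirely. A minimal sketch of that filter, assuming 1 MB regions purely for the example:

#include <cstdint>

static const int kLogOfHRGrainBytes = 20;   // stand-in for HeapRegion::LogOfHRGrainBytes (1 MB assumed)

// True when the store may need a remembered-set update: the stored object
// lives in a different heap region than the field being written.
static bool needs_g1_post_barrier(uintptr_t field_addr, uintptr_t new_val) {
  if (new_val == 0) return false;   // storing NULL never creates a cross-region reference
  return ((field_addr ^ new_val) >> kLogOfHRGrainBytes) != 0;
}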
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -145,6 +145,7 @@
 
 // only the classes below belong in the same file
 class LIRGenerator: public InstructionVisitor, public BlockClosure {
+
  private:
   Compilation*  _compilation;
   ciMethod*     _method;    // method that we are compiling
@@ -154,6 +155,7 @@
   Values        _instruction_for_operand;
   BitMap2D      _vreg_flags; // flags which can be set on a per-vreg basis
   LIR_List*     _lir;
+  BarrierSet*   _bs;
 
   LIRGenerator* gen() {
     return this;
@@ -174,8 +176,6 @@
   LIR_OprList                     _reg_for_constants;
   Values                          _unpinned_constants;
 
-  LIR_Const*                      _card_table_base;
-
   friend class PhiResolver;
 
   // unified bailout support
@@ -196,8 +196,6 @@
   LIR_Opr load_constant(Constant* x);
   LIR_Opr load_constant(LIR_Const* constant);
 
-  LIR_Const* card_table_base() const { return _card_table_base; }
-
   void  set_result(Value x, LIR_Opr opr)           {
     assert(opr->is_valid(), "must set to valid value");
     assert(x->operand()->is_illegal(), "operand should never change");
@@ -253,12 +251,17 @@
 
   // generic interface
 
+  void pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
   void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
 
   // specific implementations
+  // pre barriers
+
+  void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
 
   // post barriers
 
+  void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
   void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
 
 
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -168,6 +168,8 @@
   switch (id) {
     // These stubs don't need to have an oopmap
     case dtrace_object_alloc_id:
+    case g1_pre_barrier_slow_id:
+    case g1_post_barrier_slow_id:
     case slow_subtype_check_id:
     case fpu2long_stub_id:
     case unwind_exception_id:
--- a/hotspot/src/share/vm/c1/c1_Runtime1.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -56,6 +56,8 @@
   stub(access_field_patching)        \
   stub(load_klass_patching)          \
   stub(jvmti_exception_throw)        \
+  stub(g1_pre_barrier_slow)          \
+  stub(g1_post_barrier_slow)         \
   stub(fpu2long_stub)                \
   stub(counter_overflow)             \
   last_entry(number_of_ids)
--- a/hotspot/src/share/vm/c1/c1_globals.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/c1/c1_globals.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -213,9 +213,6 @@
   develop(bool, UseFastLocking, true,                                       \
           "Use fast inlined locking code")                                  \
                                                                             \
-  product(bool, FastTLABRefill, true,                                       \
-          "Use fast TLAB refill code")                                      \
-                                                                            \
   develop(bool, UseSlowPath, false,                                         \
           "For debugging: test slow cases by always using them")            \
                                                                             \
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciMethodBlocks.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -49,7 +49,7 @@
 // first half.  Returns the range beginning at bci.
 ciBlock *ciMethodBlocks::split_block_at(int bci) {
   ciBlock *former_block = block_containing(bci);
-  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
+  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
   _blocks->append(new_block);
   assert(former_block != NULL, "must not be NULL");
   new_block->set_limit_bci(bci);
@@ -83,7 +83,7 @@
   if (cb == NULL ) {
     // This is our first time visiting this bytecode.  Create
     // a fresh block and assign it this starting point.
-    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
+    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
     _blocks->append(nb);
      _bci_to_block[bci] = nb;
     return nb;
@@ -98,6 +98,11 @@
   }
 }
 
+ciBlock *ciMethodBlocks::make_dummy_block() {
+  ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
+  return dum;
+}
+
 void ciMethodBlocks::do_analysis() {
   ciBytecodeStream s(_method);
   ciBlock *cur_block = block_containing(0);
@@ -253,7 +258,7 @@
   Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
 
   // create initial block covering the entire method
-  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
+  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
   _blocks->append(b);
   _bci_to_block[0] = b;
 
@@ -334,7 +339,7 @@
 #endif
 
 
-ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
+ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
 #ifndef PRODUCT
                          _method(method),
 #endif
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciMethodBlocks.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -48,6 +48,8 @@
   int num_blocks()  { return _num_blocks;}
   void clear_processed();
 
+  ciBlock *make_dummy_block(); // a block not associated with a bci
+
 #ifndef PRODUCT
   void dump();
 #endif
@@ -81,7 +83,7 @@
     fall_through_bci = -1
   };
 
-  ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
+  ciBlock(ciMethod *method, int index, int start_bci);
   int start_bci() const         { return _start_bci; }
   int limit_bci() const         { return _limit_bci; }
   int control_bci() const       { return _control_bci; }
@@ -94,7 +96,6 @@
   int ex_limit_bci() const      { return _ex_limit_bci; }
   bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }
 
-
   // flag handling
   bool  processed() const           { return (_flags & Processed) != 0; }
   bool  is_handler() const          { return (_flags & Handler) != 0; }
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -338,8 +338,10 @@
   }
   _trap_bci = -1;
   _trap_index = 0;
+  _def_locals.clear();
 }
 
+
 // ------------------------------------------------------------------
 // ciTypeFlow::get_start_state
 //
@@ -735,7 +737,7 @@
 void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) {
   bool will_link;
   ciKlass* klass = str->get_klass(will_link);
-  if (!will_link) {
+  if (!will_link || str->is_unresolved_klass()) {
     trap(str, klass, str->get_klass_index());
   } else {
     push_object(klass);
@@ -1268,7 +1270,9 @@
     }
   case Bytecodes::_iinc:
     {
-      check_int(local(str->get_index()));
+      int lnum = str->get_index();
+      check_int(local(lnum));
+      store_to_local(lnum);
       break;
     }
   case Bytecodes::_iload:   load_local_int(str->get_index()); break;
@@ -1506,6 +1510,46 @@
 }
 #endif
 
+
+// ------------------------------------------------------------------
+// ciTypeFlow::SuccIter::next
+//
+void ciTypeFlow::SuccIter::next() {
+  int succ_ct = _pred->successors()->length();
+  int next = _index + 1;
+  if (next < succ_ct) {
+    _index = next;
+    _succ = _pred->successors()->at(next);
+    return;
+  }
+  for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) {
+    // Do not compile any code for unloaded exception types.
+    // Following compiler passes are responsible for doing this also.
+    ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i);
+    if (exception_klass->is_loaded()) {
+      _index = next;
+      _succ = _pred->exceptions()->at(i);
+      return;
+    }
+    next++;
+  }
+  _index = -1;
+  _succ = NULL;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::SuccIter::set_succ
+//
+void ciTypeFlow::SuccIter::set_succ(Block* succ) {
+  int succ_ct = _pred->successors()->length();
+  if (_index < succ_ct) {
+    _pred->successors()->at_put(_index, succ);
+  } else {
+    int idx = _index - succ_ct;
+    _pred->exceptions()->at_put(idx, succ);
+  }
+}
+
 // ciTypeFlow::Block
 //
 // A basic block.
@@ -1526,10 +1570,11 @@
   _jsrs = new_jsrs;
   _next = NULL;
   _on_work_list = false;
-  _pre_order = -1; assert(!has_pre_order(), "");
-  _private_copy = false;
+  _backedge_copy = false;
+  _exception_entry = false;
   _trap_bci = -1;
   _trap_index = 0;
+  df_init();
 
   if (CITraceTypeFlow) {
     tty->print_cr(">> Created new block");
@@ -1541,55 +1586,13 @@
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::clone_loop_head
-//
-ciTypeFlow::Block*
-ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer,
-                                   int branch_bci,
-                                   ciTypeFlow::Block* target,
-                                   ciTypeFlow::JsrSet* jsrs) {
-  // Loop optimizations are not performed on Tier1 compiles. Do nothing.
-  if (analyzer->env()->comp_level() < CompLevel_full_optimization) {
-    return target;
-  }
-
-  // The current block ends with a branch.
-  //
-  // If the target block appears to be the test-clause of a for loop, and
-  // it is not too large, and it has not yet been cloned, clone it.
-  // The pre-existing copy becomes the private clone used only by
-  // the initial iteration of the loop.  (We know we are simulating
-  // the initial iteration right now, since we have never calculated
-  // successors before for this block.)
-
-  if (branch_bci <= start()
-      && (target->limit() - target->start()) <= CICloneLoopTestLimit
-      && target->private_copy_count() == 0) {
-    // Setting the private_copy bit ensures that the target block cannot be
-    // reached by any other paths, such as fall-in from the loop body.
-    // The private copy will be accessible only on successor lists
-    // created up to this point.
-    target->set_private_copy(true);
-    if (CITraceTypeFlow) {
-      tty->print(">> Cloning a test-clause block ");
-      print_value_on(tty);
-      tty->cr();
-    }
-    // If the target is the current block, then later on a new copy of the
-    // target block will be created when its bytecodes are reached by
-    // an alternate path. (This is the case for loops with the loop
-    // head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.)
-    //
-    // Otherwise, duplicate the target block now and use it immediately.
-    // (The case for loops with the loop head at the bci-wise top of the
-    // loop, as with 1.4.2 javac.)
-    //
-    // In either case, the new copy of the block will remain public.
-    if (target != this) {
-      target = analyzer->block_at(branch_bci, jsrs);
-    }
-  }
-  return target;
+// ciTypeFlow::Block::df_init
+void ciTypeFlow::Block::df_init() {
+  _pre_order = -1; assert(!has_pre_order(), "");
+  _post_order = -1; assert(!has_post_order(), "");
+  _loop = NULL;
+  _irreducible_entry = false;
+  _rpo_next = NULL;
 }
 
 // ------------------------------------------------------------------
@@ -1644,7 +1647,6 @@
       case Bytecodes::_ifnull:       case Bytecodes::_ifnonnull:
         // Our successors are the branch target and the next bci.
         branch_bci = str->get_dest();
-        clone_loop_head(analyzer, branch_bci, this, jsrs);
         _successors =
           new (arena) GrowableArray<Block*>(arena, 2, 0, NULL);
         assert(_successors->length() == IF_NOT_TAKEN, "");
@@ -1658,14 +1660,7 @@
         _successors =
           new (arena) GrowableArray<Block*>(arena, 1, 0, NULL);
         assert(_successors->length() == GOTO_TARGET, "");
-        target = analyzer->block_at(branch_bci, jsrs);
-        // If the target block has not been visited yet, and looks like
-        // a two-way branch, attempt to clone it if it is a loop head.
-        if (target->_successors != NULL
-            && target->_successors->length() == (IF_TAKEN + 1)) {
-          target = clone_loop_head(analyzer, branch_bci, target, jsrs);
-        }
-        _successors->append(target);
+        _successors->append(analyzer->block_at(branch_bci, jsrs));
         break;
 
       case Bytecodes::_jsr:
@@ -1801,65 +1796,60 @@
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::is_simpler_than
-//
-// A relation used to order our work list.  We work on a block earlier
-// if it has a smaller jsr stack or it occurs earlier in the program
-// text.
-//
-// Note: maybe we should redo this functionality to make blocks
-// which correspond to exceptions lower priority.
-bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) {
-  if (other == NULL) {
-    return true;
-  } else {
-    int size1 = _jsrs->size();
-    int size2 = other->_jsrs->size();
-    if (size1 < size2) {
-      return true;
-    } else if (size2 < size1) {
-      return false;
-    } else {
-#if 0
-      if (size1 > 0) {
-        int r1 = _jsrs->record_at(0)->return_address();
-        int r2 = _jsrs->record_at(0)->return_address();
-        if (r1 < r2) {
-          return true;
-        } else if (r2 < r1) {
-          return false;
-        } else {
-          int e1 = _jsrs->record_at(0)->return_address();
-          int e2 = _jsrs->record_at(0)->return_address();
-          if (e1 < e2) {
-            return true;
-          } else if (e2 < e1) {
-            return false;
-          }
-        }
-      }
-#endif
-      return (start() <= other->start());
-    }
-  }
+// ciTypeFlow::Block::set_backedge_copy
+// Use this only to make a pre-existing public block into a backedge copy.
+void ciTypeFlow::Block::set_backedge_copy(bool z) {
+  assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public");
+  _backedge_copy = z;
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::set_private_copy
-// Use this only to make a pre-existing public block into a private copy.
-void ciTypeFlow::Block::set_private_copy(bool z) {
-  assert(z || (z == is_private_copy()), "cannot make a private copy public");
-  _private_copy = z;
+// ciTypeFlow::Block::is_clonable_exit
+//
+// At most 2 normal successors, one of which continues looping,
+// and all exceptional successors must exit.
+bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) {
+  int normal_cnt  = 0;
+  int in_loop_cnt = 0;
+  for (SuccIter iter(this); !iter.done(); iter.next()) {
+    Block* succ = iter.succ();
+    if (iter.is_normal_ctrl()) {
+      if (++normal_cnt > 2) return false;
+      if (lp->contains(succ->loop())) {
+        if (++in_loop_cnt > 1) return false;
+      }
+    } else {
+      if (lp->contains(succ->loop())) return false;
+    }
+  }
+  return in_loop_cnt == 1;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Block::looping_succ
+//
+ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) {
+  assert(successors()->length() <= 2, "at most 2 normal successors");
+  for (SuccIter iter(this); !iter.done(); iter.next()) {
+    Block* succ = iter.succ();
+    if (lp->contains(succ->loop())) {
+      return succ;
+    }
+  }
+  return NULL;
 }
 
 #ifndef PRODUCT
 // ------------------------------------------------------------------
 // ciTypeFlow::Block::print_value_on
 void ciTypeFlow::Block::print_value_on(outputStream* st) const {
-  if (has_pre_order())  st->print("#%-2d ", pre_order());
+  if (has_pre_order()) st->print("#%-2d ", pre_order());
+  if (has_rpo())       st->print("rpo#%-2d ", rpo());
   st->print("[%d - %d)", start(), limit());
+  if (is_loop_head()) st->print(" lphd");
+  if (is_irreducible_entry()) st->print(" irred");
   if (_jsrs->size() > 0) { st->print("/");  _jsrs->print_on(st); }
-  if (is_private_copy())  st->print("/private_copy");
+  if (is_backedge_copy())  st->print("/backedge_copy");
 }
 
 // ------------------------------------------------------------------
@@ -1871,6 +1861,16 @@
   st->print_cr("  ====================================================  ");
   st->print ("  ");
   print_value_on(st);
+  st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr();
+  if (loop() && loop()->parent() != NULL) {
+    st->print(" loops:");
+    Loop* lp = loop();
+    do {
+      st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order());
+      if (lp->is_irreducible()) st->print("(ir)");
+      lp = lp->parent();
+    } while (lp->parent() != NULL);
+  }
   st->cr();
   _state->print_on(st);
   if (_successors == NULL) {
@@ -1907,6 +1907,21 @@
 }
 #endif
 
+#ifndef PRODUCT
+// ------------------------------------------------------------------
+// ciTypeFlow::LocalSet::print_on
+void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const {
+  st->print("{");
+  for (int i = 0; i < max; i++) {
+    if (test(i)) st->print(" %d", i);
+  }
+  if (limit > max) {
+    st->print(" %d..%d ", max, limit);
+  }
+  st->print(" }");
+}
+#endif
+
 // ciTypeFlow
 //
 // This is a pass over the bytecodes which computes the following:
@@ -1922,12 +1937,11 @@
   _max_locals = method->max_locals();
   _max_stack = method->max_stack();
   _code_size = method->code_size();
+  _has_irreducible_entry = false;
   _osr_bci = osr_bci;
   _failure_reason = NULL;
   assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument");
-
   _work_list = NULL;
-  _next_pre_order = 0;
 
   _ciblock_count = _methodBlocks->num_blocks();
   _idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray<Block*>*, _ciblock_count);
@@ -1949,12 +1963,6 @@
   _work_list = next_block->next();
   next_block->set_next(NULL);
   next_block->set_on_work_list(false);
-  if (!next_block->has_pre_order()) {
-    // Assign "pre_order" as each new block is taken from the work list.
-    // This number may be used by following phases to order block visits.
-    assert(!have_block_count(), "must not have mapped blocks yet")
-    next_block->set_pre_order(_next_pre_order++);
-  }
   return next_block;
 }
 
@@ -1962,30 +1970,37 @@
 // ciTypeFlow::add_to_work_list
 //
 // Add a basic block to our work list.
+// The list is kept sorted by decreasing post order (same as increasing RPO)
 void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) {
   assert(!block->is_on_work_list(), "must not already be on work list");
 
   if (CITraceTypeFlow) {
-    tty->print(">> Adding block%s ", block->has_pre_order() ? " (again)" : "");
+    tty->print(">> Adding block ");
     block->print_value_on(tty);
     tty->print_cr(" to the work list : ");
   }
 
   block->set_on_work_list(true);
-  if (block->is_simpler_than(_work_list)) {
+
+  // decreasing post order sort
+
+  Block* prev = NULL;
+  Block* current = _work_list;
+  int po = block->post_order();
+  while (current != NULL) {
+    if (!current->has_post_order() || po > current->post_order())
+      break;
+    prev = current;
+    current = current->next();
+  }
+  if (prev == NULL) {
     block->set_next(_work_list);
     _work_list = block;
   } else {
-    Block *temp = _work_list;
-    while (!block->is_simpler_than(temp->next())) {
-      if (CITraceTypeFlow) {
-        tty->print(".");
-      }
-      temp = temp->next();
-    }
-    block->set_next(temp->next());
-    temp->set_next(block);
+    block->set_next(current);
+    prev->set_next(block);
   }
+
   if (CITraceTypeFlow) {
     tty->cr();
   }
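The rewritten add_to_work_list above keeps the work list sorted by decreasing post order (equivalently, increasing reverse post order), inserting each block in front of the first entry whose post order is smaller or not yet assigned. A minimal sketch of that insertion over a plain singly linked list (Node and post_order are illustrative stand-ins for Block and its post order):

struct Node {
  int   post_order;   // -1 stands in for "no post order assigned yet"
  Node* next;
};

// Insert 'n' so the list stays sorted by decreasing post_order.
static Node* insert_by_decreasing_po(Node* head, Node* n) {
  Node* prev = nullptr;
  Node* cur  = head;
  while (cur != nullptr) {
    if (cur->post_order < 0 || n->post_order > cur->post_order) break;
    prev = cur;
    cur  = cur->next;
  }
  n->next = cur;
  if (prev == nullptr) return n;   // becomes the new head
  prev->next = n;
  return head;
}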
@@ -2008,7 +2023,7 @@
   assert(ciblk->start_bci() == bci, "bad ciBlock boundaries");
   Block* block = get_block_for(ciblk->index(), jsrs, option);
 
-  assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result");
+  assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result");
 
   if (CITraceTypeFlow) {
     if (block != NULL) {
@@ -2072,8 +2087,9 @@
     }
 
     if (block->meet_exception(exception_klass, state)) {
-      // Block was modified.  Add it to the work list.
-      if (!block->is_on_work_list()) {
+      // Block was modified and has PO.  Add it to the work list.
+      if (block->has_post_order() &&
+          !block->is_on_work_list()) {
         add_to_work_list(block);
       }
     }
@@ -2091,8 +2107,9 @@
   for (int i = 0; i < len; i++) {
     Block* block = successors->at(i);
     if (block->meet(state)) {
-      // Block was modified.  Add it to the work list.
-      if (!block->is_on_work_list()) {
+      // Block was modified and has PO.  Add it to the work list.
+      if (block->has_post_order() &&
+          !block->is_on_work_list()) {
         add_to_work_list(block);
       }
     }
@@ -2133,6 +2150,111 @@
   return true;
 }
 
+// ------------------------------------------------------------------
+// ciTypeFlow::clone_loop_heads
+//
+// Clone the loop heads
+bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
+  bool rslt = false;
+  for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) {
+    lp = iter.current();
+    Block* head = lp->head();
+    if (lp == loop_tree_root() ||
+        lp->is_irreducible() ||
+        !head->is_clonable_exit(lp))
+      continue;
+
+    // check not already cloned
+    if (head->backedge_copy_count() != 0)
+      continue;
+
+    // check _no_ shared head below us
+    Loop* ch;
+    for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling());
+    if (ch != NULL)
+      continue;
+
+    // Clone head
+    Block* new_head = head->looping_succ(lp);
+    Block* clone = clone_loop_head(lp, temp_vector, temp_set);
+    // Update lp's info
+    clone->set_loop(lp);
+    lp->set_head(new_head);
+    lp->set_tail(clone);
+    // And move original head into outer loop
+    head->set_loop(lp->parent());
+
+    rslt = true;
+  }
+  return rslt;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::clone_loop_head
+//
+// Clone lp's head and replace tail's successors with clone.
+//
+//  |
+//  v
+// head <-> body
+//  |
+//  v
+// exit
+//
+// new_head
+//
+//  |
+//  v
+// head ----------\
+//  |             |
+//  |             v
+//  |  clone <-> body
+//  |    |
+//  | /--/
+//  | |
+//  v v
+// exit
+//
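+// The original head keeps the loop-entry edge and the exit path; the tail's backedge is
+// redirected to the clone, and the looping successor ("new_head" above) becomes the
+// loop's new head (see clone_loop_heads).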
+ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
+  Block* head = lp->head();
+  Block* tail = lp->tail();
+  if (CITraceTypeFlow) {
+    tty->print(">> Requesting clone of loop head "); head->print_value_on(tty);
+    tty->print("  for predecessor ");                tail->print_value_on(tty);
+    tty->cr();
+  }
+  Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy);
+  assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges");
+
+  assert(!clone->has_pre_order(), "just created");
+  clone->set_next_pre_order();
+
+  // Insert clone after (orig) tail in reverse post order
+  clone->set_rpo_next(tail->rpo_next());
+  tail->set_rpo_next(clone);
+
+  // tail->head becomes tail->clone
+  for (SuccIter iter(tail); !iter.done(); iter.next()) {
+    if (iter.succ() == head) {
+      iter.set_succ(clone);
+      break;
+    }
+  }
+  flow_block(tail, temp_vector, temp_set);
+  if (head == tail) {
+    // For self-loops, clone->head becomes clone->clone
+    flow_block(clone, temp_vector, temp_set);
+    for (SuccIter iter(clone); !iter.done(); iter.next()) {
+      if (iter.succ() == head) {
+        iter.set_succ(clone);
+        break;
+      }
+    }
+  }
+  flow_block(clone, temp_vector, temp_set);
+
+  return clone;
+}
 
 // ------------------------------------------------------------------
 // ciTypeFlow::flow_block
@@ -2159,11 +2281,14 @@
 
   // Grab the state from the current block.
   block->copy_state_into(state);
+  state->def_locals()->clear();
 
   GrowableArray<Block*>*           exceptions = block->exceptions();
   GrowableArray<ciInstanceKlass*>* exc_klasses = block->exc_klasses();
   bool has_exceptions = exceptions->length() > 0;
 
+  bool exceptions_used = false;
+
   ciBytecodeStream str(method());
   str.reset_to_bci(start);
   Bytecodes::Code code;
@@ -2172,6 +2297,7 @@
     // Check for exceptional control flow from this point.
     if (has_exceptions && can_trap(str)) {
       flow_exceptions(exceptions, exc_klasses, state);
+      exceptions_used = true;
     }
     // Apply the effects of the current bytecode to our state.
     bool res = state->apply_one_bytecode(&str);
@@ -2189,9 +2315,14 @@
         block->print_on(tty);
       }
 
+      // Save set of locals defined in this block
+      block->def_locals()->add(state->def_locals());
+
       // Record (no) successors.
       block->successors(&str, state, jsrs);
 
+      assert(!has_exceptions || exceptions_used, "Not removing exceptions");
+
       // Discontinue interpretation of this Block.
       return;
     }
@@ -2202,6 +2333,7 @@
     // Check for exceptional control flow from this point.
     if (has_exceptions && can_trap(str)) {
       flow_exceptions(exceptions, exc_klasses, state);
+      exceptions_used = true;
     }
 
     // Fix the JsrSet to reflect effect of the bytecode.
@@ -2218,11 +2350,306 @@
     successors = block->successors(&str, NULL, NULL);
   }
 
+  // Save set of locals defined in this block
+  block->def_locals()->add(state->def_locals());
+
+  // Remove untaken exception paths
+  if (!exceptions_used)
+    exceptions->clear();
+
   // Pass our state to successors.
   flow_successors(successors, state);
 }
 
 // ------------------------------------------------------------------
+// ciTypeFlow::PostorderLoops::next
+//
+// Advance to the next loop in a postorder, left-to-right traversal of the loop tree.
+void ciTypeFlow::PostorderLoops::next() {
+  assert(!done(), "must not be done.");
+  if (_current->sibling() != NULL) {
+    _current = _current->sibling();
+    while (_current->child() != NULL) {
+      _current = _current->child();
+    }
+  } else {
+    _current = _current->parent();
+  }
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::PreorderLoops::next
+//
+// Advance to the next loop in a preorder, left-to-right traversal of the loop tree.
+void ciTypeFlow::PreorderLoops::next() {
+  assert(!done(), "must not be done.");
+  if (_current->child() != NULL) {
+    _current = _current->child();
+  } else if (_current->sibling() != NULL) {
+    _current = _current->sibling();
+  } else {
+    while (_current != _root && _current->sibling() == NULL) {
+      _current = _current->parent();
+    }
+    if (_current == _root) {
+      _current = NULL;
+      assert(done(), "must be done.");
+    } else {
+      assert(_current->sibling() != NULL, "must be more to do");
+      _current = _current->sibling();
+    }
+  }
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::sorted_merge
+//
+// Merge the branch lp into this branch, sorting on the loop head
+// pre_orders. Returns the leaf of the merged branch.
+// Child and sibling pointers will be set up later.
+// Sort is (looking from leaf towards the root)
+//  descending on primary key: loop head's pre_order, and
+//  ascending  on secondary key: loop tail's pre_order.
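+// Here a "branch" is a chain of Loop nodes linked through their parent pointers,
+// from a leaf (innermost loop) out toward the root.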
+ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) {
+  Loop* leaf = this;
+  Loop* prev = NULL;
+  Loop* current = leaf;
+  while (lp != NULL) {
+    int lp_pre_order = lp->head()->pre_order();
+    // Find insertion point for "lp"
+    while (current != NULL) {
+      if (current == lp)
+        return leaf; // Already in list
+      if (current->head()->pre_order() < lp_pre_order)
+        break;
+      if (current->head()->pre_order() == lp_pre_order &&
+          current->tail()->pre_order() > lp->tail()->pre_order()) {
+        break;
+      }
+      prev = current;
+      current = current->parent();
+    }
+    Loop* next_lp = lp->parent(); // Save future list of items to insert
+    // Insert lp before current
+    lp->set_parent(current);
+    if (prev != NULL) {
+      prev->set_parent(lp);
+    } else {
+      leaf = lp;
+    }
+    prev = lp;     // Inserted item is new prev[ious]
+    lp = next_lp;  // Next item to insert
+  }
+  return leaf;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::build_loop_tree
+//
+// Incrementally build loop tree.
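+// Called from df_flow_types as each block is post-visited; at that point all of the
+// block's successors have been visited, so a backedge shows up as a successor that
+// has not yet been post-visited.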
+void ciTypeFlow::build_loop_tree(Block* blk) {
+  assert(!blk->is_post_visited(), "precondition");
+  Loop* innermost = NULL; // merge of loop tree branches over all successors
+
+  for (SuccIter iter(blk); !iter.done(); iter.next()) {
+    Loop*  lp   = NULL;
+    Block* succ = iter.succ();
+    if (!succ->is_post_visited()) {
+      // Found backedge since predecessor post visited, but successor is not
+      assert(succ->pre_order() <= blk->pre_order(), "should be backedge");
+
+      // Create a LoopNode to mark this loop.
+      lp = new (arena()) Loop(succ, blk);
+      if (succ->loop() == NULL)
+        succ->set_loop(lp);
+      // succ->loop will be updated to innermost loop on a later call, when blk==succ
+
+    } else {  // Nested loop
+      lp = succ->loop();
+
+      // If succ is loop head, find outer loop.
+      while (lp != NULL && lp->head() == succ) {
+        lp = lp->parent();
+      }
+      if (lp == NULL) {
+        // Infinite loop; its parent is the root
+        lp = loop_tree_root();
+      }
+    }
+
+    // Check for irreducible loop.
+    // Successor has already been visited. If the successor's loop head
+    // has already been post-visited, then this is another entry into the loop.
+    while (lp->head()->is_post_visited() && lp != loop_tree_root()) {
+      _has_irreducible_entry = true;
+      lp->set_irreducible(succ);
+      if (!succ->is_on_work_list()) {
+        // Assume irreducible entries need more data flow
+        add_to_work_list(succ);
+      }
+      lp = lp->parent();
+      assert(lp != NULL, "nested loop must have parent by now");
+    }
+
+    // Merge loop tree branch for all successors.
+    innermost = innermost == NULL ? lp : innermost->sorted_merge(lp);
+
+  } // end loop
+
+  if (innermost == NULL) {
+    assert(blk->successors()->length() == 0, "CFG exit");
+    blk->set_loop(loop_tree_root());
+  } else if (innermost->head() == blk) {
+    // If loop header, complete the tree pointers
+    if (blk->loop() != innermost) {
+#if ASSERT
+      assert(blk->loop()->head() == innermost->head(), "same head");
+      Loop* dl;
+      for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent());
+      assert(dl == blk->loop(), "blk->loop() already in innermost list");
+#endif
+      blk->set_loop(innermost);
+    }
+    innermost->def_locals()->add(blk->def_locals());
+    Loop* l = innermost;
+    Loop* p = l->parent();
+    while (p && l->head() == blk) {
+      l->set_sibling(p->child());  // Put self on parent's child list
+      p->set_child(l);             // Make self the first child of parent
+      p->def_locals()->add(l->def_locals());
+      l = p;                       // Walk up the parent chain
+      p = l->parent();
+    }
+  } else {
+    blk->set_loop(innermost);
+    innermost->def_locals()->add(blk->def_locals());
+  }
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::contains
+//
+// Returns true if lp is a nested loop or this loop.
+bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const {
+  assert(lp != NULL, "");
+  if (this == lp || head() == lp->head()) return true;
+  int depth1 = depth();
+  int depth2 = lp->depth();
+  if (depth1 > depth2)
+    return false;
+  while (depth1 < depth2) {
+    depth2--;
+    lp = lp->parent();
+  }
+  return this == lp;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::depth
+//
+// Loop depth
+int ciTypeFlow::Loop::depth() const {
+  int dp = 0;
+  for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent())
+    dp++;
+  return dp;
+}
+
+#ifndef PRODUCT
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::print
+void ciTypeFlow::Loop::print(outputStream* st, int indent) const {
+  for (int i = 0; i < indent; i++) st->print(" ");
+  st->print("%d<-%d %s",
+            is_root() ? 0 : this->head()->pre_order(),
+            is_root() ? 0 : this->tail()->pre_order(),
+            is_irreducible()?" irr":"");
+  st->print(" defs: ");
+  def_locals()->print_on(st, _head->outer()->method()->max_locals());
+  st->cr();
+  for (Loop* ch = child(); ch != NULL; ch = ch->sibling())
+    ch->print(st, indent+2);
+}
+#endif
+
+// ------------------------------------------------------------------
+// ciTypeFlow::df_flow_types
+//
+// Perform the depth first type flow analysis. Helper for flow_types.
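+// Each block passes through three states: unvisited (no pre_order), visited
+// (pre_order assigned, still on the stack), and post-visited (post_order assigned).
+// The loop tree and the RPO list are built as blocks are post-visited.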
+void ciTypeFlow::df_flow_types(Block* start,
+                               bool do_flow,
+                               StateVector* temp_vector,
+                               JsrSet* temp_set) {
+  int dft_len = 100;
+  GrowableArray<Block*> stk(arena(), dft_len, 0, NULL);
+
+  ciBlock* dummy = _methodBlocks->make_dummy_block();
+  JsrSet* root_set = new JsrSet(NULL, 0);
+  Block* root_head = new (arena()) Block(this, dummy, root_set);
+  Block* root_tail = new (arena()) Block(this, dummy, root_set);
+  root_head->set_pre_order(0);
+  root_head->set_post_order(0);
+  root_tail->set_pre_order(max_jint);
+  root_tail->set_post_order(max_jint);
+  set_loop_tree_root(new (arena()) Loop(root_head, root_tail));
+
+  stk.push(start);
+
+  _next_pre_order = 0;  // initialize pre_order counter
+  _rpo_list = NULL;
+  int next_po = 0;      // initialize post_order counter
+
+  // Compute RPO and the control flow graph
+  int size;
+  while ((size = stk.length()) > 0) {
+    Block* blk = stk.top(); // Leave node on stack
+    if (!blk->is_visited()) {
+      // forward arc in graph
+      assert (!blk->has_pre_order(), "");
+      blk->set_next_pre_order();
+
+      if (_next_pre_order >= MaxNodeLimit / 2) {
+        // Too many basic blocks.  Bail out.
+        // This can happen when try/finally constructs are nested to depth N,
+        // and there is O(2**N) cloning of jsr bodies.  See bug 4697245!
+        // "MaxNodeLimit / 2" is used because probably the parser will
+        // generate at least twice that many nodes and bail out.
+        record_failure("too many basic blocks");
+        return;
+      }
+      if (do_flow) {
+        flow_block(blk, temp_vector, temp_set);
+        if (failing()) return; // Watch for bailouts.
+      }
+    } else if (!blk->is_post_visited()) {
+      // cross or back arc
+      for (SuccIter iter(blk); !iter.done(); iter.next()) {
+        Block* succ = iter.succ();
+        if (!succ->is_visited()) {
+          stk.push(succ);
+        }
+      }
+      if (stk.length() == size) {
+        // There were no additional children, post visit node now
+        stk.pop(); // Remove node from stack
+
+        build_loop_tree(blk);
+        blk->set_post_order(next_po++);   // Assign post order
+        prepend_to_rpo_list(blk);
+        assert(blk->is_post_visited(), "");
+
+        if (blk->is_loop_head() && !blk->is_on_work_list()) {
+          // Assume loop heads need more data flow
+          add_to_work_list(blk);
+        }
+      }
+    } else {
+      stk.pop(); // Remove post-visited node from stack
+    }
+  }
+}
+
+// ------------------------------------------------------------------
 // ciTypeFlow::flow_types
 //
 // Perform the type flow analysis, creating and cloning Blocks as
@@ -2233,91 +2660,93 @@
   JsrSet* temp_set = new JsrSet(NULL, 16);
 
   // Create the method entry block.
-  Block* block = block_at(start_bci(), temp_set);
-  block->set_pre_order(_next_pre_order++);
-  assert(block->is_start(), "start block must have order #0");
+  Block* start = block_at(start_bci(), temp_set);
 
   // Load the initial state into it.
   const StateVector* start_state = get_start_state();
   if (failing())  return;
-  block->meet(start_state);
-  add_to_work_list(block);
+  start->meet(start_state);
+
+  // Depth first visit
+  df_flow_types(start, true /*do flow*/, temp_vector, temp_set);
 
-  // Trickle away.
-  while (!work_list_empty()) {
-    Block* block = work_list_next();
-    flow_block(block, temp_vector, temp_set);
+  if (failing())  return;
+  assert(_rpo_list == start, "must be start");
 
+  // Any loops found?
+  if (loop_tree_root()->child() != NULL &&
+      env()->comp_level() >= CompLevel_full_optimization) {
+      // Loop optimizations are not performed on Tier1 compiles.
+
+    bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set);
 
-    // NodeCountCutoff is the number of nodes at which the parser
-    // will bail out.  Probably if we already have lots of BBs,
-    // the parser will generate at least twice that many nodes and bail out.
-    // Therefore, this is a conservatively large limit at which to
-    // bail out in the pre-parse typeflow pass.
-    int block_limit = MaxNodeLimit / 2;
+    // If some loop heads were cloned, recompute postorder and loop tree
+    if (changed) {
+      loop_tree_root()->set_child(NULL);
+      for (Block* blk = _rpo_list; blk != NULL;) {
+        Block* next = blk->rpo_next();
+        blk->df_init();
+        blk = next;
+      }
+      df_flow_types(start, false /*no flow*/, temp_vector, temp_set);
+    }
+  }
 
-    if (_next_pre_order >= block_limit) {
-      // Too many basic blocks.  Bail out.
-      //
-      // This can happen when try/finally constructs are nested to depth N,
-      // and there is O(2**N) cloning of jsr bodies.  See bug 4697245!
-      record_failure("too many basic blocks");
-      return;
-    }
+  if (CITraceTypeFlow) {
+    tty->print_cr("\nLoop tree");
+    loop_tree_root()->print();
+  }
+
+  // Continue flow analysis until fixed point reached
+
+  debug_only(int max_block = _next_pre_order;)
 
-    // Watch for bailouts.
-    if (failing())  return;
+  while (!work_list_empty()) {
+    Block* blk = work_list_next();
+    assert (blk->has_post_order(), "post order assigned above");
+
+    flow_block(blk, temp_vector, temp_set);
+
+    assert (max_block == _next_pre_order, "no new blocks");
+    assert (!failing(), "no more bailouts");
   }
 }
 
 // ------------------------------------------------------------------
 // ciTypeFlow::map_blocks
 //
-// Create the block map, which indexes blocks in pre_order.
+// Create the block map, which indexes blocks in reverse post-order.
 void ciTypeFlow::map_blocks() {
   assert(_block_map == NULL, "single initialization");
-  int pre_order_limit = _next_pre_order;
-  _block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit);
-  assert(pre_order_limit == block_count(), "");
-  int po;
-  for (po = 0; po < pre_order_limit; po++) {
-    debug_only(_block_map[po] = NULL);
+  int block_ct = _next_pre_order;
+  _block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct);
+  assert(block_ct == block_count(), "");
+
+  Block* blk = _rpo_list;
+  for (int m = 0; m < block_ct; m++) {
+    int rpo = blk->rpo();
+    assert(rpo == m, "should be sequential");
+    _block_map[rpo] = blk;
+    blk = blk->rpo_next();
   }
-  ciMethodBlocks *mblks = _methodBlocks;
-  ciBlock* current = NULL;
-  int limit_bci = code_size();
-  for (int bci = 0; bci < limit_bci; bci++) {
-    ciBlock* ciblk = mblks->block_containing(bci);
-    if (ciblk != NULL && ciblk != current) {
-      current = ciblk;
-      int curidx = ciblk->index();
-      int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length();
-      for (int i = 0; i < block_count; i++) {
-        Block* block = _idx_to_blocklist[curidx]->at(i);
-        if (!block->has_pre_order())  continue;
-        int po = block->pre_order();
-        assert(_block_map[po] == NULL, "unique ref to block");
-        assert(0 <= po && po < pre_order_limit, "");
-        _block_map[po] = block;
-      }
-    }
-  }
-  for (po = 0; po < pre_order_limit; po++) {
-    assert(_block_map[po] != NULL, "must not drop any blocks");
-    Block* block = _block_map[po];
+  assert(blk == NULL, "should be done");
+
+  for (int j = 0; j < block_ct; j++) {
+    assert(_block_map[j] != NULL, "must not drop any blocks");
+    Block* block = _block_map[j];
     // Remove dead blocks from successor lists:
     for (int e = 0; e <= 1; e++) {
       GrowableArray<Block*>* l = e? block->exceptions(): block->successors();
-      for (int i = 0; i < l->length(); i++) {
-        Block* s = l->at(i);
-        if (!s->has_pre_order()) {
+      for (int k = 0; k < l->length(); k++) {
+        Block* s = l->at(k);
+        if (!s->has_post_order()) {
           if (CITraceTypeFlow) {
             tty->print("Removing dead %s successor of #%d: ", (e? "exceptional":  "normal"), block->pre_order());
             s->print_value_on(tty);
             tty->cr();
           }
           l->remove(s);
-          --i;
+          --k;
         }
       }
     }
@@ -2329,7 +2758,7 @@
 //
 // Find a block with this ciBlock which has a compatible JsrSet.
 // If no such block exists, create it, unless the option is no_create.
-// If the option is create_private_copy, always create a fresh private copy.
+// If the option is create_backedge_copy, always create a fresh backedge copy.
 ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) {
   Arena* a = arena();
   GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
@@ -2342,11 +2771,11 @@
     _idx_to_blocklist[ciBlockIndex] = blocks;
   }
 
-  if (option != create_private_copy) {
+  if (option != create_backedge_copy) {
     int len = blocks->length();
     for (int i = 0; i < len; i++) {
       Block* block = blocks->at(i);
-      if (!block->is_private_copy() && block->is_compatible_with(jsrs)) {
+      if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
         return block;
       }
     }
@@ -2357,15 +2786,15 @@
 
   // We did not find a compatible block.  Create one.
   Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs);
-  if (option == create_private_copy)  new_block->set_private_copy(true);
+  if (option == create_backedge_copy)  new_block->set_backedge_copy(true);
   blocks->append(new_block);
   return new_block;
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::private_copy_count
+// ciTypeFlow::backedge_copy_count
 //
-int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
+int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
   GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
 
   if (blocks == NULL) {
@@ -2376,7 +2805,7 @@
   int len = blocks->length();
   for (int i = 0; i < len; i++) {
     Block* block = blocks->at(i);
-    if (block->is_private_copy() && block->is_compatible_with(jsrs)) {
+    if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
       count++;
     }
   }
@@ -2405,10 +2834,12 @@
   if (failing()) {
     return;
   }
+
+  map_blocks();
+
   if (CIPrintTypeFlow || CITraceTypeFlow) {
-    print_on(tty);
+    rpo_print_on(tty);
   }
-  map_blocks();
 }
 
 // ------------------------------------------------------------------
@@ -2466,4 +2897,19 @@
   st->print_cr("********************************************************");
   st->cr();
 }
+
+void ciTypeFlow::rpo_print_on(outputStream* st) const {
+  st->print_cr("********************************************************");
+  st->print   ("TypeFlow for ");
+  method()->name()->print_symbol_on(st);
+  int limit_bci = code_size();
+  st->print_cr("  %d bytes", limit_bci);
+  for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) {
+    blk->print_on(st);
+    st->print_cr("--------------------------------------------------------");
+    st->cr();
+  }
+  st->print_cr("********************************************************");
+  st->cr();
+}
 #endif
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -34,11 +34,13 @@
   int _max_locals;
   int _max_stack;
   int _code_size;
+  bool      _has_irreducible_entry;
 
   const char* _failure_reason;
 
 public:
   class StateVector;
+  class Loop;
   class Block;
 
   // Build a type flow analyzer
@@ -55,6 +57,7 @@
   int       max_stack() const  { return _max_stack; }
   int       max_cells() const  { return _max_locals + _max_stack; }
   int       code_size() const  { return _code_size; }
+  bool      has_irreducible_entry() const { return _has_irreducible_entry; }
 
   // Represents information about an "active" jsr call.  This
   // class represents a call to the routine at some entry address
@@ -125,6 +128,19 @@
     void print_on(outputStream* st) const PRODUCT_RETURN;
   };
 
+  class LocalSet VALUE_OBJ_CLASS_SPEC {
+  private:
+    enum Constants { max = 63 };
+    uint64_t _bits;
+  public:
+    LocalSet() : _bits(0) {}
+    void add(uint32_t i)        { if (i < (uint32_t)max) _bits |=  (1LL << i); }
+    void add(LocalSet* ls)      { _bits |= ls->_bits; }
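+    // Locals at index >= max are not tracked individually; test() conservatively reports them as defined.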
+    bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; }
+    void clear()                { _bits = 0; }
+    void print_on(outputStream* st, int limit) const  PRODUCT_RETURN;
+  };
+
   // Used as a combined index for locals and temps
   enum Cell {
     Cell_0, Cell_max = INT_MAX
@@ -142,6 +158,8 @@
     int         _trap_bci;
     int         _trap_index;
 
+    LocalSet    _def_locals;  // For entire block
+
     static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer);
 
   public:
@@ -181,6 +199,9 @@
     int         monitor_count() const  { return _monitor_count; }
     void    set_monitor_count(int mc)  { _monitor_count = mc; }
 
+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
     static Cell start_cell()           { return (Cell)0; }
     static Cell next_cell(Cell c)      { return (Cell)(((int)c) + 1); }
     Cell        limit_cell() const {
@@ -250,6 +271,10 @@
       return type->basic_type() == T_DOUBLE;
     }
 
+    void store_to_local(int lnum) {
+      _def_locals.add((uint) lnum);
+    }
+
     void      push_translate(ciType* type);
 
     void      push_int() {
@@ -358,6 +383,7 @@
              "must be reference type or return address");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_double(int index) {
@@ -376,6 +402,8 @@
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }
 
     void load_local_float(int index) {
@@ -388,6 +416,7 @@
       assert(is_float(type), "must be float type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_int(int index) {
@@ -400,6 +429,7 @@
       assert(is_int(type), "must be int type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_long(int index) {
@@ -418,6 +448,8 @@
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }
 
     // Stop interpretation of this path with a trap.
@@ -450,13 +482,31 @@
   };
 
   // Parameter for "find_block" calls:
-  // Describes the difference between a public and private copy.
+  // Describes the difference between a public and backedge copy.
   enum CreateOption {
     create_public_copy,
-    create_private_copy,
+    create_backedge_copy,
     no_create
   };
 
+  // Successor iterator
+  class SuccIter : public StackObj {
+  private:
+    Block* _pred;
+    int    _index;
+    Block* _succ;
+  public:
+    SuccIter()                        : _pred(NULL), _index(-1), _succ(NULL) {}
+    SuccIter(Block* pred)             : _pred(pred), _index(-1), _succ(NULL) { next(); }
+    int    index()     { return _index; }
+    Block* pred()      { return _pred; }           // Return predecessor
+    bool   done()      { return _index < 0; }      // Finished?
+    Block* succ()      { return _succ; }           // Return current successor
+    void   next();                                 // Advance
+    void   set_succ(Block* succ);                  // Update current successor
+    bool   is_normal_ctrl() { return index() < _pred->successors()->length(); }
+  };
+
   // A basic block
   class Block : public ResourceObj {
   private:
@@ -470,15 +520,24 @@
     int                              _trap_bci;
     int                              _trap_index;
 
-    // A reasonable approximation to pre-order, provided to the client.
+    // pre_order, assigned at first visit. Used as block ID and "visited" tag
     int                              _pre_order;
 
-    // Has this block been cloned for some special purpose?
-    bool                             _private_copy;
+    // A post-order, used to compute the reverse post order (RPO) provided to the client
+    int                              _post_order;  // used to compute rpo
+
+    // Has this block been cloned for a loop backedge?
+    bool                             _backedge_copy;
 
     // A pointer used for our internal work list
-    Block*                 _next;
-    bool                   _on_work_list;
+    Block*                           _next;
+    bool                             _on_work_list;      // on the work list
+    Block*                           _rpo_next;          // Reverse post order list
+
+    // Loop info
+    Loop*                            _loop;              // nearest loop
+    bool                             _irreducible_entry; // entry to irreducible loop
+    bool                             _exception_entry;   // entry to exception handler
 
     ciBlock*     ciblock() const     { return _ciblock; }
     StateVector* state() const     { return _state; }
@@ -504,10 +563,11 @@
     int start() const         { return _ciblock->start_bci(); }
     int limit() const         { return _ciblock->limit_bci(); }
     int control() const       { return _ciblock->control_bci(); }
+    JsrSet* jsrs() const      { return _jsrs; }
 
-    bool    is_private_copy() const       { return _private_copy; }
-    void   set_private_copy(bool z);
-    int        private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); }
+    bool    is_backedge_copy() const       { return _backedge_copy; }
+    void   set_backedge_copy(bool z);
+    int        backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); }
 
     // access to entry state
     int     stack_size() const         { return _state->stack_size(); }
@@ -515,6 +575,20 @@
     ciType* local_type_at(int i) const { return _state->local_type_at(i); }
     ciType* stack_type_at(int i) const { return _state->stack_type_at(i); }
 
+    // Data flow on locals
+    bool is_invariant_local(uint v) const {
+      assert(is_loop_head(), "only loop heads");
+      // Find outermost loop with same loop head
+      Loop* lp = loop();
+      while (lp->parent() != NULL) {
+        if (lp->parent()->head() != lp->head()) break;
+        lp = lp->parent();
+      }
+      return !lp->def_locals()->test(v);
+    }
+    LocalSet* def_locals() { return _state->def_locals(); }
+    const LocalSet* def_locals() const { return _state->def_locals(); }
+
     // Get the successors for this Block.
     GrowableArray<Block*>* successors(ciBytecodeStream* str,
                                       StateVector* state,
@@ -524,13 +598,6 @@
       return _successors;
     }
 
-    // Helper function for "successors" when making private copies of
-    // loop heads for C2.
-    Block * clone_loop_head(ciTypeFlow* analyzer,
-                            int branch_bci,
-                            Block* target,
-                            JsrSet* jsrs);
-
     // Get the exceptional successors for this Block.
     GrowableArray<Block*>* exceptions() {
       if (_exceptions == NULL) {
@@ -584,17 +651,126 @@
     bool   is_on_work_list() const  { return _on_work_list; }
 
     bool   has_pre_order() const  { return _pre_order >= 0; }
-    void   set_pre_order(int po)  { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; }
+    void   set_pre_order(int po)  { assert(!has_pre_order(), ""); _pre_order = po; }
     int    pre_order() const      { assert(has_pre_order(), ""); return _pre_order; }
+    void   set_next_pre_order()   { set_pre_order(outer()->inc_next_pre_order()); }
     bool   is_start() const       { return _pre_order == outer()->start_block_num(); }
 
-    // A ranking used in determining order within the work list.
-    bool   is_simpler_than(Block* other);
+    // Reverse post order
+    void   df_init();
+    bool   has_post_order() const { return _post_order >= 0; }
+    void   set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; }
+    void   reset_post_order(int o){ _post_order = o; }
+    int    post_order() const     { assert(has_post_order(), ""); return _post_order; }
+
+    bool   has_rpo() const        { return has_post_order() && outer()->have_block_count(); }
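+    // rpo == block_count() - 1 - post_order(); the start block is post-visited last, so it gets rpo 0.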
+    int    rpo() const            { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; }
+    void   set_rpo_next(Block* b) { _rpo_next = b; }
+    Block* rpo_next()             { return _rpo_next; }
+
+    // Loops
+    Loop*  loop() const                  { return _loop; }
+    void   set_loop(Loop* lp)            { _loop = lp; }
+    bool   is_loop_head() const          { return _loop && _loop->head() == this; }
+    void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
+    bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    bool   is_visited() const            { return has_pre_order(); }
+    bool   is_post_visited() const       { return has_post_order(); }
+    bool   is_clonable_exit(Loop* lp);
+    Block* looping_succ(Loop* lp);       // Successor inside of loop
+    bool   is_single_entry_loop_head() const {
+      if (!is_loop_head()) return false;
+      for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent())
+        if (lp->is_irreducible()) return false;
+      return true;
+    }
 
     void   print_value_on(outputStream* st) const PRODUCT_RETURN;
     void   print_on(outputStream* st) const       PRODUCT_RETURN;
   };
 
+  // Loop
+  class Loop : public ResourceObj {
+  private:
+    Loop* _parent;
+    Loop* _sibling;  // List of siblings, null terminated
+    Loop* _child;    // Head of child list threaded thru sibling pointer
+    Block* _head;    // Head of loop
+    Block* _tail;    // Tail of loop
+    bool   _irreducible;
+    LocalSet _def_locals;
+
+  public:
+    Loop(Block* head, Block* tail) :
+      _head(head),   _tail(tail),
+      _parent(NULL), _sibling(NULL), _child(NULL),
+      _irreducible(false), _def_locals() {}
+
+    Loop* parent()  const { return _parent; }
+    Loop* sibling() const { return _sibling; }
+    Loop* child()   const { return _child; }
+    Block* head()   const { return _head; }
+    Block* tail()   const { return _tail; }
+    void set_parent(Loop* p)  { _parent = p; }
+    void set_sibling(Loop* s) { _sibling = s; }
+    void set_child(Loop* c)   { _child = c; }
+    void set_head(Block* hd)  { _head = hd; }
+    void set_tail(Block* tl)  { _tail = tl; }
+
+    int depth() const;              // nesting depth
+
+    // Returns true if lp is a nested loop or us.
+    bool contains(Loop* lp) const;
+    bool contains(Block* blk) const { return contains(blk->loop()); }
+
+    // Data flow on locals
+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
+    // Merge the branch lp into this branch, sorting on the loop head
+    // pre_orders. Returns the new branch.
+    Loop* sorted_merge(Loop* lp);
+
+    // Mark non-single entry to loop
+    void set_irreducible(Block* entry) {
+      _irreducible = true;
+      entry->set_irreducible_entry(true);
+    }
+    bool is_irreducible() const { return _irreducible; }
+
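+    // The root loop's dummy tail is created in df_flow_types with pre_order max_jint.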
+    bool is_root() const { return _tail->pre_order() == max_jint; }
+
+    void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
+  };
+
+  // Postorder iteration over the loop tree.
+  class PostorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PostorderLoops(Loop* root) : _root(root), _current(root) {
+      while (_current->child() != NULL) {
+        _current = _current->child();
+      }
+    }
+    bool done() { return _current == NULL; }  // Finished iterating?
+    void next();                            // Advance to next loop
+    Loop* current() { return _current; }      // Return current loop.
+  };
+
+  // Preorder iteration over the loop tree.
+  class PreorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PreorderLoops(Loop* root) : _root(root), _current(root) {}
+    bool done() { return _current == NULL; }  // Finished iterating?
+    void next();                            // Advance to next loop
+    Loop* current() { return _current; }      // Return current loop.
+  };
+
   // Standard indexes of successors, for various bytecodes.
   enum {
     FALL_THROUGH   = 0,  // normal control
@@ -619,6 +795,12 @@
   // Tells if a given instruction is able to generate an exception edge.
   bool can_trap(ciBytecodeStream& str);
 
+  // Clone the loop heads. Returns true if any cloning occurred.
+  bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
+  // Clone lp's head and replace tail's successors with clone.
+  Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
 public:
   // Return the block beginning at bci which has a JsrSet compatible
   // with jsrs.
@@ -627,8 +809,8 @@
   // block factory
   Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy);
 
-  // How many of the blocks have the private_copy bit set?
-  int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
+  // How many of the blocks have the backedge_copy bit set?
+  int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
 
   // Return an existing block containing bci which has a JsrSet compatible
   // with jsrs, or NULL if there is none.
@@ -651,11 +833,18 @@
                                       return _block_map[po]; }
   Block* start_block() const        { return pre_order_at(start_block_num()); }
   int start_block_num() const       { return 0; }
+  Block* rpo_at(int rpo) const      { assert(0 <= rpo && rpo < block_count(), "out of bounds");
+                                      return _block_map[rpo]; }
+  int next_pre_order()              { return _next_pre_order; }
+  int inc_next_pre_order()          { return _next_pre_order++; }
 
 private:
   // A work list used during flow analysis.
   Block* _work_list;
 
+  // List of blocks in reverse post order
+  Block* _rpo_list;
+
   // Next Block::_pre_order.  After mapping, doubles as block_count.
   int _next_pre_order;
 
@@ -668,6 +857,15 @@
   // Add a basic block to our work list.
   void add_to_work_list(Block* block);
 
+  // Prepend a basic block to rpo list.
+  void prepend_to_rpo_list(Block* blk) {
+    blk->set_rpo_next(_rpo_list);
+    _rpo_list = blk;
+  }
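+  // Blocks are prepended as they are post-visited, so the finished list is in reverse post order.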
+
+  // Root of the loop tree
+  Loop* _loop_tree_root;
+
   // State used for make_jsr_record
   int _jsr_count;
   GrowableArray<JsrRecord*>* _jsr_records;
@@ -677,6 +875,9 @@
   // does not already exist.
   JsrRecord* make_jsr_record(int entry_address, int return_address);
 
+  void  set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; }
+  Loop* loop_tree_root()              { return _loop_tree_root; }
+
 private:
   // Get the initial state for start_bci:
   const StateVector* get_start_state();
@@ -703,6 +904,15 @@
   // necessary.
   void flow_types();
 
+  // Perform the depth first type flow analysis. Helper for flow_types.
+  void df_flow_types(Block* start,
+                     bool do_flow,
+                     StateVector* temp_vector,
+                     JsrSet* temp_set);
+
+  // Incrementally build loop tree.
+  void build_loop_tree(Block* blk);
+
   // Create the block map, which indexes blocks in pre_order.
   void map_blocks();
 
@@ -711,4 +921,6 @@
   void do_flow();
 
   void print_on(outputStream* st) const PRODUCT_RETURN;
+
+  void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
 };
--- a/hotspot/src/share/vm/code/nmethod.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1350,11 +1350,7 @@
       return false;
     }
   }
-  if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-    // Cannot do this test if verification of the UseParallelOldGC
-    // code using the PSMarkSweep code is being done.
-    assert(unloading_occurred, "Inconsistency in unloading");
-  }
+  assert(unloading_occurred, "Inconsistency in unloading");
   make_unloaded(is_alive, obj);
   return true;
 }
--- a/hotspot/src/share/vm/compiler/methodLiveness.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/compiler/methodLiveness.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -76,8 +76,9 @@
   BitCounter() : _count(0) {}
 
   // Callback when bit in map is set
-  virtual void do_bit(size_t offset) {
+  virtual bool do_bit(size_t offset) {
     _count++;
+    return true;
   }
 
   int count() {
@@ -467,7 +468,7 @@
     bci = 0;
   }
 
-  MethodLivenessResult answer(NULL,0);
+  MethodLivenessResult answer((uintptr_t*)NULL,0);
 
   if (_block_count > 0) {
     if (TimeLivenessAnalysis) _time_total.start();
--- a/hotspot/src/share/vm/compiler/methodLiveness.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/compiler/methodLiveness.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -29,7 +29,7 @@
   bool _is_valid;
 
  public:
-  MethodLivenessResult(uintptr_t* map, idx_t size_in_bits)
+  MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits)
     : BitMap(map, size_in_bits)
     , _is_valid(false)
   {}
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -790,7 +790,7 @@
 }
 
 
-HeapWord* CompactibleFreeListSpace::block_start(const void* p) const {
+HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const {
   NOT_PRODUCT(verify_objects_initialized());
   return _bt.block_start(p);
 }
@@ -2286,9 +2286,9 @@
 }
 
 void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
-  guarantee(size % 2 == 0, "Odd slots should be empty");
-  for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL;
-    fc = fc->next()) {
+  FreeChunk* fc =  _indexedFreeList[size].head();
+  guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
+  for (; fc != NULL; fc = fc->next()) {
     guarantee(fc->size() == size, "Size inconsistency");
     guarantee(fc->isFree(), "!free?");
     guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
@@ -2790,10 +2790,11 @@
   assert(n_threads > 0, "Unexpected n_threads argument");
   const size_t task_size = rescan_task_size();
   size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size;
-  assert((used_region().start() + (n_tasks - 1)*task_size <
-          used_region().end()) &&
-         (used_region().start() + n_tasks*task_size >=
-          used_region().end()), "n_task calculation incorrect");
+  assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect");
+  assert(n_tasks == 0 ||
+         ((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) &&
+          (used_region().start() + n_tasks*task_size >= used_region().end())),
+         "n_tasks calculation incorrect");
   SequentialSubTasksDone* pst = conc_par_seq_tasks();
   assert(!pst->valid(), "Clobbering existing data?");
   pst->set_par_threads(n_threads);
@@ -2833,7 +2834,7 @@
   assert(n_tasks == 0 ||
          ((span.start() + (n_tasks - 1)*task_size < span.end()) &&
           (span.start() + n_tasks*task_size >= span.end())),
-         "n_task calculation incorrect");
+         "n_tasks calculation incorrect");
   SequentialSubTasksDone* pst = conc_par_seq_tasks();
   assert(!pst->valid(), "Clobbering existing data?");
   pst->set_par_threads(n_threads);
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -502,7 +502,7 @@
 
   void blk_iterate(BlkClosure* cl);
   void blk_iterate_careful(BlkClosureCareful* cl);
-  HeapWord* block_start(const void* p) const;
+  HeapWord* block_start_const(const void* p) const;
   HeapWord* block_start_careful(const void* p) const;
   size_t block_size(const HeapWord* p) const;
   size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const;
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -2761,13 +2761,14 @@
  public:
   VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {}
 
-  void do_bit(size_t offset) {
+  bool do_bit(size_t offset) {
     HeapWord* addr = _marks->offsetToHeapWord(offset);
     if (!_marks->isMarked(addr)) {
       oop(addr)->print();
       gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
       _failed = true;
     }
+    return true;
   }
 
   bool failed() { return _failed; }
@@ -3650,6 +3651,7 @@
   CompactibleFreeListSpace*  _cms_space;
   CompactibleFreeListSpace* _perm_space;
   HeapWord*     _global_finger;
+  HeapWord*     _restart_addr;
 
   //  Exposed here for yielding support
   Mutex* const _bit_map_lock;
@@ -3680,7 +3682,7 @@
     _term.set_task(this);
     assert(_cms_space->bottom() < _perm_space->bottom(),
            "Finger incorrectly initialized below");
-    _global_finger = _cms_space->bottom();
+    _restart_addr = _global_finger = _cms_space->bottom();
   }
 
 
@@ -3698,6 +3700,10 @@
   bool result() { return _result; }
 
   void reset(HeapWord* ra) {
+    assert(_global_finger >= _cms_space->end(),  "Postcondition of ::work(i)");
+    assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)");
+    assert(ra             <  _perm_space->end(), "ra too large");
+    _restart_addr = _global_finger = ra;
     _term.reset_for_reuse();
   }
 
@@ -3842,16 +3848,24 @@
   int n_tasks = pst->n_tasks();
   // We allow that there may be no tasks to do here because
   // we are restarting after a stack overflow.
-  assert(pst->valid() || n_tasks == 0, "Uninitializd use?");
+  assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
   int nth_task = 0;
 
-  HeapWord* start = sp->bottom();
+  HeapWord* aligned_start = sp->bottom();
+  if (sp->used_region().contains(_restart_addr)) {
+    // Align down to a card boundary for the start of 0th task
+    // for this space.
+    aligned_start =
+      (HeapWord*)align_size_down((uintptr_t)_restart_addr,
+                                 CardTableModRefBS::card_size);
+  }
+
   size_t chunk_size = sp->marking_task_size();
   while (!pst->is_task_claimed(/* reference */ nth_task)) {
     // Having claimed the nth task in this space,
     // compute the chunk that it corresponds to:
-    MemRegion span = MemRegion(start + nth_task*chunk_size,
-                               start + (nth_task+1)*chunk_size);
+    MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
+                               aligned_start + (nth_task+1)*chunk_size);
     // Try and bump the global finger via a CAS;
     // note that we need to do the global finger bump
     // _before_ taking the intersection below, because
@@ -3866,26 +3880,40 @@
     // beyond the "top" address of the space.
     span = span.intersection(sp->used_region());
     if (!span.is_empty()) {  // Non-null task
-      // We want to skip the first object because
-      // the protocol is to scan any object in its entirety
-      // that _starts_ in this span; a fortiori, any
-      // object starting in an earlier span is scanned
-      // as part of an earlier claimed task.
-      // Below we use the "careful" version of block_start
-      // so we do not try to navigate uninitialized objects.
-      HeapWord* prev_obj = sp->block_start_careful(span.start());
-      // Below we use a variant of block_size that uses the
-      // Printezis bits to avoid waiting for allocated
-      // objects to become initialized/parsable.
-      while (prev_obj < span.start()) {
-        size_t sz = sp->block_size_no_stall(prev_obj, _collector);
-        if (sz > 0) {
-          prev_obj += sz;
+      HeapWord* prev_obj;
+      assert(!span.contains(_restart_addr) || nth_task == 0,
+             "Inconsistency");
+      if (nth_task == 0) {
+        // For the 0th task, we'll not need to compute a block_start.
+        if (span.contains(_restart_addr)) {
+          // In the case of a restart because of stack overflow,
+          // we might additionally skip a chunk prefix.
+          prev_obj = _restart_addr;
         } else {
-          // In this case we may end up doing a bit of redundant
-          // scanning, but that appears unavoidable, short of
-          // locking the free list locks; see bug 6324141.
-          break;
+          prev_obj = span.start();
+        }
+      } else {
+        // We want to skip the first object because
+        // the protocol is to scan any object in its entirety
+        // that _starts_ in this span; a fortiori, any
+        // object starting in an earlier span is scanned
+        // as part of an earlier claimed task.
+        // Below we use the "careful" version of block_start
+        // so we do not try to navigate uninitialized objects.
+        prev_obj = sp->block_start_careful(span.start());
+        // Below we use a variant of block_size that uses the
+        // Printezis bits to avoid waiting for allocated
+        // objects to become initialized/parsable.
+        while (prev_obj < span.start()) {
+          size_t sz = sp->block_size_no_stall(prev_obj, _collector);
+          if (sz > 0) {
+            prev_obj += sz;
+          } else {
+            // In this case we may end up doing a bit of redundant
+            // scanning, but that appears unavoidable, short of
+            // locking the free list locks; see bug 6324141.
+            break;
+          }
         }
       }
       if (prev_obj < span.end()) {
@@ -3938,12 +3966,14 @@
   void handle_stack_overflow(HeapWord* lost);
 };
 
-// Grey object rescan during work stealing phase --
-// the salient assumption here is that stolen oops must
-// always be initialized, so we do not need to check for
-// uninitialized objects before scanning here.
+// Grey object scanning during work stealing phase --
+// the salient assumption here is that any references
+// that are in these stolen objects being scanned must
+// already have been initialized (else they would not have
+// been published), so we do not need to check for
+// uninitialized objects before pushing here.
 void Par_ConcMarkingClosure::do_oop(oop obj) {
-  assert(obj->is_oop_or_null(), "expected an oop or NULL");
+  assert(obj->is_oop_or_null(true), "expected an oop or NULL");
   HeapWord* addr = (HeapWord*)obj;
   // Check if oop points into the CMS generation
   // and is not marked
@@ -4001,7 +4031,7 @@
 // in CMSCollector's _restart_address.
 void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
   // We need to do this under a mutex to prevent other
-  // workers from interfering with the expansion below.
+  // workers from interfering with the work done below.
   MutexLockerEx ml(_overflow_stack->par_lock(),
                    Mutex::_no_safepoint_check_flag);
   // Remember the least grey address discarded
@@ -4640,8 +4670,11 @@
       startTimer();
       sample_eden();
       // Get and clear dirty region from card table
-      dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean(
-                                    MemRegion(nextAddr, endAddr));
+      dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset(
+                                    MemRegion(nextAddr, endAddr),
+                                    true,
+                                    CardTableModRefBS::precleaned_card_val());
+
       assert(dirtyRegion.start() >= nextAddr,
              "returned region inconsistent?");
     }
@@ -5409,8 +5442,8 @@
                               &mrias_cl);
   {
     TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty);
-    // Iterate over the dirty cards, marking them precleaned, and
-    // setting the corresponding bits in the mod union table.
+    // Iterate over the dirty cards, setting the corresponding bits in the
+    // mod union table.
     {
       ModUnionClosure modUnionClosure(&_modUnionTable);
       _ct->ct_bs()->dirty_card_iterate(
@@ -6182,7 +6215,7 @@
 // bit vector itself. That is done by a separate call CMSBitMap::allocate()
 // further below.
 CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
-  _bm(NULL,0),
+  _bm(),
   _shifter(shifter),
   _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
 {
@@ -6207,7 +6240,7 @@
   }
   assert(_virtual_space.committed_size() == brs.size(),
          "didn't reserve backing store for all of CMS bit map?");
-  _bm.set_map((uintptr_t*)_virtual_space.low());
+  _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
          _bmWordSize, "inconsistency in bit map sizing");
   _bm.set_size(_bmWordSize >> _shifter);
@@ -6554,7 +6587,7 @@
   if (obj != NULL) {
     // Ignore mark word because this could be an already marked oop
     // that may be chained at the end of the overflow list.
-    assert(obj->is_oop(), "expected an oop");
+    assert(obj->is_oop(true), "expected an oop");
     HeapWord* addr = (HeapWord*)obj;
     if (_span.contains(addr) &&
         !_bit_map->isMarked(addr)) {
@@ -6845,10 +6878,10 @@
 
 // Should revisit to see if this should be restructured for
 // greater efficiency.
-void MarkFromRootsClosure::do_bit(size_t offset) {
+bool MarkFromRootsClosure::do_bit(size_t offset) {
   if (_skipBits > 0) {
     _skipBits--;
-    return;
+    return true;
   }
   // convert offset into a HeapWord*
   HeapWord* addr = _bitMap->startWord() + offset;
@@ -6886,10 +6919,11 @@
           } // ...else the setting of klass will dirty the card anyway.
         }
       DEBUG_ONLY(})
-      return;
+      return true;
     }
   }
   scanOopsInOop(addr);
+  return true;
 }
 
 // We take a break if we've been at this for a while,
@@ -7023,10 +7057,10 @@
 
 // Should revisit to see if this should be restructured for
 // greater efficiency.
-void Par_MarkFromRootsClosure::do_bit(size_t offset) {
+bool Par_MarkFromRootsClosure::do_bit(size_t offset) {
   if (_skip_bits > 0) {
     _skip_bits--;
-    return;
+    return true;
   }
   // convert offset into a HeapWord*
   HeapWord* addr = _bit_map->startWord() + offset;
@@ -7041,10 +7075,11 @@
     if (p->klass_or_null() == NULL || !p->is_parsable()) {
       // in the case of Clean-on-Enter optimization, redirty card
       // and avoid clearing card by increasing  the threshold.
-      return;
+      return true;
     }
   }
   scan_oops_in_oop(addr);
+  return true;
 }
 
 void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) {
@@ -7167,7 +7202,7 @@
 
 // Should revisit to see if this should be restructured for
 // greater efficiency.
-void MarkFromRootsVerifyClosure::do_bit(size_t offset) {
+bool MarkFromRootsVerifyClosure::do_bit(size_t offset) {
   // convert offset into a HeapWord*
   HeapWord* addr = _verification_bm->startWord() + offset;
   assert(_verification_bm->endWord() && addr < _verification_bm->endWord(),
@@ -7195,6 +7230,7 @@
     new_oop->oop_iterate(&_pam_verify_closure);
   }
   assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition");
+  return true;
 }
 
 PushAndMarkVerifyClosure::PushAndMarkVerifyClosure(
@@ -7289,6 +7325,8 @@
   _should_remember_klasses(collector->should_unload_classes())
 { }
 
+// Assumes thread-safe access by callers, who are
+// responsible for mutual exclusion.
 void CMSCollector::lower_restart_addr(HeapWord* low) {
   assert(_span.contains(low), "Out of bounds addr");
   if (_restart_addr == NULL) {
@@ -7314,7 +7352,7 @@
 // in CMSCollector's _restart_address.
 void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
   // We need to do this under a mutex to prevent other
-  // workers from interfering with the expansion below.
+  // workers from interfering with the work done below.
   MutexLockerEx ml(_overflow_stack->par_lock(),
                    Mutex::_no_safepoint_check_flag);
   // Remember the least grey address discarded
@@ -7438,8 +7476,12 @@
 // Grey object rescan during pre-cleaning and second checkpoint phases --
 // the non-parallel version (the parallel version appears further below.)
 void PushAndMarkClosure::do_oop(oop obj) {
-  // If _concurrent_precleaning, ignore mark word verification
-  assert(obj->is_oop_or_null(_concurrent_precleaning),
+  // Ignore mark word verification. If during concurrent precleaning,
+  // the object monitor may be locked. If during the checkpoint
+  // phases, the object may already have been reached by a different
+  // path and may be at the end of the global overflow list (so
+  // the mark word may be NULL).
+  assert(obj->is_oop_or_null(true /* ignore mark word */),
          "expected an oop or NULL");
   HeapWord* addr = (HeapWord*)obj;
   // Check if oop points into the CMS generation
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1327,7 +1327,7 @@
                        CMSMarkStack*  markStack,
                        CMSMarkStack*  revisitStack,
                        bool should_yield, bool verifying = false);
-  void do_bit(size_t offset);
+  bool do_bit(size_t offset);
   void reset(HeapWord* addr);
   inline void do_yield_check();
 
@@ -1363,7 +1363,7 @@
                        CMSMarkStack*  overflow_stack,
                        CMSMarkStack*  revisit_stack,
                        bool should_yield);
-  void do_bit(size_t offset);
+  bool do_bit(size_t offset);
   inline void do_yield_check();
 
  private:
@@ -1411,7 +1411,7 @@
                              CMSBitMap* verification_bm,
                              CMSBitMap* cms_bm,
                              CMSMarkStack*  mark_stack);
-  void do_bit(size_t offset);
+  bool do_bit(size_t offset);
   void reset(HeapWord* addr);
 };
 
@@ -1420,8 +1420,9 @@
 // "empty" (i.e. the bit vector doesn't have any 1-bits).
 class FalseBitMapClosure: public BitMapClosure {
  public:
-  void do_bit(size_t offset) {
+  bool do_bit(size_t offset) {
     guarantee(false, "Should not have a 1 bit");
+    return true;
   }
 };
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A BufferingOopClosure tries to separate out the cost of finding roots
+// from the cost of applying closures to them.  It maintains an array of
+// ref-containing locations.  Until the array is full, applying the closure
+// to an oop* merely records that location in the array.  Since this
+// closure app cost is small, an elapsed timer can approximately attribute
+// all of this cost to the cost of finding the roots.  When the array fills
+// up, the wrapped closure is applied to all elements, keeping track of
+// the elapsed time of this process, and leaving the array empty.
+// The caller must be sure to call "done" to process any unprocessed
+// buffered entries.  (A usage sketch follows the class below.)
+
+class Generation;
+class HeapRegion;
+
+class BufferingOopClosure: public OopClosure {
+protected:
+  enum PrivateConstants {
+    BufferLength = 1024
+  };
+
+  oop          *_buffer[BufferLength];
+  oop         **_buffer_top;
+  oop         **_buffer_curr;
+
+  OopClosure  *_oc;
+  double       _closure_app_seconds;
+
+  void process_buffer () {
+
+    double start = os::elapsedTime();
+    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
+      _oc->do_oop(*curr);
+    }
+    _buffer_curr = _buffer;
+    _closure_app_seconds += (os::elapsedTime() - start);
+  }
+
+public:
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop *p) {
+    if (_buffer_curr == _buffer_top) {
+      process_buffer();
+    }
+
+    *_buffer_curr = p;
+    ++_buffer_curr;
+  }
+  void done () {
+    if (_buffer_curr > _buffer) {
+      process_buffer();
+    }
+  }
+  double closure_app_seconds () {
+    return _closure_app_seconds;
+  }
+  BufferingOopClosure (OopClosure *oc) :
+    _oc(oc),
+    _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
+    _closure_app_seconds(0.0) { }
+};
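+
+// A minimal usage sketch for the closure above (illustrative only;
+// "CountLiveOopsClosure" is a hypothetical closure, not part of this change):
+//
+//   class CountLiveOopsClosure: public OopClosure {
+//     size_t _count;
+//   public:
+//     CountLiveOopsClosure() : _count(0) { }
+//     virtual void do_oop(oop* p)       { if (*p != NULL) _count++; }
+//     virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
+//     size_t count() const              { return _count; }
+//   };
+//
+//   CountLiveOopsClosure count_cl;
+//   BufferingOopClosure buffered_cl(&count_cl);
+//   // ... pass &buffered_cl wherever an OopClosure* is expected ...
+//   buffered_cl.done();  // flush any oops still sitting in the buffer
+//   double secs = buffered_cl.closure_app_seconds();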
+
+class BufferingOopsInGenClosure: public OopsInGenClosure {
+  BufferingOopClosure _boc;
+  OopsInGenClosure* _oc;
+public:
+  BufferingOopsInGenClosure(OopsInGenClosure *oc) :
+    _boc(oc), _oc(oc) {}
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  virtual void do_oop(oop* p) {
+    assert(generation()->is_in_reserved(p), "Must be in!");
+    _boc.do_oop(p);
+  }
+
+  void done() {
+    _boc.done();
+  }
+
+  double closure_app_seconds () {
+    return _boc.closure_app_seconds();
+  }
+
+  void set_generation(Generation* gen) {
+    OopsInGenClosure::set_generation(gen);
+    _oc->set_generation(gen);
+  }
+
+  void reset_generation() {
+    // Make sure we finish the current work with the current generation.
+    _boc.done();
+    OopsInGenClosure::reset_generation();
+    _oc->reset_generation();
+  }
+
+};
+
+
+class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure {
+private:
+  enum PrivateConstants {
+    BufferLength = 1024
+  };
+
+  oop                      *_buffer[BufferLength];
+  oop                     **_buffer_top;
+  oop                     **_buffer_curr;
+
+  HeapRegion               *_hr_buffer[BufferLength];
+  HeapRegion              **_hr_curr;
+
+  OopsInHeapRegionClosure  *_oc;
+  double                    _closure_app_seconds;
+
+  void process_buffer () {
+
+    assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer),
+           "the two lengths should be the same");
+
+    double start = os::elapsedTime();
+    HeapRegion **hr_curr = _hr_buffer;
+    HeapRegion *hr_prev = NULL;
+    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
+      HeapRegion *region = *hr_curr;
+      if (region != hr_prev) {
+        _oc->set_region(region);
+        hr_prev = region;
+      }
+      _oc->do_oop(*curr);
+      ++hr_curr;
+    }
+    _buffer_curr = _buffer;
+    _hr_curr = _hr_buffer;
+    _closure_app_seconds += (os::elapsedTime() - start);
+  }
+
+public:
+  virtual void do_oop(narrowOop *p) {
+    guarantee(false, "NYI");
+  }
+
+  virtual void do_oop(oop *p) {
+    if (_buffer_curr == _buffer_top) {
+      assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
+      process_buffer();
+    }
+
+    *_buffer_curr = p;
+    ++_buffer_curr;
+    *_hr_curr = _from;
+    ++_hr_curr;
+  }
+  void done () {
+    if (_buffer_curr > _buffer) {
+      assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
+      process_buffer();
+    }
+  }
+  double closure_app_seconds () {
+    return _closure_app_seconds;
+  }
+  BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) :
+    _oc(oc),
+    _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
+    _hr_curr(_hr_buffer),
+    _closure_app_seconds(0.0) { }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,409 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_collectionSetChooser.cpp.incl"
+
+CSetChooserCache::CSetChooserCache() {
+  for (int i = 0; i < CacheLength; ++i)
+    _cache[i] = NULL;
+  clear();
+}
+
+void CSetChooserCache::clear() {
+  _occupancy = 0;
+  _first = 0;
+  for (int i = 0; i < CacheLength; ++i) {
+    HeapRegion *hr = _cache[i];
+    if (hr != NULL)
+      hr->set_sort_index(-1);
+    _cache[i] = NULL;
+  }
+}
+
+#ifndef PRODUCT
+bool CSetChooserCache::verify() {
+  int index = _first;
+  HeapRegion *prev = NULL;
+  for (int i = 0; i < _occupancy; ++i) {
+    guarantee(_cache[index] != NULL, "cache entry should not be empty");
+    HeapRegion *hr = _cache[index];
+    guarantee(!hr->is_young(), "should not be young!");
+    if (prev != NULL) {
+      guarantee(prev->gc_efficiency() >= hr->gc_efficiency(),
+                "cache should be correctly ordered");
+    }
+    guarantee(hr->sort_index() == get_sort_index(index),
+              "sort index should be correct");
+    index = trim_index(index + 1);
+    prev = hr;
+  }
+
+  for (int i = 0; i < (CacheLength - _occupancy); ++i) {
+    guarantee(_cache[index] == NULL, "cache entry should be empty");
+    index = trim_index(index + 1);
+  }
+
+  guarantee(index == _first, "we should have reached where we started from");
+  return true;
+}
+#endif // PRODUCT
+
+void CSetChooserCache::insert(HeapRegion *hr) {
+  assert(!is_full(), "cache should not be empty");
+  hr->calc_gc_efficiency();
+
+  int empty_index;
+  if (_occupancy == 0) {
+    empty_index = _first;
+  } else {
+    empty_index = trim_index(_first + _occupancy);
+    assert(_cache[empty_index] == NULL, "last slot should be empty");
+    int last_index = trim_index(empty_index - 1);
+    HeapRegion *last = _cache[last_index];
+    assert(last != NULL,"as the cache is not empty, last should not be empty");
+    while (empty_index != _first &&
+           last->gc_efficiency() < hr->gc_efficiency()) {
+      _cache[empty_index] = last;
+      last->set_sort_index(get_sort_index(empty_index));
+      empty_index = last_index;
+      last_index = trim_index(last_index - 1);
+      last = _cache[last_index];
+    }
+  }
+  _cache[empty_index] = hr;
+  hr->set_sort_index(get_sort_index(empty_index));
+
+  ++_occupancy;
+  assert(verify(), "cache should be consistent");
+}
+
+HeapRegion *CSetChooserCache::remove_first() {
+  if (_occupancy > 0) {
+    assert(_cache[_first] != NULL, "cache should have at least one region");
+    HeapRegion *ret = _cache[_first];
+    _cache[_first] = NULL;
+    ret->set_sort_index(-1);
+    --_occupancy;
+    _first = trim_index(_first + 1);
+    assert(verify(), "cache should be consistent");
+    return ret;
+  } else {
+    return NULL;
+  }
+}
+
+// This is a bit expensive... but we expect that it should not be called
+// too often.
+void CSetChooserCache::remove(HeapRegion *hr) {
+  assert(_occupancy > 0, "cache should not be empty");
+  assert(hr->sort_index() < -1, "should already be in the cache");
+  int index = get_index(hr->sort_index());
+  assert(_cache[index] == hr, "index should be correct");
+  int next_index = trim_index(index + 1);
+  int last_index = trim_index(_first + _occupancy - 1);
+  while (index != last_index) {
+    assert(_cache[next_index] != NULL, "should not be null");
+    _cache[index] = _cache[next_index];
+    _cache[index]->set_sort_index(get_sort_index(index));
+
+    index = next_index;
+    next_index = trim_index(next_index+1);
+  }
+  assert(index == last_index, "should have reached the last one");
+  _cache[index] = NULL;
+  hr->set_sort_index(-1);
+  --_occupancy;
+  assert(verify(), "cache should be consistent");
+}
+
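+// Comparator used to sort the marked-region array: regions with higher
+// gc_efficiency() come first, and NULL entries sort after all non-NULL ones.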
+static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
+  if (hr1 == NULL) {
+    if (hr2 == NULL) return 0;
+    else return 1;
+  } else if (hr2 == NULL) {
+    return -1;
+  }
+  if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1;
+  else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1;
+  else return 0;
+}
+
+static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
+  return orderRegions(*hr1p, *hr2p);
+}
+
+CollectionSetChooser::CollectionSetChooser() :
+  // The line below is the worst bit of C++ hackery I've ever written
+  // (Detlefs, 11/23).  You should think of it as equivalent to
+  // "_regions(100, true)": initialize the growable array and inform it
+  // that it should allocate its elem array(s) on the C heap.  The first
+  // argument, however, is actually a comma expression (new-expr, 100).
+  // The purpose of the new_expr is to inform the growable array that it
+  // is *already* allocated on the C heap: it uses the placement syntax to
+  // keep it from actually doing any allocation.
+  _markedRegions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
+                                             (void*)&_markedRegions,
+                                             ResourceObj::C_HEAP),
+                  100),
+                 true),
+  _curMarkedIndex(0),
+  _numMarkedRegions(0),
+  _unmarked_age_1_returned_as_new(false),
+  _first_par_unreserved_idx(0)
+{}
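+
+// An illustrative restatement of the initializer above (not new behavior):
+// the comma expression first runs the placement form of
+// ResourceObj::operator new, which marks the embedded _markedRegions header
+// as already allocated and as a C-heap object, and then evaluates to 100,
+// which becomes the initial-capacity argument.  The net effect is roughly
+//
+//   GrowableArray<HeapRegion*> _markedRegions(100, true /* on C heap */);
+//
+// except that the array header itself is embedded in this object rather
+// than separately heap-allocated.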
+
+
+
+#ifndef PRODUCT
+bool CollectionSetChooser::verify() {
+  int index = 0;
+  guarantee(_curMarkedIndex <= _numMarkedRegions,
+            "_curMarkedIndex should be within bounds");
+  while (index < _curMarkedIndex) {
+    guarantee(_markedRegions.at(index++) == NULL,
+              "all entries before _curMarkedIndex should be NULL");
+  }
+  HeapRegion *prev = NULL;
+  while (index < _numMarkedRegions) {
+    HeapRegion *curr = _markedRegions.at(index++);
+    if (curr != NULL) {
+      int si = curr->sort_index();
+      guarantee(!curr->is_young(), "should not be young!");
+      guarantee(si > -1 && si == (index-1), "sort index invariant");
+      if (prev != NULL) {
+        guarantee(orderRegions(prev, curr) != 1, "regions should be sorted");
+      }
+      prev = curr;
+    }
+  }
+  return _cache.verify();
+}
+#endif
+
+bool
+CollectionSetChooser::addRegionToCache() {
+  assert(!_cache.is_full(), "cache should not be full");
+
+  HeapRegion *hr = NULL;
+  while (hr == NULL && _curMarkedIndex < _numMarkedRegions) {
+    hr = _markedRegions.at(_curMarkedIndex++);
+  }
+  if (hr == NULL)
+    return false;
+  assert(!hr->is_young(), "should not be young!");
+  assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant");
+  _markedRegions.at_put(hr->sort_index(), NULL);
+  _cache.insert(hr);
+  assert(!_cache.is_empty(), "cache should not be empty");
+  assert(verify(), "cache should be consistent");
+  return true;
+}
+
+void
+CollectionSetChooser::fillCache() {
+  while (!_cache.is_full() && addRegionToCache()) {
+  }
+}
+
+void
+CollectionSetChooser::sortMarkedHeapRegions() {
+  guarantee(_cache.is_empty(), "cache should be empty");
+  // First trim any unused portion of the top in the parallel case.
+  if (_first_par_unreserved_idx > 0) {
+    if (G1PrintParCleanupStats) {
+      gclog_or_tty->print("     Truncating _markedRegions from %d to %d.\n",
+                          _markedRegions.length(), _first_par_unreserved_idx);
+    }
+    assert(_first_par_unreserved_idx <= _markedRegions.length(),
+           "Or we didn't reserved enough length");
+    _markedRegions.trunc_to(_first_par_unreserved_idx);
+  }
+  _markedRegions.sort(orderRegions);
+  assert(_numMarkedRegions <= _markedRegions.length(), "Requirement");
+  assert(_numMarkedRegions == 0
+         || _markedRegions.at(_numMarkedRegions-1) != NULL,
+         "Testing _numMarkedRegions");
+  assert(_numMarkedRegions == _markedRegions.length()
+         || _markedRegions.at(_numMarkedRegions) == NULL,
+         "Testing _numMarkedRegions");
+  if (G1PrintParCleanupStats) {
+    gclog_or_tty->print_cr("     Sorted %d marked regions.", _numMarkedRegions);
+  }
+  for (int i = 0; i < _numMarkedRegions; i++) {
+    assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
+    _markedRegions.at(i)->set_sort_index(i);
+    if (G1PrintRegionLivenessInfo > 0) {
+      if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:");
+      if (i < G1PrintRegionLivenessInfo ||
+          (_numMarkedRegions-i) < G1PrintRegionLivenessInfo) {
+        HeapRegion* hr = _markedRegions.at(i);
+        size_t u = hr->used();
+        gclog_or_tty->print_cr("  Region %d: %d used, %d max live, %5.2f%%.",
+                      i, u, hr->max_live_bytes(),
+                      100.0*(float)hr->max_live_bytes()/(float)u);
+      }
+    }
+  }
+  if (G1PolicyVerbose > 1)
+    printSortedHeapRegions();
+  assert(verify(), "should now be sorted");
+}
+
+void
+printHeapRegion(HeapRegion *hr) {
+  if (hr->isHumongous())
+    gclog_or_tty->print("H: ");
+  if (hr->in_collection_set())
+    gclog_or_tty->print("CS: ");
+  if (hr->popular())
+    gclog_or_tty->print("pop: ");
+  gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) "
+                         "[" PTR_FORMAT ", " PTR_FORMAT"] "
+                         "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.",
+                         hr, hr->is_young() ? "Y " : "  ",
+                         hr->is_marked()? "M1" : "M0",
+                         hr->bottom(), hr->end(),
+                         hr->used()/K, hr->garbage_bytes()/K);
+}
+
+void
+CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
+  assert(!hr->isHumongous(),
+         "Humongous regions shouldn't be added to the collection set");
+  assert(!hr->is_young(), "should not be young!");
+  _markedRegions.append(hr);
+  _numMarkedRegions++;
+  hr->calc_gc_efficiency();
+}
+
+void
+CollectionSetChooser::
+prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
+  _first_par_unreserved_idx = 0;
+  size_t max_waste = ParallelGCThreads * chunkSize;
+  // it should be aligned with respect to chunkSize
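+  // (e.g., n_regions == 10 and chunkSize == 4 give aligned_n_regions == 12)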
+  size_t aligned_n_regions =
+                     (n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
+  assert( aligned_n_regions % chunkSize == 0, "should be aligned" );
+  _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL);
+}
+
+jint
+CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
+  jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
+  assert(_markedRegions.length() > res + n_regions - 1,
+         "Should already have been expanded");
+  return res - n_regions;
+}
+
+void
+CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
+  assert(_markedRegions.at(index) == NULL, "precondition");
+  assert(!hr->is_young(), "should not be young!");
+  _markedRegions.at_put(index, hr);
+  hr->calc_gc_efficiency();
+}
+
+void
+CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) {
+  (void)Atomic::add(inc_by, &_numMarkedRegions);
+}
+
+void
+CollectionSetChooser::clearMarkedHeapRegions(){
+  for (int i = 0; i < _markedRegions.length(); i++) {
+    HeapRegion* r =   _markedRegions.at(i);
+    if (r != NULL) r->set_sort_index(-1);
+  }
+  _markedRegions.clear();
+  _curMarkedIndex = 0;
+  _numMarkedRegions = 0;
+  _cache.clear();
+};
+
+void
+CollectionSetChooser::updateAfterFullCollection() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  clearMarkedHeapRegions();
+}
+
+void
+CollectionSetChooser::printSortedHeapRegions() {
+  gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage",
+                _numMarkedRegions);
+  for (int i = 0; i < _markedRegions.length(); i++) {
+    printHeapRegion(_markedRegions.at(i));
+  }
+  gclog_or_tty->print_cr("Done sorted heap region print");
+}
+
+void CollectionSetChooser::removeRegion(HeapRegion *hr) {
+  int si = hr->sort_index();
+  assert(si == -1 || hr->is_marked(), "Sort index not valid.");
+  if (si > -1) {
+    assert(_markedRegions.at(si) == hr, "Sort index not valid." );
+    _markedRegions.at_put(si, NULL);
+  } else if (si < -1) {
+    assert(_cache.region_in_cache(hr), "should be in the cache");
+    _cache.remove(hr);
+    assert(hr->sort_index() == -1, "sort index invariant");
+  }
+  hr->set_sort_index(-1);
+}
+
+// if time_remaining < 0.0, then this method should try to return
+// a region, whether it fits within the remaining time or not
+HeapRegion*
+CollectionSetChooser::getNextMarkedRegion(double time_remaining,
+                                          double avg_prediction) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  fillCache();
+  if (_cache.is_empty()) {
+    assert(_curMarkedIndex == _numMarkedRegions,
+           "if cache is empty, list should also be empty");
+    return NULL;
+  }
+
+  HeapRegion *hr = _cache.get_first();
+  assert(hr != NULL, "if cache not empty, first entry should be non-null");
+  double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false);
+
+  if (g1p->adaptive_young_list_length()) {
+    if (time_remaining - predicted_time < 0.0) {
+      g1h->check_if_region_is_too_expensive(predicted_time);
+      return NULL;
+    }
+  } else {
+    if (predicted_time > 2.0 * avg_prediction) {
+      return NULL;
+    }
+  }
+
+  HeapRegion *hr2 = _cache.remove_first();
+  assert(hr == hr2, "cache contents should not have changed");
+
+  return hr;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// We need to sort heap regions by collection desirability.
+
+class CSetChooserCache {
+private:
+  enum {
+    CacheLength = 16
+  } PrivateConstants;
+
+  HeapRegion*  _cache[CacheLength];
+  int          _occupancy; // number of regions in the cache
+  int          _first; // "first" region in the cache
+
+  // adding CacheLength to deal with negative values
+  inline int trim_index(int index) {
+    return (index + CacheLength) % CacheLength;
+  }
+
+  inline int get_sort_index(int index) {
+    return -index-2;
+  }
+  inline int get_index(int sort_index) {
+    return -sort_index-2;
+  }
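+
+  // A worked example of the encoding above (illustrative): cache slot 0 is
+  // published as sort index -2, slot 1 as -3, and so on.  Thus a region with
+  // sort_index() < -1 is in this cache, sort_index() == -1 means it is
+  // tracked nowhere, and sort_index() >= 0 is its position in the sorted
+  // marked-region array.  get_index() inverts get_sort_index():
+  // get_index(-2) == 0, get_index(-3) == 1, etc.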
+
+public:
+  CSetChooserCache(void);
+
+  inline int occupancy(void) { return _occupancy; }
+  inline bool is_full()      { return _occupancy == CacheLength; }
+  inline bool is_empty()     { return _occupancy == 0; }
+
+  void clear(void);
+  void insert(HeapRegion *hr);
+  HeapRegion *remove_first(void);
+  void remove (HeapRegion *hr);
+  inline HeapRegion *get_first(void) {
+    return _cache[_first];
+  }
+
+#ifndef PRODUCT
+  bool verify (void);
+  bool region_in_cache(HeapRegion *hr) {
+    int sort_index = hr->sort_index();
+    if (sort_index < -1) {
+      int index = get_index(sort_index);
+      guarantee(index < CacheLength, "should be within bounds");
+      return _cache[index] == hr;
+    } else
+      return false;
+  }
+#endif // PRODUCT
+};
+
+class CollectionSetChooser: public CHeapObj {
+
+  GrowableArray<HeapRegion*> _markedRegions;
+  int _curMarkedIndex;
+  int _numMarkedRegions;
+  CSetChooserCache _cache;
+
+  // True iff the last collection pause ran out of new "age 0" regions, and
+  // returned an "age 1" region.
+  bool _unmarked_age_1_returned_as_new;
+
+  jint _first_par_unreserved_idx;
+
+public:
+
+  HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction);
+
+  CollectionSetChooser();
+
+  void printSortedHeapRegions();
+
+  void sortMarkedHeapRegions();
+  void fillCache();
+  bool addRegionToCache(void);
+  void addMarkedHeapRegion(HeapRegion *hr);
+
+  // Must be called before calls to getParMarkedHeapRegionChunk.
+  // "n_regions" is the number of regions, "chunkSize" the chunk size.
+  void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize);
+  // Returns the first index in a contiguous chunk of "n_regions" indexes
+  // that the calling thread has reserved.  These must be set by the
+  // calling thread using "setMarkedHeapRegion" (to NULL if necessary).
+  jint getParMarkedHeapRegionChunk(jint n_regions);
+  // Set the marked array entry at index to hr.  Careful to claim the index
+  // first if in parallel.
+  void setMarkedHeapRegion(jint index, HeapRegion* hr);
+  // Atomically increment the number of claimed regions by "inc_by".
+  void incNumMarkedHeapRegions(jint inc_by);
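+
+  // A sketch of how a parallel worker might drive the three calls above,
+  // after prepareForAddMarkedHeapRegionsPar() has been called once
+  // (illustrative only; "chunk_size" and the claiming loop are placeholders,
+  // not part of this interface):
+  //
+  //   jint cur   = chooser->getParMarkedHeapRegionChunk(chunk_size);
+  //   jint limit = cur + chunk_size;
+  //   jint added = 0;
+  //   for (each marked, non-young region hr claimed by this worker) {
+  //     if (cur == limit) {
+  //       cur   = chooser->getParMarkedHeapRegionChunk(chunk_size);
+  //       limit = cur + chunk_size;
+  //     }
+  //     chooser->setMarkedHeapRegion(cur++, hr);
+  //     added++;
+  //   }
+  //   chooser->incNumMarkedHeapRegions(added);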
+
+  void clearMarkedHeapRegions();
+
+  void updateAfterFullCollection();
+
+  // Ensure that "hr" is not a member of the marked region array or the cache
+  void removeRegion(HeapRegion* hr);
+
+  bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; }
+
+  // Returns true if the used portion of "_markedRegions" is properly
+  // sorted, otherwise asserts false.
+#ifndef PRODUCT
+  bool verify(void);
+  bool regionProperlyOrdered(HeapRegion* r) {
+    int si = r->sort_index();
+    return (si == -1) ||
+      (si > -1 && _markedRegions.at(si) == r) ||
+      (si < -1 && _cache.region_in_cache(r));
+  }
+#endif
+
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentG1Refine.cpp.incl"
+
+bool ConcurrentG1Refine::_enabled = false;
+
+ConcurrentG1Refine::ConcurrentG1Refine() :
+  _pya(PYA_continue), _last_pya(PYA_continue),
+  _last_cards_during(), _first_traversal(false),
+  _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
+  _hot_cache(NULL),
+  _def_use_cache(false), _use_cache(false),
+  _n_periods(0), _total_cards(0), _total_travs(0)
+{
+  if (G1ConcRefine) {
+    _cg1rThread = new ConcurrentG1RefineThread(this);
+    assert(cg1rThread() != NULL, "Conc refine should have been created");
+    assert(cg1rThread()->cg1r() == this,
+           "Conc refine thread should refer to this");
+  } else {
+    _cg1rThread = NULL;
+  }
+}
+
+void ConcurrentG1Refine::init() {
+  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    _n_card_counts =
+      (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
+    _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
+    for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
+    ModRefBarrierSet* bs = g1h->mr_bs();
+    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
+    CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
+    _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
+    if (G1ConcRSCountTraversals) {
+      _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
+      _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
+      for (int i = 0; i < 256; i++) {
+        _cur_card_count_histo[i] = 0;
+        _cum_card_count_histo[i] = 0;
+      }
+    }
+  }
+  if (G1ConcRSLogCacheSize > 0) {
+    _def_use_cache = true;
+    _use_cache = true;
+    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
+    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
+    _n_hot = 0;
+    _hot_cache_idx = 0;
+  }
+}
+
+ConcurrentG1Refine::~ConcurrentG1Refine() {
+  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
+    assert(_card_counts != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
+    assert(_cur_card_count_histo != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
+    assert(_cum_card_count_histo != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
+  }
+  if (G1ConcRSLogCacheSize > 0) {
+    assert(_hot_cache != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
+  }
+}
+
+bool ConcurrentG1Refine::refine() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
+  clear_hot_cache();  // Any previous values in this are now invalid.
+  g1h->g1_rem_set()->concurrentRefinementPass(this);
+  _traversals++;
+  unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
+  unsigned cards_during = cards_after-cards_before;
+  // If this is the first traversal in the current enabling
+  // and we did some cards, or if the number of cards found is decreasing
+  // sufficiently quickly, then keep going.  Otherwise, sleep a while.
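+  // (For instance, if the previous pass refined 300 cards, another immediate
+  // pass is taken only if this pass refined fewer than 200.)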
+  bool res =
+    (_first_traversal && cards_during > 0)
+    ||
+    (!_first_traversal && cards_during * 3 < _last_cards_during * 2);
+  _last_cards_during = cards_during;
+  _first_traversal = false;
+  return res;
+}
+
+void ConcurrentG1Refine::enable() {
+  MutexLocker x(G1ConcRefine_mon);
+  if (!_enabled) {
+    _enabled = true;
+    _first_traversal = true; _last_cards_during = 0;
+    G1ConcRefine_mon->notify_all();
+  }
+}
+
+unsigned ConcurrentG1Refine::disable() {
+  MutexLocker x(G1ConcRefine_mon);
+  if (_enabled) {
+    _enabled = false;
+    return _traversals;
+  } else {
+    return 0;
+  }
+}
+
+void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
+  G1ConcRefine_mon->lock();
+  while (!_enabled) {
+    G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
+  }
+  G1ConcRefine_mon->unlock();
+  _traversals = 0;
+};
+
+void ConcurrentG1Refine::set_pya_restart() {
+  // If we're using the log-based RS barrier, the above will cause
+  // in-progress traversals of completed log buffers to quit early; we will
+  // also abandon all other buffers.
+  if (G1RSBarrierUseQueue) {
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    dcqs.abandon_logs();
+    if (_cg1rThread->do_traversal()) {
+      _pya = PYA_restart;
+    } else {
+      _cg1rThread->set_do_traversal(true);
+      // Reset the post-yield actions.
+      _pya = PYA_continue;
+      _last_pya = PYA_continue;
+    }
+  } else {
+    _pya = PYA_restart;
+  }
+}
+
+void ConcurrentG1Refine::set_pya_cancel() {
+  _pya = PYA_cancel;
+}
+
+PostYieldAction ConcurrentG1Refine::get_pya() {
+  if (_pya != PYA_continue) {
+    jint val = _pya;
+    while (true) {
+      jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
+      if (val_read == val) {
+        PostYieldAction res = (PostYieldAction)val;
+        assert(res != PYA_continue, "Only the refine thread should reset.");
+        _last_pya = res;
+        return res;
+      } else {
+        val = val_read;
+      }
+    }
+  }
+  // No post-yield action was pending; report "continue".
+  return PYA_continue;
+}
+
+PostYieldAction ConcurrentG1Refine::get_last_pya() {
+  PostYieldAction res = _last_pya;
+  _last_pya = PYA_continue;
+  return res;
+}
+
+bool ConcurrentG1Refine::do_traversal() {
+  return _cg1rThread->do_traversal();
+}
+
+int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
+  size_t card_num = (card_ptr - _ct_bot);
+  guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
+  unsigned char cnt = _card_counts[card_num];
+  if (cnt < 255) _card_counts[card_num]++;
+  return cnt;
+}
+
+jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
+  int count = add_card_count(card_ptr);
+  // Count previously unvisited cards.
+  if (count == 0) _total_cards++;
+  // We'll assume a traversal unless we store it in the cache.
+  if (count < G1ConcRSHotCardLimit) {
+    _total_travs++;
+    return card_ptr;
+  }
+  // Otherwise, it's hot.
+  jbyte* res = NULL;
+  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
+  if (_n_hot == _hot_cache_size) {
+    _total_travs++;
+    res = _hot_cache[_hot_cache_idx];
+    _n_hot--;
+  }
+  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
+  _hot_cache[_hot_cache_idx] = card_ptr;
+  _hot_cache_idx++;
+  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
+  _n_hot++;
+  return res;
+}
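+
+// A sketch of the expected calling pattern for cache_insert (illustrative;
+// "refine_one_card" stands in for whatever actually refines a single card,
+// which lives elsewhere):
+//
+//   jbyte* to_refine = card_ptr;
+//   if (cg1r->use_cache()) {
+//     to_refine = cg1r->cache_insert(card_ptr);
+//     if (to_refine == NULL) {
+//       return;  // the card was absorbed by the hot-card cache for now
+//     }
+//   }
+//   refine_one_card(to_refine, worker_i);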
+
+
+void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
+  assert(!use_cache(), "cache should be disabled");
+  int start_ind = _hot_cache_idx-1;
+  for (int i = 0; i < _n_hot; i++) {
+    int ind = start_ind - i;
+    if (ind < 0) ind = ind + _hot_cache_size;
+    jbyte* entry = _hot_cache[ind];
+    if (entry != NULL) {
+      g1rs->concurrentRefineOneCard(entry, worker_i);
+    }
+  }
+  _n_hot = 0;
+  _hot_cache_idx = 0;
+}
+
+void ConcurrentG1Refine::clear_and_record_card_counts() {
+  if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
+  _n_periods++;
+  if (G1ConcRSCountTraversals) {
+    for (size_t i = 0; i < _n_card_counts; i++) {
+      unsigned char bucket = _card_counts[i];
+      _cur_card_count_histo[bucket]++;
+      _card_counts[i] = 0;
+    }
+    gclog_or_tty->print_cr("Card counts:");
+    for (int i = 0; i < 256; i++) {
+      if (_cur_card_count_histo[i] > 0) {
+        gclog_or_tty->print_cr("  %3d: %9d", i, _cur_card_count_histo[i]);
+        _cum_card_count_histo[i] += _cur_card_count_histo[i];
+        _cur_card_count_histo[i] = 0;
+      }
+    }
+  } else {
+    assert(G1ConcRSLogCacheSize > 0, "Logic");
+    Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
+                        _n_card_counts / HeapWordSize);
+  }
+}
+
+void
+ConcurrentG1Refine::
+print_card_count_histo_range(unsigned* histo, int from, int to,
+                             float& cum_card_pct,
+                             float& cum_travs_pct) {
+  unsigned cards = 0;
+  unsigned travs = 0;
+  guarantee(to <= 256, "Precondition");
+  for (int i = from; i < to-1; i++) {
+    cards += histo[i];
+    travs += histo[i] * i;
+  }
+  if (to == 256) {
+    unsigned histo_card_sum = 0;
+    unsigned histo_trav_sum = 0;
+    for (int i = 1; i < 255; i++) {
+      histo_trav_sum += histo[i] * i;
+    }
+    cards += histo[255];
+    // correct traversals for the last one.
+    unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
+    travs += travs_255;
+
+  } else {
+    cards += histo[to-1];
+    travs += histo[to-1] * (to-1);
+  }
+  float fperiods = (float)_n_periods;
+  float f_tot_cards = (float)_total_cards/fperiods;
+  float f_tot_travs = (float)_total_travs/fperiods;
+  if (cards > 0) {
+    float fcards = (float)cards/fperiods;
+    float ftravs = (float)travs/fperiods;
+    if (to == 256) {
+      gclog_or_tty->print(" %4d-       %10.2f%10.2f", from, fcards, ftravs);
+    } else {
+      gclog_or_tty->print(" %4d-%4d   %10.2f%10.2f", from, to-1, fcards, ftravs);
+    }
+    float pct_cards = fcards*100.0/f_tot_cards;
+    cum_card_pct += pct_cards;
+    float pct_travs = ftravs*100.0/f_tot_travs;
+    cum_travs_pct += pct_travs;
+    gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
+                  pct_cards, cum_card_pct,
+                  pct_travs, cum_travs_pct);
+  }
+}
+
+void ConcurrentG1Refine::print_final_card_counts() {
+  if (!G1ConcRSCountTraversals) return;
+
+  gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
+                _total_travs, _total_cards);
+  float fperiods = (float)_n_periods;
+  gclog_or_tty->print_cr("  This is an average of %8.2f traversals, %8.2f cards, "
+                "per collection.", (float)_total_travs/fperiods,
+                (float)_total_cards/fperiods);
+  gclog_or_tty->print_cr("  This is an average of %8.2f traversals/distinct "
+                "dirty card.\n",
+                _total_cards > 0 ?
+                (float)_total_travs/(float)_total_cards : 0.0);
+
+
+  gclog_or_tty->print_cr("Histogram:\n\n%10s   %10s%10s%10s%10s%10s%10s",
+                "range", "# cards", "# travs", "% cards", "(cum)",
+                "% travs", "(cum)");
+  gclog_or_tty->print_cr("------------------------------------------------------------"
+                "-------------");
+  float cum_cards_pct = 0.0;
+  float cum_travs_pct = 0.0;
+  for (int i = 1; i < 10; i++) {
+    print_card_count_histo_range(_cum_card_count_histo, i, i+1,
+                                 cum_cards_pct, cum_travs_pct);
+  }
+  for (int i = 10; i < 100; i += 10) {
+    print_card_count_histo_range(_cum_card_count_histo, i, i+10,
+                                 cum_cards_pct, cum_travs_pct);
+  }
+  print_card_count_histo_range(_cum_card_count_histo, 100, 150,
+                               cum_cards_pct, cum_travs_pct);
+  print_card_count_histo_range(_cum_card_count_histo, 150, 200,
+                               cum_cards_pct, cum_travs_pct);
+  print_card_count_histo_range(_cum_card_count_histo, 150, 255,
+                               cum_cards_pct, cum_travs_pct);
+  print_card_count_histo_range(_cum_card_count_histo, 255, 256,
+                               cum_cards_pct, cum_travs_pct);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Forward decl
+class ConcurrentG1RefineThread;
+class G1RemSet;
+
+// What to do after a yield:
+enum PostYieldAction {
+  PYA_continue,  // Continue the traversal
+  PYA_restart,   // Restart
+  PYA_cancel     // It's been completed by somebody else: cancel.
+};
+
+class ConcurrentG1Refine {
+  ConcurrentG1RefineThread* _cg1rThread;
+
+  volatile jint _pya;
+  PostYieldAction _last_pya;
+
+  static bool _enabled;  // Protected by G1ConcRefine_mon.
+  unsigned _traversals;
+
+  // Whether the next pass is the first since refinement was (re)enabled, and
+  // the number of cards processed during the last refinement pass.
+  unsigned _first_traversal;
+  unsigned _last_cards_during;
+
+  // The cache for card refinement.
+  bool     _use_cache;
+  bool     _def_use_cache;
+  size_t _n_periods;
+  size_t _total_cards;
+  size_t _total_travs;
+
+  unsigned char*  _card_counts;
+  unsigned _n_card_counts;
+  const jbyte* _ct_bot;
+  unsigned* _cur_card_count_histo;
+  unsigned* _cum_card_count_histo;
+  jbyte**  _hot_cache;
+  int      _hot_cache_size;
+  int      _n_hot;
+  int      _hot_cache_idx;
+
+  // Bumps the count for this card (saturating at 255) and returns the
+  // count it had before the bump.
+  int add_card_count(jbyte* card_ptr);
+
+  void print_card_count_histo_range(unsigned* histo, int from, int to,
+                                    float& cum_card_pct,
+                                    float& cum_travs_pct);
+ public:
+  ConcurrentG1Refine();
+  ~ConcurrentG1Refine();
+
+  void init(); // Accomplish some initialization that has to wait.
+
+  // Enable concurrent refinement, waking up the thread if necessary.
+  void enable();
+
+  // Returns the number of traversals performed since this refiner was enabled.
+  unsigned disable();
+
+  // Requires G1ConcRefine_mon to be held.
+  bool enabled() { return _enabled; }
+
+  // Returns only when G1 concurrent refinement has been enabled.
+  void wait_for_ConcurrentG1Refine_enabled();
+
+  // Do one concurrent refinement pass over the card table.  Returns "true"
+  // if heuristics determine that another pass should be done immediately.
+  bool refine();
+
+  // Indicate that an in-progress refinement pass should start over.
+  void set_pya_restart();
+  // Indicate that an in-progress refinement pass should quit.
+  void set_pya_cancel();
+
+  // Get the appropriate post-yield action.  Also sets last_pya.
+  PostYieldAction get_pya();
+
+  // The last PYA read by "get_pya".
+  PostYieldAction get_last_pya();
+
+  bool do_traversal();
+
+  ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
+
+  // If the card is not yet considered "hot", returns it unchanged so the
+  // caller refines it immediately.  Otherwise stores the card in the
+  // hot-card cache and returns NULL, unless storing it evicts an older
+  // entry, in which case the evicted card pointer is returned instead.
+  jbyte* cache_insert(jbyte* card_ptr);
+
+  // Process the cached entries.
+  void clean_up_cache(int worker_i, G1RemSet* g1rs);
+
+  // Discard entries in the hot cache.
+  void clear_hot_cache() {
+    _hot_cache_idx = 0; _n_hot = 0;
+  }
+
+  bool hot_cache_is_empty() { return _n_hot == 0; }
+
+  bool use_cache() { return _use_cache; }
+  void set_use_cache(bool b) {
+    if (b) _use_cache = _def_use_cache;
+    else   _use_cache = false;
+  }
+
+  void clear_and_record_card_counts();
+  void print_final_card_counts();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentG1RefineThread.cpp.incl"
+
+// ======= Concurrent G1 Refinement Thread ========
+
+// This thread is created when the G1 garbage collector is used
+
+ConcurrentG1RefineThread::
+ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
+  ConcurrentGCThread(),
+  _cg1r(cg1r),
+  _started(false),
+  _in_progress(false),
+  _do_traversal(false),
+  _vtime_accum(0.0),
+  _co_tracker(G1CRGroup),
+  _interval_ms(5.0)
+{
+  create_and_start();
+}
+
+const long timeout = 200; // ms.
+
+void ConcurrentG1RefineThread::traversalBasedRefinement() {
+  _cg1r->wait_for_ConcurrentG1Refine_enabled();
+  MutexLocker x(G1ConcRefine_mon);
+  while (_cg1r->enabled()) {
+    MutexUnlocker ux(G1ConcRefine_mon);
+    ResourceMark rm;
+    HandleMark   hm;
+
+    if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass");
+    _sts.join();
+    bool no_sleep = _cg1r->refine();
+    _sts.leave();
+    if (!no_sleep) {
+      MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+      // We do this only for the timeout; we don't expect this to be signalled.
+      CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
+    }
+  }
+}
+
+void ConcurrentG1RefineThread::queueBasedRefinement() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  // Wait for completed log buffers to exist.
+  {
+    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+    while (!_do_traversal && !dcqs.process_completed_buffers() &&
+           !_should_terminate) {
+      DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
+    }
+  }
+
+  if (_should_terminate) {
+    return;
+  }
+
+  // Now we take them off (this doesn't hold locks while it applies
+  // closures).  (If we did a full collection, then we'll do a full
+  // traversal.)
+  _sts.join();
+  if (_do_traversal) {
+    (void)_cg1r->refine();
+    switch (_cg1r->get_last_pya()) {
+    case PYA_cancel: case PYA_continue:
+      // Continue was caught and handled inside "refine".  If it's still
+      // "continue" when we get here, we're done.
+      _do_traversal = false;
+      break;
+    case PYA_restart:
+      assert(_do_traversal, "Because of Full GC.");
+      break;
+    }
+  } else {
+    int n_logs = 0;
+    int lower_limit = 0;
+    double start_vtime_sec; // only used when G1SmoothConcRefine is on
+    int prev_buffer_num; // only used when G1SmoothConcRefine is on
+
+    if (G1SmoothConcRefine) {
+      lower_limit = 0;
+      start_vtime_sec = os::elapsedVTime();
+      prev_buffer_num = (int) dcqs.completed_buffers_num();
+    } else {
+      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
+    }
+    while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
+      double end_vtime_sec;
+      double elapsed_vtime_sec;
+      int elapsed_vtime_ms;
+      int curr_buffer_num;
+
+      if (G1SmoothConcRefine) {
+        end_vtime_sec = os::elapsedVTime();
+        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
+        curr_buffer_num = (int) dcqs.completed_buffers_num();
+
+        if (curr_buffer_num > prev_buffer_num ||
+            curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
+          decreaseInterval(elapsed_vtime_ms);
+        } else if (curr_buffer_num < prev_buffer_num) {
+          increaseInterval(elapsed_vtime_ms);
+        }
+      }
+
+      sample_young_list_rs_lengths();
+      _co_tracker.update(false);
+
+      if (G1SmoothConcRefine) {
+        start_vtime_sec = os::elapsedVTime();
+        prev_buffer_num = curr_buffer_num;
+
+        _sts.leave();
+        os::sleep(Thread::current(), (jlong) _interval_ms, false);
+        _sts.join();
+      }
+
+      n_logs++;
+    }
+    // Make sure we harvest the PYA, if any.
+    (void)_cg1r->get_pya();
+  }
+  _sts.leave();
+}
+
+void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  if (g1p->adaptive_young_list_length()) {
+    int regions_visited = 0;
+
+    g1h->young_list_rs_length_sampling_init();
+    while (g1h->young_list_rs_length_sampling_more()) {
+      g1h->young_list_rs_length_sampling_next();
+      ++regions_visited;
+
+      // we try to yield every time we visit 10 regions
+      if (regions_visited == 10) {
+        if (_sts.should_yield()) {
+          _sts.yield("G1 refine");
+          // we just abandon the iteration
+          break;
+        }
+        regions_visited = 0;
+      }
+    }
+
+    g1p->check_prediction_validity();
+  }
+}
+
+void ConcurrentG1RefineThread::run() {
+  initialize_in_thread();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+
+  _co_tracker.enable();
+  _co_tracker.start();
+
+  while (!_should_terminate) {
+    // wait until started is set.
+    if (G1RSBarrierUseQueue) {
+      queueBasedRefinement();
+    } else {
+      traversalBasedRefinement();
+    }
+    _sts.join();
+    _co_tracker.update();
+    _sts.leave();
+    if (os::supports_vtime()) {
+      _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    } else {
+      _vtime_accum = 0.0;
+    }
+  }
+  _sts.join();
+  _co_tracker.update(true);
+  _sts.leave();
+  assert(_should_terminate, "just checking");
+
+  terminate();
+}
+
+
+void ConcurrentG1RefineThread::yield() {
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield");
+  _sts.yield("G1 refine");
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end");
+}
+
+void ConcurrentG1RefineThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  {
+    MutexLockerEx mu(Terminator_lock);
+    _should_terminate = true;
+  }
+
+  {
+    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+    DirtyCardQ_CBL_mon->notify_all();
+  }
+
+  {
+    MutexLockerEx mu(Terminator_lock);
+    while (!_has_terminated) {
+      Terminator_lock->wait();
+    }
+  }
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop");
+}
+
+void ConcurrentG1RefineThread::print() {
+  gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+void ConcurrentG1RefineThread::set_do_traversal(bool b) {
+  _do_traversal = b;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Forward Decl.
+class ConcurrentG1Refine;
+
+// The G1 Concurrent Refinement Thread (could be several in the future).
+
+class ConcurrentG1RefineThread: public ConcurrentGCThread {
+  friend class VMStructs;
+  friend class G1CollectedHeap;
+
+  double _vtime_start;  // Initial virtual time.
+  double _vtime_accum;  // Accumulated virtual time.
+
+ public:
+  virtual void run();
+
+ private:
+  ConcurrentG1Refine*              _cg1r;
+  bool                             _started;
+  bool                             _in_progress;
+  volatile bool                    _restart;
+
+  COTracker                        _co_tracker;
+  double                           _interval_ms;
+
+  bool                             _do_traversal;
+
+  void decreaseInterval(int processing_time_ms) {
+    double min_interval_ms = (double) processing_time_ms;
+    _interval_ms = 0.8 * _interval_ms;
+    if (_interval_ms < min_interval_ms)
+      _interval_ms = min_interval_ms;
+  }
+  void increaseInterval(int processing_time_ms) {
+    double max_interval_ms = 9.0 * (double) processing_time_ms;
+    _interval_ms = 1.1 * _interval_ms;
+    if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
+      _interval_ms = max_interval_ms;
+  }
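+
+  // Example of the adaptation (illustrative numbers): with _interval_ms at
+  // 5.0 and a pass that took 2 ms, decreaseInterval(2) gives 0.8 * 5.0 = 4.0
+  // (not clamped, since 4.0 >= 2.0), while increaseInterval(2) gives
+  // 1.1 * 5.0 = 5.5, clamped to at most 9.0 * 2 = 18.0 ms.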
+
+  void sleepBeforeNextCycle();
+
+  void traversalBasedRefinement();
+
+  void queueBasedRefinement();
+
+  // For use by G1CollectedHeap, which is a friend.
+  static SuspendibleThreadSet* sts() { return &_sts; }
+
+ public:
+  // Constructor
+  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
+
+  // Printing
+  void print();
+
+  // Total virtual time so far.
+  double vtime_accum() { return _vtime_accum; }
+
+  ConcurrentG1Refine* cg1r()                     { return _cg1r;     }
+
+
+  void            set_started()                  { _started = true;   }
+  void            clear_started()                { _started = false;  }
+  bool            started()                      { return _started;   }
+
+  void            set_in_progress()              { _in_progress = true;   }
+  void            clear_in_progress()            { _in_progress = false;  }
+  bool            in_progress()                  { return _in_progress;   }
+
+  void            set_do_traversal(bool b);
+  bool            do_traversal() { return _do_traversal; }
+
+  void            sample_young_list_rs_lengths();
+
+  // Yield for GC
+  void            yield();
+
+  // shutdown
+  static void stop();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,3979 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentMark.cpp.incl"
+
+//
+// CM (Concurrent Mark) Bit Map Wrapper
+
+CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
+  _bm((uintptr_t*)NULL,0),
+  _shifter(shifter) {
+  _bmStartWord = (HeapWord*)(rs.base());
+  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
+  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
+                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
+
+  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
+  // For now we'll just commit all of the bit map up front.
+  // Later on we'll try to be more parsimonious with swap.
+  guarantee(_virtual_space.initialize(brs, brs.size()),
+            "couldn't reserve backing store for CMS bit map");
+  assert(_virtual_space.committed_size() == brs.size(),
+         "didn't reserve backing store for all of CMS bit map?");
+  _bm.set_map((uintptr_t*)_virtual_space.low());
+  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
+         _bmWordSize, "inconsistency in bit map sizing");
+  _bm.set_size(_bmWordSize >> _shifter);
+}
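+
+// Sizing note (a sketch inferred from the constructor above, not a new
+// invariant): each bit of _bm covers (1 << _shifter) HeapWords, so the map
+// needs (_bmWordSize >> _shifter) bits, i.e. roughly
+// (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes of backing store --
+// which, plus one byte of slack and aligned up, is what is reserved for
+// brs above.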
+
+HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
+                                               HeapWord* limit) const {
+  // First we must round addr *up* to a possible object boundary.
+  addr = (HeapWord*)align_size_up((intptr_t)addr,
+                                  HeapWordSize << _shifter);
+  size_t addrOffset = heapWordToOffset(addr);
+  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
+  size_t limitOffset = heapWordToOffset(limit);
+  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
+  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
+  assert(nextAddr >= addr, "get_next_one postcondition");
+  assert(nextAddr == limit || isMarked(nextAddr),
+         "get_next_one postcondition");
+  return nextAddr;
+}
+
+HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
+                                                 HeapWord* limit) const {
+  size_t addrOffset = heapWordToOffset(addr);
+  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
+  size_t limitOffset = heapWordToOffset(limit);
+  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
+  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
+  assert(nextAddr >= addr, "get_next_one postcondition");
+  assert(nextAddr == limit || !isMarked(nextAddr),
+         "get_next_one postcondition");
+  return nextAddr;
+}
+
+int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
+  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
+  return (int) (diff >> _shifter);
+}
+
+bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
+  HeapWord* left  = MAX2(_bmStartWord, mr.start());
+  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
+  if (right > left) {
+    // Right-open interval [leftOffset, rightOffset).
+    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
+  } else {
+    return true;
+  }
+}
+
+void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
+                                             size_t    from_start_index,
+                                             HeapWord* to_start_word,
+                                             size_t    word_num) {
+  _bm.mostly_disjoint_range_union(from_bitmap,
+                                  from_start_index,
+                                  heapWordToOffset(to_start_word),
+                                  word_num);
+}
+
+#ifndef PRODUCT
+bool CMBitMapRO::covers(ReservedSpace rs) const {
+  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
+  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
+         "size inconsistency");
+  return _bmStartWord == (HeapWord*)(rs.base()) &&
+         _bmWordSize  == rs.size()>>LogHeapWordSize;
+}
+#endif
+
+void CMBitMap::clearAll() {
+  _bm.clear();
+  return;
+}
+
+void CMBitMap::markRange(MemRegion mr) {
+  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
+  assert(!mr.is_empty(), "unexpected empty region");
+  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
+          ((HeapWord *) mr.end())),
+         "markRange memory region end is not card aligned");
+  // convert address range into offset range
+  _bm.at_put_range(heapWordToOffset(mr.start()),
+                   heapWordToOffset(mr.end()), true);
+}
+
+void CMBitMap::clearRange(MemRegion mr) {
+  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
+  assert(!mr.is_empty(), "unexpected empty region");
+  // convert address range into offset range
+  _bm.at_put_range(heapWordToOffset(mr.start()),
+                   heapWordToOffset(mr.end()), false);
+}
+
+MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
+                                            HeapWord* end_addr) {
+  HeapWord* start = getNextMarkedWordAddress(addr);
+  start = MIN2(start, end_addr);
+  HeapWord* end   = getNextUnmarkedWordAddress(start);
+  end = MIN2(end, end_addr);
+  assert(start <= end, "Consistency check");
+  MemRegion mr(start, end);
+  if (!mr.is_empty()) {
+    clearRange(mr);
+  }
+  return mr;
+}
+
+CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
+  _base(NULL), _cm(cm)
+#ifdef ASSERT
+  , _drain_in_progress(false)
+  , _drain_in_progress_yields(false)
+#endif
+{}
+
+void CMMarkStack::allocate(size_t size) {
+  _base = NEW_C_HEAP_ARRAY(oop, size);
+  if (_base == NULL)
+    vm_exit_during_initialization("Failed to allocate "
+                                  "CM mark stack");
+  _index = 0;
+  // QQQQ cast ...
+  _capacity = (jint) size;
+  _oops_do_bound = -1;
+  NOT_PRODUCT(_max_depth = 0);
+}
+
+CMMarkStack::~CMMarkStack() {
+  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
+}
+
+void CMMarkStack::par_push(oop ptr) {
+  while (true) {
+    if (isFull()) {
+      _overflow = true;
+      return;
+    }
+    // Otherwise...
+    jint index = _index;
+    jint next_index = index+1;
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      _base[index] = ptr;
+      // Note that we don't maintain this atomically.  We could, but it
+      // doesn't seem necessary.
+      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
+      return;
+    }
+    // Otherwise, we need to try again.
+  }
+}
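+
+// Behaviour of the lock-free push above under contention (an illustrative
+// walk-through): if two threads both read _index == 5 and try to claim
+// slot 5, only one cmpxchg succeeds in advancing _index to 6 and stores
+// its oop into _base[5]; the other sees res != index, loops, re-reads
+// _index (now 6) and retries against slot 6, or sets _overflow if the
+// stack has filled up in the meantime.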
+
+void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
+  while (true) {
+    if (isFull()) {
+      _overflow = true;
+      return;
+    }
+    // Otherwise...
+    jint index = _index;
+    jint next_index = index + n;
+    if (next_index > _capacity) {
+      _overflow = true;
+      return;
+    }
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      for (int i = 0; i < n; i++) {
+        int ind = index + i;
+        assert(ind < _capacity, "By overflow test above.");
+        _base[ind] = ptr_arr[i];
+      }
+      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
+      return;
+    }
+    // Otherwise, we need to try again.
+  }
+}
+
+
+void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  jint start = _index;
+  jint next_index = start + n;
+  if (next_index > _capacity) {
+    _overflow = true;
+    return;
+  }
+  // Otherwise.
+  _index = next_index;
+  for (int i = 0; i < n; i++) {
+    int ind = start + i;
+    guarantee(ind < _capacity, "By overflow test above.");
+    _base[ind] = ptr_arr[i];
+  }
+}
+
+
+bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  jint index = _index;
+  if (index == 0) {
+    *n = 0;
+    return false;
+  } else {
+    int k = MIN2(max, index);
+    jint new_ind = index - k;
+    for (int j = 0; j < k; j++) {
+      ptr_arr[j] = _base[new_ind + j];
+    }
+    _index = new_ind;
+    *n = k;
+    return true;
+  }
+}
+
+
+CMRegionStack::CMRegionStack() : _base(NULL) {}
+
+void CMRegionStack::allocate(size_t size) {
+  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
+  if (_base == NULL)
+    vm_exit_during_initialization("Failed to allocate "
+                                  "CM region mark stack");
+  _index = 0;
+  // QQQQ cast ...
+  _capacity = (jint) size;
+}
+
+CMRegionStack::~CMRegionStack() {
+  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
+}
+
+void CMRegionStack::push(MemRegion mr) {
+  assert(mr.word_size() > 0, "Precondition");
+  while (true) {
+    if (isFull()) {
+      _overflow = true;
+      return;
+    }
+    // Otherwise...
+    jint index = _index;
+    jint next_index = index+1;
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      _base[index] = mr;
+      return;
+    }
+    // Otherwise, we need to try again.
+  }
+}
+
+MemRegion CMRegionStack::pop() {
+  while (true) {
+    // Otherwise...
+    jint index = _index;
+
+    if (index == 0) {
+      return MemRegion();
+    }
+    jint next_index = index-1;
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      MemRegion mr = _base[next_index];
+      if (mr.start() != NULL) {
+        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
+        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+        return mr;
+      } else {
+        // that entry was invalidated... let's skip it
+        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+      }
+    }
+    // Otherwise, we need to try again.
+  }
+}
+
+bool CMRegionStack::invalidate_entries_into_cset() {
+  bool result = false;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  for (int i = 0; i < _oops_do_bound; ++i) {
+    MemRegion mr = _base[i];
+    if (mr.start() != NULL) {
+      tmp_guarantee_CM( mr.end() != NULL, "invariant");
+      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+      HeapRegion* hr = g1h->heap_region_containing(mr.start());
+      tmp_guarantee_CM( hr != NULL, "invariant" );
+      if (hr->in_collection_set()) {
+        // The region points into the collection set
+        _base[i] = MemRegion();
+        result = true;
+      }
+    } else {
+      // that entry was invalidated... let's skip it
+      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+    }
+  }
+  return result;
+}
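+
+// Note on the invalidation scheme used above and in CMRegionStack::pop():
+// an entry is invalidated by overwriting it with an empty MemRegion (NULL
+// start and end); pop() recognises such entries and skips them instead of
+// handing them back to the caller.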
+
+template<class OopClosureClass>
+bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
+  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
+         || SafepointSynchronize::is_at_safepoint(),
+         "Drain recursion must be yield-safe.");
+  bool res = true;
+  debug_only(_drain_in_progress = true);
+  debug_only(_drain_in_progress_yields = yield_after);
+  while (!isEmpty()) {
+    oop newOop = pop();
+    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
+    assert(newOop->is_oop(), "Expected an oop");
+    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
+           "only grey objects on this stack");
+    // iterate over the oops in this oop, marking and pushing
+    // the ones in the G1 heap.
+    newOop->oop_iterate(cl);
+    if (yield_after && _cm->do_yield_check()) {
+      res = false; break;
+    }
+  }
+  debug_only(_drain_in_progress = false);
+  return res;
+}
+
+void CMMarkStack::oops_do(OopClosure* f) {
+  if (_index == 0) return;
+  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
+         "Bound must be set.");
+  for (int i = 0; i < _oops_do_bound; i++) {
+    f->do_oop(&_base[i]);
+  }
+  _oops_do_bound = -1;
+}
+
+bool ConcurrentMark::not_yet_marked(oop obj) const {
+  return (_g1h->is_obj_ill(obj)
+          || (_g1h->is_in_permanent(obj)
+              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
+}
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+ConcurrentMark::ConcurrentMark(ReservedSpace rs,
+                               int max_regions) :
+  _markBitMap1(rs, MinObjAlignment - 1),
+  _markBitMap2(rs, MinObjAlignment - 1),
+
+  _parallel_marking_threads(0),
+  _sleep_factor(0.0),
+  _marking_task_overhead(1.0),
+  _cleanup_sleep_factor(0.0),
+  _cleanup_task_overhead(1.0),
+  _region_bm(max_regions, false /* in_resource_area*/),
+  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
+           CardTableModRefBS::card_shift,
+           false /* in_resource_area*/),
+  _prevMarkBitMap(&_markBitMap1),
+  _nextMarkBitMap(&_markBitMap2),
+  _at_least_one_mark_complete(false),
+
+  _markStack(this),
+  _regionStack(),
+  // _finger set in set_non_marking_state
+
+  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
+  // _active_tasks set in set_non_marking_state
+  // _tasks set inside the constructor
+  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
+  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
+
+  _has_overflown(false),
+  _concurrent(false),
+
+  // _verbose_level set below
+
+  _init_times(),
+  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
+  _cleanup_times(),
+  _total_counting_time(0.0),
+  _total_rs_scrub_time(0.0),
+
+  _parallel_workers(NULL),
+  _cleanup_co_tracker(G1CLGroup)
+{
+  CMVerboseLevel verbose_level =
+    (CMVerboseLevel) G1MarkingVerboseLevel;
+  if (verbose_level < no_verbose)
+    verbose_level = no_verbose;
+  if (verbose_level > high_verbose)
+    verbose_level = high_verbose;
+  _verbose_level = verbose_level;
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
+                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
+
+  _markStack.allocate(G1CMStackSize);
+  _regionStack.allocate(G1CMRegionStackSize);
+
+  // Create & start a ConcurrentMark thread.
+  if (G1ConcMark) {
+    _cmThread = new ConcurrentMarkThread(this);
+    assert(cmThread() != NULL, "CM Thread should have been created");
+    assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
+  } else {
+    _cmThread = NULL;
+  }
+  _g1h = G1CollectedHeap::heap();
+  assert(CGC_lock != NULL, "Where's the CGC_lock?");
+  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
+  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+  satb_qs.set_buffer_size(G1SATBLogBufferSize);
+
+  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
+  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
+  for (int i = 0 ; i < size; i++) {
+    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
+  }
+
+  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
+
+  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
+  _active_tasks = _max_task_num;
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    CMTaskQueue* task_queue = new CMTaskQueue();
+    task_queue->initialize();
+    _task_queues->register_queue(i, task_queue);
+
+    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
+    _accum_task_vtime[i] = 0.0;
+  }
+
+  if (ParallelMarkingThreads > ParallelGCThreads) {
+    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
+                                  "than ParallelGCThreads.");
+  }
+  if (ParallelGCThreads == 0) {
+    // if we are not running with any parallel GC threads we will not
+    // spawn any marking threads either
+    _parallel_marking_threads =   0;
+    _sleep_factor             = 0.0;
+    _marking_task_overhead    = 1.0;
+  } else {
+    if (ParallelMarkingThreads > 0) {
+      // notice that ParallelMarkingThreads overrides G1MarkingOverheadPerc
+      // if both are set
+
+      _parallel_marking_threads = ParallelMarkingThreads;
+      _sleep_factor             = 0.0;
+      _marking_task_overhead    = 1.0;
+    } else if (G1MarkingOverheadPerc > 0) {
+      // we will calculate the number of parallel marking threads
+      // based on a target overhead with respect to the soft real-time
+      // goal
+
+      double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
+      double overall_cm_overhead =
+        (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
+      double cpu_ratio = 1.0 / (double) os::processor_count();
+      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
+      double marking_task_overhead =
+        overall_cm_overhead / marking_thread_num *
+                                                (double) os::processor_count();
+      double sleep_factor =
+                         (1.0 - marking_task_overhead) / marking_task_overhead;
+
+      _parallel_marking_threads = (size_t) marking_thread_num;
+      _sleep_factor             = sleep_factor;
+      _marking_task_overhead    = marking_task_overhead;
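+
+      // Worked example (illustrative numbers only): with
+      // G1MarkingOverheadPerc = 10, G1MaxPauseTimeMS = 200,
+      // G1TimeSliceMS = 500 and an 8-CPU machine:
+      //   overall_cm_overhead   = 200 * 0.10 / 500   = 0.04
+      //   cpu_ratio             = 1 / 8              = 0.125
+      //   marking_thread_num    = ceil(0.04 / 0.125) = 1
+      //   marking_task_overhead = 0.04 / 1 * 8       = 0.32
+      //   sleep_factor          = (1 - 0.32) / 0.32  = 2.125
+      // i.e. one marking thread that sleeps a little over twice as long
+      // as it runs, which keeps it near the requested 10% overhead.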
+    } else {
+      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
+      _sleep_factor             = 0.0;
+      _marking_task_overhead    = 1.0;
+    }
+
+    if (parallel_marking_threads() > 1)
+      _cleanup_task_overhead = 1.0;
+    else
+      _cleanup_task_overhead = marking_task_overhead();
+    _cleanup_sleep_factor =
+                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
+
+#if 0
+    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
+    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
+    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
+    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
+    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
+#endif
+
+    guarantee( parallel_marking_threads() > 0, "peace of mind" );
+    _parallel_workers = new WorkGang("Parallel Marking Threads",
+                                     (int) parallel_marking_threads(), false, true);
+    if (_parallel_workers == NULL)
+      vm_exit_during_initialization("Failed necessary allocation.");
+  }
+
+  // so that the call below can read a sensible value
+  _heap_start = (HeapWord*) rs.base();
+  set_non_marking_state();
+}
+
+void ConcurrentMark::update_g1_committed(bool force) {
+  // If concurrent marking is not in progress, then we do not need to
+  // update _heap_end. This has a subtle and important
+  // side-effect. Imagine that two evacuation pauses happen between
+  // marking completion and remark. The first one can grow the
+  // heap (hence now the finger is below the heap end). Then, the
+  // second one could unnecessarily push regions on the region
+  // stack. This causes the invariant that the region stack is empty
+  // at the beginning of remark to be false. By ensuring that we do
+  // not observe heap expansions after marking is complete, then we do
+  // not have this problem.
+  if (!concurrent_marking_in_progress() && !force)
+    return;
+
+  MemRegion committed = _g1h->g1_committed();
+  tmp_guarantee_CM( committed.start() == _heap_start,
+                    "start shouldn't change" );
+  HeapWord* new_end = committed.end();
+  if (new_end > _heap_end) {
+    // The heap has been expanded.
+
+    _heap_end = new_end;
+  }
+  // Notice that the heap can also shrink. However, this only happens
+  // during a Full GC (at least currently) and the entire marking
+  // phase will bail out and the task will not be restarted. So, let's
+  // do nothing.
+}
+
+void ConcurrentMark::reset() {
+  // Starting values for these two. This should be called in a STW
+  // phase. CM will be notified of any future g1_committed expansions
+  // at the end of evacuation pauses, when tasks are inactive.
+  MemRegion committed = _g1h->g1_committed();
+  _heap_start = committed.start();
+  _heap_end   = committed.end();
+
+  guarantee( _heap_start != NULL &&
+             _heap_end != NULL   &&
+             _heap_start < _heap_end, "heap bounds should look ok" );
+
+  // reset all the marking data structures and any necessary flags
+  clear_marking_state();
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[global] resetting");
+
+  // We do reset all of them, since different phases will use
+  // different numbers of active threads. So, it's easiest to have all
+  // of them ready.
+  for (int i = 0; i < (int) _max_task_num; ++i)
+    _tasks[i]->reset(_nextMarkBitMap);
+
+  // we need this to make sure that the flag is on during the evac
+  // pause with initial mark piggy-backed
+  set_concurrent_marking_in_progress();
+}
+
+void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
+  guarantee( active_tasks <= _max_task_num, "we should not have more" );
+
+  _active_tasks = active_tasks;
+  // Need to update the three data structures below according to the
+  // number of active threads for this phase.
+  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
+  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
+  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
+
+  _concurrent = concurrent;
+  // We propagate this to all tasks, not just the active ones.
+  for (int i = 0; i < (int) _max_task_num; ++i)
+    _tasks[i]->set_concurrent(concurrent);
+
+  if (concurrent) {
+    set_concurrent_marking_in_progress();
+  } else {
+    // We currently assume that the concurrent flag has been set to
+    // false before we start remark. At this point we should also be
+    // in a STW phase.
+    guarantee( !concurrent_marking_in_progress(), "invariant" );
+    guarantee( _finger == _heap_end, "only way to get here" );
+    update_g1_committed(true);
+  }
+}
+
+void ConcurrentMark::set_non_marking_state() {
+  // We set the global marking state to some default values when we're
+  // not doing marking.
+  clear_marking_state();
+  _active_tasks = 0;
+  clear_concurrent_marking_in_progress();
+}
+
+ConcurrentMark::~ConcurrentMark() {
+  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
+  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
+  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
+                    _par_cleanup_thread_state);
+
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    delete _task_queues->queue(i);
+    delete _tasks[i];
+  }
+  delete _task_queues;
+  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
+}
+
+// This closure is used to mark refs into the G1 heap from external
+// roots, by graying them in the CM bit map.
+// Called at the first checkpoint.
+//
+
+#define PRINT_REACHABLE_AT_INITIAL_MARK 0
+#if PRINT_REACHABLE_AT_INITIAL_MARK
+static FILE* reachable_file = NULL;
+
+class PrintReachableClosure: public OopsInGenClosure {
+  CMBitMap* _bm;
+  int _level;
+public:
+  PrintReachableClosure(CMBitMap* bm) :
+    _bm(bm), _level(0) {
+    guarantee(reachable_file != NULL, "pre-condition");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+    HeapWord* obj_addr = (HeapWord*)obj;
+    if (obj == NULL) return;
+    fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
+            _level, p, (void*) obj, _bm->isMarked(obj_addr));
+    if (!_bm->isMarked(obj_addr)) {
+      _bm->mark(obj_addr);
+      _level++;
+      obj->oop_iterate(this);
+      _level--;
+    }
+  }
+};
+#endif // PRINT_REACHABLE_AT_INITIAL_MARK
+
+#define SEND_HEAP_DUMP_TO_FILE 0
+#if SEND_HEAP_DUMP_TO_FILE
+static FILE* heap_dump_file = NULL;
+#endif // SEND_HEAP_DUMP_TO_FILE
+
+void ConcurrentMark::clearNextBitmap() {
+   guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
+
+   // clear the mark bitmap (no grey objects to start with).
+   // We need to do this in chunks and offer to yield in between
+   // each chunk.
+   HeapWord* start  = _nextMarkBitMap->startWord();
+   HeapWord* end    = _nextMarkBitMap->endWord();
+   HeapWord* cur    = start;
+   size_t chunkSize = M;
+   while (cur < end) {
+     HeapWord* next = cur + chunkSize;
+     if (next > end)
+       next = end;
+     MemRegion mr(cur,next);
+     _nextMarkBitMap->clearRange(mr);
+     cur = next;
+     do_yield_check();
+   }
+}
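+
+// Rough scale of the chunking above (illustrative): chunkSize is M
+// HeapWords, i.e. the bits covering about 8 MB of heap per iteration on a
+// 64-bit VM (4 MB on 32-bit), with a yield check between consecutive
+// chunks.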
+
+class NoteStartOfMarkHRClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      r->note_start_of_marking(true);
+    }
+    return false;
+  }
+};
+
+void ConcurrentMark::checkpointRootsInitialPre() {
+  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+
+  _has_aborted = false;
+
+  // Find all the reachable objects...
+#if PRINT_REACHABLE_AT_INITIAL_MARK
+  guarantee(reachable_file == NULL, "Protocol");
+  char fn_buf[100];
+  sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
+  reachable_file = fopen(fn_buf, "w");
+  // clear the mark bitmap (no grey objects to start with)
+  _nextMarkBitMap->clearAll();
+  PrintReachableClosure prcl(_nextMarkBitMap);
+  g1h->process_strong_roots(
+                            false,   // fake perm gen collection
+                            SharedHeap::SO_AllClasses,
+                            &prcl, // Regular roots
+                            &prcl    // Perm Gen Roots
+                            );
+  // The root iteration above "consumed" dirty cards in the perm gen.
+  // Therefore, as a shortcut, we dirty all such cards.
+  g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
+  fclose(reachable_file);
+  reachable_file = NULL;
+  // clear the mark bitmap again.
+  _nextMarkBitMap->clearAll();
+  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+  COMPILER2_PRESENT(DerivedPointerTable::clear());
+#endif // PRINT_REACHABLE_AT_INITIAL_MARK
+
+  // Initialise marking structures. This has to be done in a STW phase.
+  reset();
+}
+
+class CMMarkRootsClosure: public OopsInGenClosure {
+private:
+  ConcurrentMark*  _cm;
+  G1CollectedHeap* _g1h;
+  bool             _do_barrier;
+
+public:
+  CMMarkRootsClosure(ConcurrentMark* cm,
+                     G1CollectedHeap* g1h,
+                     bool do_barrier) : _cm(cm), _g1h(g1h),
+                                        _do_barrier(do_barrier) { }
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  virtual void do_oop(oop* p) {
+    oop thisOop = *p;
+    if (thisOop != NULL) {
+      assert(thisOop->is_oop() || thisOop->mark() == NULL,
+             "expected an oop, possibly with mark word displaced");
+      HeapWord* addr = (HeapWord*)thisOop;
+      if (_g1h->is_in_g1_reserved(addr)) {
+        _cm->grayRoot(thisOop);
+      }
+    }
+    if (_do_barrier) {
+      assert(!_g1h->is_in_g1_reserved(p),
+             "Should be called on external roots");
+      do_barrier(p);
+    }
+  }
+};
+
+void ConcurrentMark::checkpointRootsInitialPost() {
+  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
+
+  // Start weak-reference discovery.
+  ReferenceProcessor* rp = g1h->ref_processor();
+  rp->verify_no_references_recorded();
+  rp->enable_discovery(); // enable ("weak") refs discovery
+
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
+  satb_mq_set.set_active_all_threads(true);
+
+  // update_g1_committed() will be called at the end of an evac pause
+  // when marking is on. So, it's also called at the end of the
+  // initial-mark pause to update the heap end, if the heap expands
+  // during it. No need to call it here.
+
+  guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
+
+  size_t max_marking_threads =
+    MAX2((size_t) 1, parallel_marking_threads());
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    _tasks[i]->enable_co_tracker();
+    if (i < (int) max_marking_threads)
+      _tasks[i]->reset_co_tracker(marking_task_overhead());
+    else
+      _tasks[i]->reset_co_tracker(0.0);
+  }
+}
+
+// Checkpoint the roots into this generation from outside
+// this generation. [Note this initial checkpoint need only
+// be approximate -- we'll do a catch up phase subsequently.]
+void ConcurrentMark::checkpointRootsInitial() {
+  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  double start = os::elapsedTime();
+  GCOverheadReporter::recordSTWStart(start);
+
+  // If there has not been a GC[n-1] since last GC[n] cycle completed,
+  // precede our marking with a collection of all
+  // younger generations to keep floating garbage to a minimum.
+  // YSR: we won't do this for now -- it's an optimization to be
+  // done post-beta.
+
+  // YSR:    ignoring weak refs for now; will do at bug fixing stage
+  // EVM:    assert(discoveredRefsAreClear());
+
+
+  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+  g1p->record_concurrent_mark_init_start();
+  checkpointRootsInitialPre();
+
+  // YSR: when concurrent precleaning is in place, we'll
+  // need to clear the cached card table here
+
+  ResourceMark rm;
+  HandleMark  hm;
+
+  g1h->ensure_parsability(false);
+  g1h->perm_gen()->save_marks();
+
+  CMMarkRootsClosure notOlder(this, g1h, false);
+  CMMarkRootsClosure older(this, g1h, true);
+
+  g1h->set_marking_started();
+  g1h->rem_set()->prepare_for_younger_refs_iterate(false);
+
+  g1h->process_strong_roots(false,   // fake perm gen collection
+                            SharedHeap::SO_AllClasses,
+                            &notOlder, // Regular roots
+                            &older    // Perm Gen Roots
+                            );
+  checkpointRootsInitialPost();
+
+  // Statistics.
+  double end = os::elapsedTime();
+  _init_times.add((end - start) * 1000.0);
+  GCOverheadReporter::recordSTWEnd(end);
+
+  g1p->record_concurrent_mark_init_end();
+}
+
+/*
+   Notice that in the next two methods, we actually leave the STS
+   during the barrier sync and join it immediately afterwards. If we
+   do not do this, then the following deadlock can occur: one
+   thread could be in the barrier sync code, waiting for the other
+   thread to also sync up, whereas another one could be trying to
+   yield, while also waiting for the other threads to sync up too.
+
+   Because the thread that does the sync barrier has left the STS, it
+   is possible that a Full GC or an evacuation pause occurs while it
+   is suspended there. This is actually safe, since entering the sync
+   barrier is one of the last things do_marking_step() does, and it
+   doesn't manipulate any data structures afterwards.
+*/
+
+void ConcurrentMark::enter_first_sync_barrier(int task_num) {
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
+
+  ConcurrentGCThread::stsLeave();
+  _first_overflow_barrier_sync.enter();
+  ConcurrentGCThread::stsJoin();
+  // at this point everyone should have synced up and not be doing any
+  // more work
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
+
+  // let task 0 do this
+  if (task_num == 0) {
+    // task 0 is responsible for clearing the global data structures
+    clear_marking_state();
+
+    if (PrintGC) {
+      gclog_or_tty->date_stamp(PrintGCDateStamps);
+      gclog_or_tty->stamp(PrintGCTimeStamps);
+      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
+    }
+  }
+
+  // after this, each task should reset its own data structures and
+  // then go into the second barrier
+}
+
+void ConcurrentMark::enter_second_sync_barrier(int task_num) {
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
+
+  ConcurrentGCThread::stsLeave();
+  _second_overflow_barrier_sync.enter();
+  ConcurrentGCThread::stsJoin();
+  // at this point everything should be re-initialised and ready to go
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
+}
+
+void ConcurrentMark::grayRoot(oop p) {
+  HeapWord* addr = (HeapWord*) p;
+  // We can't really check against _heap_start and _heap_end, since it
+  // is possible during an evacuation pause with piggy-backed
+  // initial-mark that the committed space is expanded during the
+  // pause without CM observing this change. So the assertion below
+  // is a bit conservative; but better than nothing.
+  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
+                    "address should be within the heap bounds" );
+
+  if (!_nextMarkBitMap->isMarked(addr))
+    _nextMarkBitMap->parMark(addr);
+}
+
+void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+  // The objects on the region have already been marked "in bulk" by
+  // the caller. We only need to decide whether to push the region on
+  // the region stack or not.
+
+  if (!concurrent_marking_in_progress() || !_should_gray_objects)
+    // We're done with marking and waiting for remark. We do not need to
+    // push anything else on the region stack.
+    return;
+
+  HeapWord* finger = _finger;
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[global] attempting to push "
+                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
+                           PTR_FORMAT, mr.start(), mr.end(), finger);
+
+  if (mr.start() < finger) {
+    // The finger is always heap region aligned and it is not possible
+    // for mr to span heap regions.
+    tmp_guarantee_CM( mr.end() <= finger, "invariant" );
+
+    tmp_guarantee_CM( mr.start() <= mr.end() &&
+                      _heap_start <= mr.start() &&
+                      mr.end() <= _heap_end,
+                  "region boundaries should fall within the committed space" );
+    if (verbose_low())
+      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
+                             "below the finger, pushing it",
+                             mr.start(), mr.end());
+
+    if (!region_stack_push(mr)) {
+      if (verbose_low())
+        gclog_or_tty->print_cr("[global] region stack has overflown.");
+    }
+  }
+}
+
+void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+  // The object is not marked by the caller. We need to at least mark
+  // it and maybe push it on the stack.
+
+  HeapWord* addr = (HeapWord*)p;
+  if (!_nextMarkBitMap->isMarked(addr)) {
+    // We definitely need to mark it, irrespective of whether we bail out
+    // because we're done with marking.
+    if (_nextMarkBitMap->parMark(addr)) {
+      if (!concurrent_marking_in_progress() || !_should_gray_objects)
+        // If we're done with concurrent marking and we're waiting for
+        // remark, then we're not pushing anything on the stack.
+        return;
+
+      // No OrderAccess::store_load() is needed. It is implicit in the
+      // CAS done in parMark(addr) above
+      HeapWord* finger = _finger;
+
+      if (addr < finger) {
+        if (!mark_stack_push(oop(addr))) {
+          if (verbose_low())
+            gclog_or_tty->print_cr("[global] global stack overflow "
+                                   "during parMark");
+        }
+      }
+    }
+  }
+}
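+
+// Rationale for the finger checks above and in grayRegionIfNecessary()
+// (a sketch of the underlying invariant): the global finger marks how far
+// the concurrent scan of the bitmap has progressed. Anything marked at or
+// above the finger will still be visited when the scan reaches it, so only
+// objects or regions that fall below the finger -- in the already-scanned
+// part of the heap -- need to be pushed explicitly to avoid being missed.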
+
+class CMConcurrentMarkingTask: public AbstractGangTask {
+private:
+  ConcurrentMark*       _cm;
+  ConcurrentMarkThread* _cmt;
+
+public:
+  void work(int worker_i) {
+    guarantee( Thread::current()->is_ConcurrentGC_thread(),
+               "this should only be done by a conc GC thread" );
+
+    double start_vtime = os::elapsedVTime();
+
+    ConcurrentGCThread::stsJoin();
+
+    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
+    CMTask* the_task = _cm->task(worker_i);
+    the_task->start_co_tracker();
+    the_task->record_start_time();
+    if (!_cm->has_aborted()) {
+      do {
+        double start_vtime_sec = os::elapsedVTime();
+        double start_time_sec = os::elapsedTime();
+        the_task->do_marking_step(10.0);
+        double end_time_sec = os::elapsedTime();
+        double end_vtime_sec = os::elapsedVTime();
+        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+        double elapsed_time_sec = end_time_sec - start_time_sec;
+        _cm->clear_has_overflown();
+
+        bool ret = _cm->do_yield_check(worker_i);
+
+        jlong sleep_time_ms;
+        if (!_cm->has_aborted() && the_task->has_aborted()) {
+          sleep_time_ms =
+            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
+          ConcurrentGCThread::stsLeave();
+          os::sleep(Thread::current(), sleep_time_ms, false);
+          ConcurrentGCThread::stsJoin();
+        }
+        double end_time2_sec = os::elapsedTime();
+        double elapsed_time2_sec = end_time2_sec - start_time_sec;
+
+        the_task->update_co_tracker();
+
+#if 0
+          gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
+                                 "overhead %1.4lf",
+                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
+                                 the_task->conc_overhead(os::elapsedTime()) * 8.0);
+          gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
+                                 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
+#endif
+      } while (!_cm->has_aborted() && the_task->has_aborted());
+    }
+    the_task->record_end_time();
+    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
+
+    ConcurrentGCThread::stsLeave();
+
+    double end_vtime = os::elapsedVTime();
+    the_task->update_co_tracker(true);
+    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
+  }
+
+  CMConcurrentMarkingTask(ConcurrentMark* cm,
+                          ConcurrentMarkThread* cmt) :
+      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
+
+  ~CMConcurrentMarkingTask() { }
+};
+
+void ConcurrentMark::markFromRoots() {
+  // we might be tempted to assert that:
+  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
+  //        "inconsistent argument?");
+  // However that wouldn't be right, because it's possible that
+  // a safepoint is indeed in progress as a younger generation
+  // stop-the-world GC happens even as we mark in this generation.
+
+  _restart_for_overflow = false;
+
+  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
+
+  CMConcurrentMarkingTask markingTask(this, cmThread());
+  if (parallel_marking_threads() > 0)
+    _parallel_workers->run_task(&markingTask);
+  else
+    markingTask.work(0);
+  print_stats();
+}
+
+void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
+  // world is stopped at this checkpoint
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "world should be stopped");
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If a full collection has happened, we shouldn't do this.
+  if (has_aborted()) {
+    g1h->set_marking_complete(); // So bitmap clearing isn't confused
+    return;
+  }
+
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  g1p->record_concurrent_mark_remark_start();
+
+  double start = os::elapsedTime();
+  GCOverheadReporter::recordSTWStart(start);
+
+  checkpointRootsFinalWork();
+
+  double mark_work_end = os::elapsedTime();
+
+  weakRefsWork(clear_all_soft_refs);
+
+  if (has_overflown()) {
+    // Oops.  We overflowed.  Restart concurrent marking.
+    _restart_for_overflow = true;
+    // Clear the flag. We do not need it any more.
+    clear_has_overflown();
+    if (G1TraceMarkStackOverflow)
+      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
+  } else {
+    // We're done with marking.
+    JavaThread::satb_mark_queue_set().set_active_all_threads(false);
+  }
+
+#if VERIFY_OBJS_PROCESSED
+  _scan_obj_cl.objs_processed = 0;
+  ThreadLocalObjQueue::objs_enqueued = 0;
+#endif
+
+  // Statistics
+  double now = os::elapsedTime();
+  _remark_mark_times.add((mark_work_end - start) * 1000.0);
+  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
+  _remark_times.add((now - start) * 1000.0);
+
+  GCOverheadReporter::recordSTWEnd(now);
+  for (int i = 0; i < (int)_max_task_num; ++i)
+    _tasks[i]->disable_co_tracker();
+  _cleanup_co_tracker.enable();
+  _cleanup_co_tracker.reset(cleanup_task_overhead());
+  g1p->record_concurrent_mark_remark_end();
+}
+
+
+#define CARD_BM_TEST_MODE 0
+
+class CalcLiveObjectsClosure: public HeapRegionClosure {
+
+  CMBitMapRO* _bm;
+  ConcurrentMark* _cm;
+  COTracker* _co_tracker;
+  bool _changed;
+  bool _yield;
+  size_t _words_done;
+  size_t _tot_live;
+  size_t _tot_used;
+  size_t _regions_done;
+  double _start_vtime_sec;
+
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+  intptr_t _bottom_card_num;
+  bool _final;
+
+  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
+    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
+#if CARD_BM_TEST_MODE
+      guarantee(_card_bm->at(i - _bottom_card_num),
+                "Should already be set.");
+#else
+      _card_bm->par_at_put(i - _bottom_card_num, 1);
+#endif
+    }
+  }
+
+public:
+  CalcLiveObjectsClosure(bool final,
+                         CMBitMapRO *bm, ConcurrentMark *cm,
+                         BitMap* region_bm, BitMap* card_bm,
+                         COTracker* co_tracker) :
+    _bm(bm), _cm(cm), _changed(false), _yield(true),
+    _words_done(0), _tot_live(0), _tot_used(0),
+    _region_bm(region_bm), _card_bm(card_bm),
+    _final(final), _co_tracker(co_tracker),
+    _regions_done(0), _start_vtime_sec(0.0)
+  {
+    _bottom_card_num =
+      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
+               CardTableModRefBS::card_shift);
+  }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (_co_tracker != NULL)
+      _co_tracker->update();
+
+    if (!_final && _regions_done == 0)
+      _start_vtime_sec = os::elapsedVTime();
+
+    if (hr->continuesHumongous()) return false;
+
+    HeapWord* nextTop = hr->next_top_at_mark_start();
+    HeapWord* start   = hr->top_at_conc_mark_count();
+    assert(hr->bottom() <= start && start <= hr->end() &&
+           hr->bottom() <= nextTop && nextTop <= hr->end() &&
+           start <= nextTop,
+           "Preconditions.");
+    // Otherwise, record the number of words we'll examine.
+    size_t words_done = (nextTop - start);
+    // Find the first marked object at or after "start".
+    start = _bm->getNextMarkedWordAddress(start, nextTop);
+    size_t marked_bytes = 0;
+
+    // Below, the term "card num" means the result of shifting an address
+    // by the card shift -- address 0 corresponds to card number 0.  One
+    // must subtract the card num of the bottom of the heap to obtain a
+    // card table index.
+    // The first card num of the sequence of live cards currently being
+    // constructed.  -1 ==> no sequence.
+    intptr_t start_card_num = -1;
+    // The last card num of the sequence of live cards currently being
+    // constructed.  -1 ==> no sequence.
+    intptr_t last_card_num = -1;
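+
+    // Worked example (illustrative; assumes the usual 512-byte cards,
+    // i.e. CardTableModRefBS::card_shift == 9): an object starting at
+    // address 0x12345 has obj_card_num = 0x12345 >> 9 = 0x91; with a heap
+    // bottom of 0x10000, _bottom_card_num = 0x10000 >> 9 = 0x80, so the
+    // object's first card lands at card table index 0x91 - 0x80 = 0x11.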
+
+    while (start < nextTop) {
+      if (_yield && _cm->do_yield_check()) {
+        // We yielded.  It might be for a full collection, in which case
+        // all bets are off; terminate the traversal.
+        if (_cm->has_aborted()) {
+          _changed = false;
+          return true;
+        } else {
+          // Otherwise, it might be a collection pause, and the region
+          // we're looking at might be in the collection set.  We'll
+          // abandon this region.
+          return false;
+        }
+      }
+      oop obj = oop(start);
+      int obj_sz = obj->size();
+      // The card num of the start of the current object.
+      intptr_t obj_card_num =
+        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
+
+      HeapWord* obj_last = start + obj_sz - 1;
+      intptr_t obj_last_card_num =
+        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
+
+      if (obj_card_num != last_card_num) {
+        if (start_card_num == -1) {
+          assert(last_card_num == -1, "Both or neither.");
+          start_card_num = obj_card_num;
+        } else {
+          assert(last_card_num != -1, "Both or neither.");
+          assert(obj_card_num >= last_card_num, "Inv");
+          if ((obj_card_num - last_card_num) > 1) {
+            // Mark the last run, and start a new one.
+            mark_card_num_range(start_card_num, last_card_num);
+            start_card_num = obj_card_num;
+          }
+        }
+#if CARD_BM_TEST_MODE
+        /*
+        gclog_or_tty->print_cr("Setting bits from %d/%d.",
+                               obj_card_num - _bottom_card_num,
+                               obj_last_card_num - _bottom_card_num);
+        */
+        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
+          _card_bm->par_at_put(j - _bottom_card_num, 1);
+        }
+#endif
+      }
+      // In any case, we set the last card num.
+      last_card_num = obj_last_card_num;
+
+      marked_bytes += obj_sz * HeapWordSize;
+      // Find the next marked object after this one.
+      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
+      _changed = true;
+    }
+    // Handle the last range, if any.
+    if (start_card_num != -1)
+      mark_card_num_range(start_card_num, last_card_num);
+    if (_final) {
+      // Mark the allocated-since-marking portion...
+      HeapWord* tp = hr->top();
+      if (nextTop < tp) {
+        start_card_num =
+          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
+        last_card_num =
+          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
+        mark_card_num_range(start_card_num, last_card_num);
+        // This definitely means the region has live objects.
+        _region_bm->par_at_put(hr->hrs_index(), 1);
+      }
+    }
+
+    hr->add_to_marked_bytes(marked_bytes);
+    // Update the live region bitmap.
+    if (marked_bytes > 0) {
+      _region_bm->par_at_put(hr->hrs_index(), 1);
+    }
+    hr->set_top_at_conc_mark_count(nextTop);
+    _tot_live += hr->next_live_bytes();
+    _tot_used += hr->used();
+    _words_done = words_done;
+
+    if (!_final) {
+      ++_regions_done;
+      if (_regions_done % 10 == 0) {
+        double end_vtime_sec = os::elapsedVTime();
+        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
+        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
+          jlong sleep_time_ms =
+            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
+#if 0
+          gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
+                                 "overhead %1.4lf",
+                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
+                                 _co_tracker->concOverhead(os::elapsedTime()));
+#endif
+          os::sleep(Thread::current(), sleep_time_ms, false);
+          _start_vtime_sec = end_vtime_sec;
+        }
+      }
+    }
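+
+    // For intuition (illustrative numbers): with a cleanup_sleep_factor()
+    // of, say, 2.0, a batch of regions that consumed 20 ms of virtual time
+    // is followed by a sleep above of roughly 0.020 * 2.0 * 1000 = 40 ms,
+    // which keeps this non-final counting pass near its configured
+    // overhead.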
+
+    return false;
+  }
+
+  bool changed() { return _changed;  }
+  void reset()   { _changed = false; _words_done = 0; }
+  void no_yield() { _yield = false; }
+  size_t words_done() { return _words_done; }
+  size_t tot_live() { return _tot_live; }
+  size_t tot_used() { return _tot_used; }
+};
+
+
+void ConcurrentMark::calcDesiredRegions() {
+  guarantee( _cleanup_co_tracker.enabled(), "invariant" );
+  _cleanup_co_tracker.start();
+
+  _region_bm.clear();
+  _card_bm.clear();
+  CalcLiveObjectsClosure calccl(false /*final*/,
+                                nextMarkBitMap(), this,
+                                &_region_bm, &_card_bm,
+                                &_cleanup_co_tracker);
+  G1CollectedHeap *g1h = G1CollectedHeap::heap();
+  g1h->heap_region_iterate(&calccl);
+
+  do {
+    calccl.reset();
+    g1h->heap_region_iterate(&calccl);
+  } while (calccl.changed());
+
+  _cleanup_co_tracker.update(true);
+}
+
+class G1ParFinalCountTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  CMBitMap* _bm;
+  size_t _n_workers;
+  size_t *_live_bytes;
+  size_t *_used_bytes;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+public:
+  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
+                      BitMap* region_bm, BitMap* card_bm) :
+    AbstractGangTask("G1 final counting"), _g1h(g1h),
+    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
+  {
+    if (ParallelGCThreads > 0)
+      _n_workers = _g1h->workers()->total_workers();
+    else
+      _n_workers = 1;
+    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
+    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
+  }
+
+  ~G1ParFinalCountTask() {
+    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
+    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
+  }
+
+  void work(int i) {
+    CalcLiveObjectsClosure calccl(true /*final*/,
+                                  _bm, _g1h->concurrent_mark(),
+                                  _region_bm, _card_bm,
+                                  NULL /* CO tracker */);
+    calccl.no_yield();
+    if (ParallelGCThreads > 0) {
+      _g1h->heap_region_par_iterate_chunked(&calccl, i,
+                                            HeapRegion::FinalCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&calccl);
+    }
+    assert(calccl.complete(), "Shouldn't have yielded!");
+
+    guarantee( (size_t)i < _n_workers, "invariant" );
+    _live_bytes[i] = calccl.tot_live();
+    _used_bytes[i] = calccl.tot_used();
+  }
+  size_t live_bytes()  {
+    size_t live_bytes = 0;
+    for (size_t i = 0; i < _n_workers; ++i)
+      live_bytes += _live_bytes[i];
+    return live_bytes;
+  }
+  size_t used_bytes()  {
+    size_t used_bytes = 0;
+    for (size_t i = 0; i < _n_workers; ++i)
+      used_bytes += _used_bytes[i];
+    return used_bytes;
+  }
+};
+
+class G1ParNoteEndTask;
+
+class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
+  G1CollectedHeap* _g1;
+  int _worker_num;
+  size_t _max_live_bytes;
+  size_t _regions_claimed;
+  size_t _freed_bytes;
+  size_t _cleared_h_regions;
+  size_t _freed_regions;
+  UncleanRegionList* _unclean_region_list;
+  double _claimed_region_time;
+  double _max_region_time;
+
+public:
+  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
+                             UncleanRegionList* list,
+                             int worker_num);
+  size_t freed_bytes() { return _freed_bytes; }
+  size_t cleared_h_regions() { return _cleared_h_regions; }
+  size_t freed_regions() { return  _freed_regions; }
+  UncleanRegionList* unclean_region_list() {
+    return _unclean_region_list;
+  }
+
+  bool doHeapRegion(HeapRegion *r);
+
+  size_t max_live_bytes() { return _max_live_bytes; }
+  size_t regions_claimed() { return _regions_claimed; }
+  double claimed_region_time_sec() { return _claimed_region_time; }
+  double max_region_time_sec() { return _max_region_time; }
+};
+
+class G1ParNoteEndTask: public AbstractGangTask {
+  friend class G1NoteEndOfConcMarkClosure;
+protected:
+  G1CollectedHeap* _g1h;
+  size_t _max_live_bytes;
+  size_t _freed_bytes;
+  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
+public:
+  G1ParNoteEndTask(G1CollectedHeap* g1h,
+                   ConcurrentMark::ParCleanupThreadState**
+                   par_cleanup_thread_state) :
+    AbstractGangTask("G1 note end"), _g1h(g1h),
+    _max_live_bytes(0), _freed_bytes(0),
+    _par_cleanup_thread_state(par_cleanup_thread_state)
+  {}
+
+  void work(int i) {
+    double start = os::elapsedTime();
+    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
+                                           &_par_cleanup_thread_state[i]->list,
+                                           i);
+    if (ParallelGCThreads > 0) {
+      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+                                            HeapRegion::NoteEndClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&g1_note_end);
+    }
+    assert(g1_note_end.complete(), "Shouldn't have yielded!");
+
+    // Now finish up freeing the current thread's regions.
+    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
+                                  g1_note_end.cleared_h_regions(),
+                                  0, NULL);
+    {
+      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      _max_live_bytes += g1_note_end.max_live_bytes();
+      _freed_bytes += g1_note_end.freed_bytes();
+    }
+    double end = os::elapsedTime();
+    if (G1PrintParCleanupStats) {
+      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
+                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
+                          i, start, end, (end-start)*1000.0,
+                          g1_note_end.regions_claimed(),
+                          g1_note_end.claimed_region_time_sec()*1000.0,
+                          g1_note_end.max_region_time_sec()*1000.0);
+    }
+  }
+  size_t max_live_bytes() { return _max_live_bytes; }
+  size_t freed_bytes() { return _freed_bytes; }
+};
+
+class G1ParScrubRemSetTask: public AbstractGangTask {
+protected:
+  G1RemSet* _g1rs;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+public:
+  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
+                       BitMap* region_bm, BitMap* card_bm) :
+    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
+    _region_bm(region_bm), _card_bm(card_bm)
+  {}
+
+  void work(int i) {
+    if (ParallelGCThreads > 0) {
+      _g1rs->scrub_par(_region_bm, _card_bm, i,
+                       HeapRegion::ScrubRemSetClaimValue);
+    } else {
+      _g1rs->scrub(_region_bm, _card_bm);
+    }
+  }
+
+};
+
+G1NoteEndOfConcMarkClosure::
+G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
+                           UncleanRegionList* list,
+                           int worker_num)
+  : _g1(g1), _worker_num(worker_num),
+    _max_live_bytes(0), _regions_claimed(0),
+    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
+    _claimed_region_time(0.0), _max_region_time(0.0),
+    _unclean_region_list(list)
+{}
+
+bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
+  // We use a claim value of zero here because all regions
+  // were claimed with value 1 in the FinalCount task.
+  r->reset_gc_time_stamp();
+  if (!r->continuesHumongous()) {
+    double start = os::elapsedTime();
+    _regions_claimed++;
+    r->note_end_of_marking();
+    _max_live_bytes += r->max_live_bytes();
+    _g1->free_region_if_totally_empty_work(r,
+                                           _freed_bytes,
+                                           _cleared_h_regions,
+                                           _freed_regions,
+                                           _unclean_region_list,
+                                           true /*par*/);
+    double region_time = (os::elapsedTime() - start);
+    _claimed_region_time += region_time;
+    if (region_time > _max_region_time) _max_region_time = region_time;
+  }
+  return false;
+}
+
+void ConcurrentMark::cleanup() {
+  // world is stopped at this checkpoint
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "world should be stopped");
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If a full collection has happened, we shouldn't do this.
+  if (has_aborted()) {
+    g1h->set_marking_complete(); // So bitmap clearing isn't confused
+    return;
+  }
+
+  _cleanup_co_tracker.disable();
+
+  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+  g1p->record_concurrent_mark_cleanup_start();
+
+  double start = os::elapsedTime();
+  GCOverheadReporter::recordSTWStart(start);
+
+  // Do counting once more with the world stopped for good measure.
+  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
+                                        &_region_bm, &_card_bm);
+  if (ParallelGCThreads > 0) {
+    assert(g1h->check_heap_region_claim_values(
+                                               HeapRegion::InitialClaimValue),
+           "sanity check");
+
+    int n_workers = g1h->workers()->total_workers();
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&g1_par_count_task);
+    g1h->set_par_threads(0);
+
+    assert(g1h->check_heap_region_claim_values(
+                                             HeapRegion::FinalCountClaimValue),
+           "sanity check");
+  } else {
+    g1_par_count_task.work(0);
+  }
+
+  size_t known_garbage_bytes =
+    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
+#if 0
+  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
+                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
+                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
+                         (double) known_garbage_bytes / (double) (1024 * 1024));
+#endif // 0
+  g1p->set_known_garbage_bytes(known_garbage_bytes);
+
+  size_t start_used_bytes = g1h->used();
+  _at_least_one_mark_complete = true;
+  g1h->set_marking_complete();
+
+  double count_end = os::elapsedTime();
+  double this_final_counting_time = (count_end - start);
+  if (G1PrintParCleanupStats) {
+    gclog_or_tty->print_cr("Cleanup:");
+    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
+                           this_final_counting_time*1000.0);
+  }
+  _total_counting_time += this_final_counting_time;
+
+  // Install newly created mark bitMap as "prev".
+  swapMarkBitMaps();
+
+  g1h->reset_gc_time_stamp();
+
+  // Note end of marking in all heap regions.
+  double note_end_start = os::elapsedTime();
+  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
+  if (ParallelGCThreads > 0) {
+    int n_workers = g1h->workers()->total_workers();
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&g1_par_note_end_task);
+    g1h->set_par_threads(0);
+
+    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
+           "sanity check");
+  } else {
+    g1_par_note_end_task.work(0);
+  }
+  // Tell the mutators that there might be unclean regions coming...
+  g1h->set_unclean_regions_coming(true);
+  double note_end_end = os::elapsedTime();
+  if (G1PrintParCleanupStats) {
+    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
+                           (note_end_end - note_end_start)*1000.0);
+  }
+
+  // Now we "scrub" the remembered sets. Note that we must do this before the
+  // call below, since it affects the metric by which we sort the heap
+  // regions.
+  if (G1ScrubRemSets) {
+    double rs_scrub_start = os::elapsedTime();
+    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
+    if (ParallelGCThreads > 0) {
+      int n_workers = g1h->workers()->total_workers();
+      g1h->set_par_threads(n_workers);
+      g1h->workers()->run_task(&g1_par_scrub_rs_task);
+      g1h->set_par_threads(0);
+
+      assert(g1h->check_heap_region_claim_values(
+                                            HeapRegion::ScrubRemSetClaimValue),
+             "sanity check");
+    } else {
+      g1_par_scrub_rs_task.work(0);
+    }
+
+    double rs_scrub_end = os::elapsedTime();
+    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
+    _total_rs_scrub_time += this_rs_scrub_time;
+  }
+
+  // this will also free any regions totally full of garbage objects,
+  // and sort the regions.
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
+                        g1_par_note_end_task.freed_bytes(),
+                        g1_par_note_end_task.max_live_bytes());
+
+  // Statistics.
+  double end = os::elapsedTime();
+  _cleanup_times.add((end - start) * 1000.0);
+  GCOverheadReporter::recordSTWEnd(end);
+
+  // G1CollectedHeap::heap()->print();
+  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
+  // G1CollectedHeap::heap()->get_gc_time_stamp());
+
+  if (PrintGC || PrintGCDetails) {
+    g1h->print_size_transition(gclog_or_tty,
+                               start_used_bytes,
+                               g1h->used(),
+                               g1h->capacity());
+  }
+
+  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
+  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
+
+  // We need to make this be a "collection" so any collection pause that
+  // races with it goes around and waits for completeCleanup to finish.
+  g1h->increment_total_collections();
+
+#ifndef PRODUCT
+  if (G1VerifyConcMark) {
+    G1CollectedHeap::heap()->prepare_for_verify();
+    G1CollectedHeap::heap()->verify(true, false);
+  }
+#endif
+}
+
+void ConcurrentMark::completeCleanup() {
+  // A full collection intervened.
+  if (has_aborted()) return;
+
+  int first = 0;
+  int last = (int)MAX2(ParallelGCThreads, (size_t)1);
+  for (int t = 0; t < last; t++) {
+    UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
+    assert(list->well_formed(), "Inv");
+    HeapRegion* hd = list->hd();
+    while (hd != NULL) {
+      // Now finish up the other stuff.
+      hd->rem_set()->clear();
+      HeapRegion* next_hd = hd->next_from_unclean_list();
+      (void)list->pop();
+      guarantee(list->hd() == next_hd, "how not?");
+      _g1h->put_region_on_unclean_list(hd);
+      if (!hd->isHumongous()) {
+        // Increment the _free_regions count by one.
+        _g1h->finish_free_region_work(0, 0, 1, NULL);
+      }
+      hd = list->hd();
+      guarantee(hd == next_hd, "how not?");
+    }
+  }
+}
+
+
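+// Liveness predicate handed to the reference processor: objects outside the
+// G1 reserved space are always treated as alive; objects inside it are alive
+// if they are not "ill" according to the marking information.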
+class G1CMIsAliveClosure: public BoolObjectClosure {
+  G1CollectedHeap* _g1;
+ public:
+  G1CMIsAliveClosure(G1CollectedHeap* g1) :
+    _g1(g1)
+  {}
+
+  void do_object(oop obj) {
+    assert(false, "not to be invoked");
+  }
+  bool do_object_b(oop obj) {
+    HeapWord* addr = (HeapWord*)obj;
+    return addr != NULL &&
+           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
+  }
+};
+
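+// Keep-alive closure for reference processing: a referent that is in the G1
+// heap and still unmarked is marked in the given bitmap and pushed on the
+// global mark stack, so the drain closure below can trace from it.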
+class G1CMKeepAliveClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  ConcurrentMark*  _cm;
+  CMBitMap*        _bitMap;
+ public:
+  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
+                       CMBitMap* bitMap) :
+    _g1(g1), _cm(cm),
+    _bitMap(bitMap) {}
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop thisOop = *p;
+    HeapWord* addr = (HeapWord*)thisOop;
+    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
+      _bitMap->mark(addr);
+      _cm->mark_stack_push(thisOop);
+    }
+  }
+};
+
+class G1CMDrainMarkingStackClosure: public VoidClosure {
+  CMMarkStack*                  _markStack;
+  CMBitMap*                     _bitMap;
+  G1CMKeepAliveClosure*         _oopClosure;
+ public:
+  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
+                               G1CMKeepAliveClosure* oopClosure) :
+    _bitMap(bitMap),
+    _markStack(markStack),
+    _oopClosure(oopClosure)
+  {}
+
+  void do_void() {
+    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
+  }
+};
+
+void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
+  ResourceMark rm;
+  HandleMark   hm;
+  ReferencePolicy* soft_ref_policy;
+
+  // Process weak references.
+  if (clear_all_soft_refs) {
+    soft_ref_policy = new AlwaysClearPolicy();
+  } else {
+#ifdef COMPILER2
+    soft_ref_policy = new LRUMaxHeapPolicy();
+#else
+    soft_ref_policy = new LRUCurrentHeapPolicy();
+#endif
+  }
+  assert(_markStack.isEmpty(), "mark stack should be empty");
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  G1CMIsAliveClosure g1IsAliveClosure(g1);
+
+  G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap());
+  G1CMDrainMarkingStackClosure
+    g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
+                               &g1KeepAliveClosure);
+
+  // XXXYYY  Also: copy the parallel ref processing code from CMS.
+  ReferenceProcessor* rp = g1->ref_processor();
+  rp->process_discovered_references(soft_ref_policy,
+                                    &g1IsAliveClosure,
+                                    &g1KeepAliveClosure,
+                                    &g1DrainMarkingStackClosure,
+                                    NULL);
+  assert(_markStack.overflow() || _markStack.isEmpty(),
+         "mark stack should be empty (unless it overflowed)");
+  if (_markStack.overflow()) {
+    set_has_overflown();
+  }
+
+  rp->enqueue_discovered_references();
+  rp->verify_no_references_recorded();
+  assert(!rp->discovery_enabled(), "should have been disabled");
+
+  // Now clean up stale oops in SymbolTable and StringTable
+  SymbolTable::unlink(&g1IsAliveClosure);
+  StringTable::unlink(&g1IsAliveClosure);
+}
+
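+// After marking completes, the "next" bitmap holds the up-to-date liveness
+// information, so it takes over the role of the (read-only) "prev" bitmap;
+// the old "prev" bitmap becomes the "next" bitmap for the following cycle.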
+void ConcurrentMark::swapMarkBitMaps() {
+  CMBitMapRO* temp = _prevMarkBitMap;
+  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
+  _nextMarkBitMap  = (CMBitMap*)  temp;
+}
+
+class CMRemarkTask: public AbstractGangTask {
+private:
+  ConcurrentMark *_cm;
+
+public:
+  void work(int worker_i) {
+    // Since all available tasks are actually started, we should
+    // only proceed if we're supposed to be active.
+    if ((size_t)worker_i < _cm->active_tasks()) {
+      CMTask* task = _cm->task(worker_i);
+      task->record_start_time();
+      do {
+        task->do_marking_step(1000000000.0 /* something very large */);
+      } while (task->has_aborted() && !_cm->has_overflown());
+      // If we overflow, then we do not want to restart. We instead
+      // want to abort remark and do concurrent marking again.
+      task->record_end_time();
+    }
+  }
+
+  CMRemarkTask(ConcurrentMark* cm) :
+    AbstractGangTask("Par Remark"), _cm(cm) { }
+};
+
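+// The stop-the-world work of the remark pause: run CMRemarkTask (in parallel
+// if ParallelGCThreads > 0) to finish marking and drain the remaining SATB
+// buffers; on exit no completed SATB buffers should remain.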
+void ConcurrentMark::checkpointRootsFinalWork() {
+  ResourceMark rm;
+  HandleMark   hm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  g1h->ensure_parsability(false);
+
+  if (ParallelGCThreads > 0) {
+    g1h->change_strong_roots_parity();
+    // this is remark, so we'll use up all available threads
+    int active_workers = ParallelGCThreads;
+    set_phase(active_workers, false);
+
+    CMRemarkTask remarkTask(this);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
+    int n_workers = g1h->workers()->total_workers();
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&remarkTask);
+    g1h->set_par_threads(0);
+
+    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  } else {
+    g1h->change_strong_roots_parity();
+    // this is remark, so we'll use up all available threads
+    int active_workers = 1;
+    set_phase(active_workers, false);
+
+    CMRemarkTask remarkTask(this);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
+    remarkTask.work(0);
+
+    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  }
+
+  print_stats();
+
+  if (!restart_for_overflow())
+    set_non_marking_state();
+
+#if VERIFY_OBJS_PROCESSED
+  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
+    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
+                           _scan_obj_cl.objs_processed,
+                           ThreadLocalObjQueue::objs_enqueued);
+    guarantee(_scan_obj_cl.objs_processed ==
+              ThreadLocalObjQueue::objs_enqueued,
+              "Different number of objs processed and enqueued.");
+  }
+#endif
+}
+
+class ReachablePrinterOopClosure: public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  CMBitMapRO*      _bitmap;
+  outputStream*    _out;
+
+public:
+  ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
+    _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop         obj = *p;
+    const char* str = NULL;
+    const char* str2 = "";
+
+    if (!_g1h->is_in_g1_reserved(obj))
+      str = "outside G1 reserved";
+    else {
+      HeapRegion* hr  = _g1h->heap_region_containing(obj);
+      guarantee( hr != NULL, "invariant" );
+      if (hr->obj_allocated_since_prev_marking(obj)) {
+        str = "over TAMS";
+        if (_bitmap->isMarked((HeapWord*) obj))
+          str2 = " AND MARKED";
+      } else if (_bitmap->isMarked((HeapWord*) obj))
+        str = "marked";
+      else
+        str = "#### NOT MARKED ####";
+    }
+
+    _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
+                   p, (void*) obj, str, str2);
+  }
+};
+
+class ReachablePrinterClosure: public BitMapClosure {
+private:
+  CMBitMapRO* _bitmap;
+  outputStream* _out;
+
+public:
+  ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
+    _bitmap(bitmap), _out(out) { }
+
+  bool do_bit(size_t offset) {
+    HeapWord* addr = _bitmap->offsetToHeapWord(offset);
+    ReachablePrinterOopClosure oopCl(_bitmap, _out);
+
+    _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
+    oop(addr)->oop_iterate(&oopCl);
+    _out->print_cr("");
+
+    return true;
+  }
+};
+
+class ObjInRegionReachablePrinterClosure : public ObjectClosure {
+private:
+  CMBitMapRO* _bitmap;
+  outputStream* _out;
+
+public:
+  void do_object(oop o) {
+    ReachablePrinterOopClosure oopCl(_bitmap, _out);
+
+    _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
+    o->oop_iterate(&oopCl);
+    _out->print_cr("");
+  }
+
+  ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
+    _bitmap(bitmap), _out(out) { }
+};
+
+class RegionReachablePrinterClosure : public HeapRegionClosure {
+private:
+  CMBitMapRO* _bitmap;
+  outputStream* _out;
+
+public:
+  bool doHeapRegion(HeapRegion* hr) {
+    HeapWord* b = hr->bottom();
+    HeapWord* e = hr->end();
+    HeapWord* t = hr->top();
+    HeapWord* p = hr->prev_top_at_mark_start();
+    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
+                   "PTAMS: "PTR_FORMAT, b, e, t, p);
+    _out->print_cr("");
+
+    ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
+    hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
+
+    return false;
+  }
+
+  RegionReachablePrinterClosure(CMBitMapRO* bitmap,
+                                outputStream* out) :
+    _bitmap(bitmap), _out(out) { }
+};
+
+void ConcurrentMark::print_prev_bitmap_reachable() {
+  outputStream* out = gclog_or_tty;
+
+#if SEND_HEAP_DUMP_TO_FILE
+  guarantee(heap_dump_file == NULL, "Protocol");
+  char fn_buf[100];
+  sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
+  heap_dump_file = fopen(fn_buf, "w");
+  fileStream fstream(heap_dump_file);
+  out = &fstream;
+#endif // SEND_HEAP_DUMP_TO_FILE
+
+  RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
+  out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
+  _g1h->heap_region_iterate(&rcl);
+  out->print_cr("");
+
+  ReachablePrinterClosure cl(_prevMarkBitMap, out);
+  out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
+  _prevMarkBitMap->iterate(&cl);
+  out->print_cr("");
+
+#if SEND_HEAP_DUMP_TO_FILE
+  fclose(heap_dump_file);
+  heap_dump_file = NULL;
+#endif // SEND_HEAP_DUMP_TO_FILE
+}
+
+// This note is for drainAllSATBBuffers and the code in between.
+// In the future we could reuse a task to do this work during an
+// evacuation pause (since now tasks are not active and can be claimed
+// during an evacuation pause). This was a late change to the code and
+// is currently not being taken advantage of.
+
+class CMGlobalObjectClosure : public ObjectClosure {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  void do_object(oop obj) {
+    _cm->deal_with_reference(obj);
+  }
+
+  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
+};
+
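+// Mark an object found outside the normal marking traversal (e.g. via the
+// SATB buffers): if it is in the G1 heap and not yet marked, mark it in the
+// next bitmap and, if it lies below the global finger, push it on the global
+// mark stack so that it will be scanned.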
+void ConcurrentMark::deal_with_reference(oop obj) {
+  if (verbose_high())
+    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
+                           (void*) obj);
+
+  HeapWord* objAddr = (HeapWord*) obj;
+  if (_g1h->is_in_g1_reserved(objAddr)) {
+    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    HeapRegion* hr = _g1h->heap_region_containing(obj);
+    if (_g1h->is_obj_ill(obj, hr)) {
+      if (verbose_high())
+        gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
+                               "marked", (void*) obj);
+
+      // we need to mark it first
+      if (_nextMarkBitMap->parMark(objAddr)) {
+        // No OrderAccess::store_load() is needed. It is implicit in the
+        // CAS done in parMark(objAddr) above
+        HeapWord* finger = _finger;
+        if (objAddr < finger) {
+          if (verbose_high())
+            gclog_or_tty->print_cr("[global] below the global finger "
+                                   "("PTR_FORMAT"), pushing it", finger);
+          if (!mark_stack_push(obj)) {
+            if (verbose_low())
+              gclog_or_tty->print_cr("[global] global stack overflow during "
+                                     "deal_with_reference");
+          }
+        }
+      }
+    }
+  }
+}
+
+void ConcurrentMark::drainAllSATBBuffers() {
+  CMGlobalObjectClosure oc(this);
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.set_closure(&oc);
+
+  while (satb_mq_set.apply_closure_to_completed_buffer()) {
+    if (verbose_medium())
+      gclog_or_tty->print_cr("[global] processed an SATB buffer");
+  }
+
+  // no need to check whether we should do this, as this is only
+  // called during an evacuation pause
+  satb_mq_set.iterate_closure_all_threads();
+
+  satb_mq_set.set_closure(NULL);
+  guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+}
+
+void ConcurrentMark::markPrev(oop p) {
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
+}
+
+void ConcurrentMark::clear(oop p) {
+  assert(p != NULL && p->is_oop(), "expected an oop");
+  HeapWord* addr = (HeapWord*)p;
+  assert(addr >= _nextMarkBitMap->startWord() &&
+         addr < _nextMarkBitMap->endWord(), "in a region");
+
+  _nextMarkBitMap->clear(addr);
+}
+
+void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+  _nextMarkBitMap->clearRange(mr);
+}
+
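+// Claim the next region to scan by atomically moving the global finger to
+// the end of the region it currently points into. Returns the claimed region
+// if it is non-empty, or NULL either when the claimed region turned out to
+// be empty (the caller should simply call claim_region() again) or when the
+// finger has reached the end of the heap.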
+HeapRegion*
+ConcurrentMark::claim_region(int task_num) {
+  // "checkpoint" the finger
+  HeapWord* finger = _finger;
+
+  // _heap_end will not change underneath our feet; it only changes at
+  // yield points.
+  while (finger < _heap_end) {
+    tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
+
+    // is the gap between reading the finger and doing the CAS too long?
+
+    HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
+    HeapWord*   bottom        = curr_region->bottom();
+    HeapWord*   end           = curr_region->end();
+    HeapWord*   limit         = curr_region->next_top_at_mark_start();
+
+    if (verbose_low())
+      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
+                             "["PTR_FORMAT", "PTR_FORMAT"), "
+                             "limit = "PTR_FORMAT,
+                             task_num, curr_region, bottom, end, limit);
+
+    HeapWord* res =
+      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
+    if (res == finger) {
+      // we succeeded
+
+      // notice that _finger == end cannot be guaranteed here since
+      // someone else might have moved the finger even further
+      guarantee( _finger >= end, "the finger should have moved forward" );
+
+      if (verbose_low())
+        gclog_or_tty->print_cr("[%d] we were successful with region = "
+                               PTR_FORMAT, task_num, curr_region);
+
+      if (limit > bottom) {
+        if (verbose_low())
+          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
+                                 "returning it ", task_num, curr_region);
+        return curr_region;
+      } else {
+        tmp_guarantee_CM( limit == bottom,
+                          "the region limit should be at bottom" );
+        if (verbose_low())
+          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
+                                 "returning NULL", task_num, curr_region);
+        // we return NULL and the caller should try calling
+        // claim_region() again.
+        return NULL;
+      }
+    } else {
+      guarantee( _finger > finger, "the finger should have moved forward" );
+      if (verbose_low())
+        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
+                               "global finger = "PTR_FORMAT", "
+                               "our finger = "PTR_FORMAT,
+                               task_num, _finger, finger);
+
+      // read it again
+      finger = _finger;
+    }
+  }
+
+  return NULL;
+}
+
+void ConcurrentMark::oops_do(OopClosure* cl) {
+  if (_markStack.size() > 0 && verbose_low())
+    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
+                           "size = %d", _markStack.size());
+  // we first iterate over the contents of the mark stack...
+  _markStack.oops_do(cl);
+
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    OopTaskQueue* queue = _task_queues->queue((int)i);
+
+    if (queue->size() > 0 && verbose_low())
+      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
+                             "size = %d", i, queue->size());
+
+    // ...then over the contents of all the task queues.
+    queue->oops_do(cl);
+  }
+
+  // finally, invalidate any entries in the region stack that
+  // point into the collection set
+  if (_regionStack.invalidate_entries_into_cset()) {
+    // otherwise, any gray objects copied during the evacuation pause
+    // might not be visited.
+    guarantee( _should_gray_objects, "invariant" );
+  }
+}
+
+void ConcurrentMark::clear_marking_state() {
+  _markStack.setEmpty();
+  _markStack.clear_overflow();
+  _regionStack.setEmpty();
+  _regionStack.clear_overflow();
+  clear_has_overflown();
+  _finger = _heap_start;
+
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    OopTaskQueue* queue = _task_queues->queue(i);
+    queue->set_empty();
+  }
+}
+
+void ConcurrentMark::print_stats() {
+  if (verbose_stats()) {
+    gclog_or_tty->print_cr("---------------------------------------------------------------------");
+    for (size_t i = 0; i < _active_tasks; ++i) {
+      _tasks[i]->print_stats();
+      gclog_or_tty->print_cr("---------------------------------------------------------------------");
+    }
+  }
+}
+
+class CSMarkOopClosure: public OopClosure {
+  friend class CSMarkBitMapClosure;
+
+  G1CollectedHeap* _g1h;
+  CMBitMap*        _bm;
+  ConcurrentMark*  _cm;
+  oop*             _ms;
+  jint*            _array_ind_stack;
+  int              _ms_size;
+  int              _ms_ind;
+  int              _array_increment;
+
+  bool push(oop obj, int arr_ind = 0) {
+    if (_ms_ind == _ms_size) {
+      gclog_or_tty->print_cr("Mark stack is full.");
+      return false;
+    }
+    _ms[_ms_ind] = obj;
+    if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
+    _ms_ind++;
+    return true;
+  }
+
+  oop pop() {
+    if (_ms_ind == 0) return NULL;
+    else {
+      _ms_ind--;
+      return _ms[_ms_ind];
+    }
+  }
+
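+  // Drain the local mark stack. Object arrays are processed _array_increment
+  // elements at a time: the array is pushed back with the index of the next
+  // chunk before the current chunk is scanned, so the work done per stack
+  // entry stays bounded.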
+  bool drain() {
+    while (_ms_ind > 0) {
+      oop obj = pop();
+      assert(obj != NULL, "Since index was non-zero.");
+      if (obj->is_objArray()) {
+        jint arr_ind = _array_ind_stack[_ms_ind];
+        objArrayOop aobj = objArrayOop(obj);
+        jint len = aobj->length();
+        jint next_arr_ind = arr_ind + _array_increment;
+        if (next_arr_ind < len) {
+          push(obj, next_arr_ind);
+        }
+        // Now process this portion of the array.
+        int lim = MIN2(next_arr_ind, len);
+        assert(!UseCompressedOops, "This needs to be fixed");
+        for (int j = arr_ind; j < lim; j++) {
+          do_oop(aobj->obj_at_addr<oop>(j));
+        }
+
+      } else {
+        obj->oop_iterate(this);
+      }
+      if (abort()) return false;
+    }
+    return true;
+  }
+
+public:
+  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
+    _g1h(G1CollectedHeap::heap()),
+    _cm(cm),
+    _bm(cm->nextMarkBitMap()),
+    _ms_size(ms_size), _ms_ind(0),
+    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
+    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
+    _array_increment(MAX2(ms_size/8, 16))
+  {}
+
+  ~CSMarkOopClosure() {
+    FREE_C_HEAP_ARRAY(oop, _ms);
+    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
+  }
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop obj = *p;
+    if (obj == NULL) return;
+    if (obj->is_forwarded()) {
+      // If the object has already been forwarded, we have to make sure
+      // that it's marked.  So follow the forwarding pointer.  Note that
+      // this does the right thing for self-forwarding pointers in the
+      // evacuation failure case.
+      obj = obj->forwardee();
+    }
+    HeapRegion* hr = _g1h->heap_region_containing(obj);
+    if (hr != NULL) {
+      if (hr->in_collection_set()) {
+        if (_g1h->is_obj_ill(obj)) {
+          _bm->mark((HeapWord*)obj);
+          if (!push(obj)) {
+            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
+            set_abort();
+          }
+        }
+      } else {
+        // Outside the collection set; we need to gray it
+        _cm->deal_with_reference(obj);
+      }
+    }
+  }
+};
+
+class CSMarkBitMapClosure: public BitMapClosure {
+  G1CollectedHeap* _g1h;
+  CMBitMap*        _bitMap;
+  ConcurrentMark*  _cm;
+  CSMarkOopClosure _oop_cl;
+public:
+  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
+    _g1h(G1CollectedHeap::heap()),
+    _bitMap(cm->nextMarkBitMap()),
+    _oop_cl(cm, ms_size)
+  {}
+
+  ~CSMarkBitMapClosure() {}
+
+  bool do_bit(size_t offset) {
+    // convert offset into a HeapWord*
+    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
+    assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
+           "address out of range");
+    assert(_bitMap->isMarked(addr), "tautology");
+    oop obj = oop(addr);
+    if (!obj->is_forwarded()) {
+      if (!_oop_cl.push(obj)) return false;
+      if (!_oop_cl.drain()) return false;
+    }
+    // Otherwise...
+    return true;
+  }
+};
+
+
+class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
+  CMBitMap* _bm;
+  CSMarkBitMapClosure _bit_cl;
+  enum SomePrivateConstants {
+    MSSize = 1000
+  };
+  bool _completed;
+public:
+  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
+    _bm(cm->nextMarkBitMap()),
+    _bit_cl(cm, MSSize),
+    _completed(true)
+  {}
+
+  ~CompleteMarkingInCSHRClosure() {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->evacuation_failed()) {
+      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
+      if (!mr.is_empty()) {
+        if (!_bm->iterate(&_bit_cl, mr)) {
+          _completed = false;
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  bool completed() { return _completed; }
+};
+
+class ClearMarksInHRClosure: public HeapRegionClosure {
+  CMBitMap* _bm;
+public:
+  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
+      MemRegion usedMR = r->used_region();
+      _bm->clearRange(usedMR);
+    }
+    return false;
+  }
+};
+
+void ConcurrentMark::complete_marking_in_collection_set() {
+  G1CollectedHeap* g1h =  G1CollectedHeap::heap();
+
+  if (!g1h->mark_in_progress()) {
+    g1h->g1_policy()->record_mark_closure_time(0.0);
+    return;
+  }
+
+  int i = 1;
+  double start = os::elapsedTime();
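+  // Iterate over the collection set repeatedly: a pass can be cut short when
+  // the closure's fixed-size mark stack overflows (completed() returns
+  // false), in which case we simply start another pass.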
+  while (true) {
+    i++;
+    CompleteMarkingInCSHRClosure cmplt(this);
+    g1h->collection_set_iterate(&cmplt);
+    if (cmplt.completed()) break;
+  }
+  double end_time = os::elapsedTime();
+  double elapsed_time_ms = (end_time - start) * 1000.0;
+  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
+  if (PrintGCDetails) {
+    gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
+  }
+
+  ClearMarksInHRClosure clr(nextMarkBitMap());
+  g1h->collection_set_iterate(&clr);
+}
+
+// The next two methods deal with the following optimisation. Some
+// objects are gray by being marked and located above the finger. If
+// they are copied, during an evacuation pause, below the finger then
+// they need to be pushed on the stack. The observation is that, if
+// there are no regions in the collection set located above the
+// finger, then the above cannot happen, hence we do not need to
+// explicitly gray any objects when copying them to below the
+// finger. The global stack will be scanned to ensure that, if it
+// points to objects being copied, it will update their
+// location. There is a tricky situation with the gray objects in
+// the region stack that are being copied, however. See the comment in
+// newCSet().
+
+void ConcurrentMark::newCSet() {
+  if (!concurrent_marking_in_progress())
+    // nothing to do if marking is not in progress
+    return;
+
+  // find what the lowest finger is among the global and local fingers
+  _min_finger = _finger;
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    CMTask* task = _tasks[i];
+    HeapWord* task_finger = task->finger();
+    if (task_finger != NULL && task_finger < _min_finger)
+      _min_finger = task_finger;
+  }
+
+  _should_gray_objects = false;
+
+  // This fixes a very subtle and frustrating bug. It might be the case
+  // that, during an evacuation pause, heap regions that contain
+  // objects that are gray (by being in regions contained in the
+  // region stack) are included in the collection set. Since such gray
+  // objects will be moved, and because it's not easy to redirect
+  // region stack entries to point to a new location (because objects
+  // in one region might be scattered to multiple regions after they
+  // are copied), one option is to ensure that all marked objects
+  // copied during a pause are pushed on the stack. Notice, however,
+  // that this problem can only happen when the region stack is not
+  // empty during an evacuation pause. So, we make the fix a bit less
+  // conservative and ensure that regions are pushed on the stack,
+  // irrespective of whether all collection set regions are below the
+  // finger, if the region stack is not empty. This is expected to be
+  // a rare case, so I don't think it's necessary to be smarter about it.
+  if (!region_stack_empty())
+    _should_gray_objects = true;
+}
+
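+// Called when a region is added to the collection set. If the region extends
+// beyond the lowest (global or local) finger computed in newCSet(), objects
+// in it may be gray only by virtue of their position, so copied objects will
+// have to be explicitly grayed.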
+void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  if (!concurrent_marking_in_progress())
+    return;
+
+  HeapWord* region_end = hr->end();
+  if (region_end > _min_finger)
+    _should_gray_objects = true;
+}
+
+void ConcurrentMark::disable_co_trackers() {
+  if (has_aborted()) {
+    if (_cleanup_co_tracker.enabled())
+      _cleanup_co_tracker.disable();
+    for (int i = 0; i < (int)_max_task_num; ++i) {
+      CMTask* task = _tasks[i];
+      if (task->co_tracker_enabled())
+        task->disable_co_tracker();
+    }
+  } else {
+    guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
+    for (int i = 0; i < (int)_max_task_num; ++i) {
+      CMTask* task = _tasks[i];
+      guarantee( !task->co_tracker_enabled(), "invariant" );
+    }
+  }
+}
+
+// abandon current marking iteration due to a Full GC
+void ConcurrentMark::abort() {
+  // If we're not marking, nothing to do.
+  if (!G1ConcMark) return;
+
+  // Clear all marks to force marking thread to do nothing
+  _nextMarkBitMap->clearAll();
+  // Empty mark stack
+  clear_marking_state();
+  for (int i = 0; i < (int)_max_task_num; ++i)
+    _tasks[i]->clear_region_fields();
+  _has_aborted = true;
+
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.abandon_partial_marking();
+  satb_mq_set.set_active_all_threads(false);
+}
+
+static void print_ms_time_info(const char* prefix, const char* name,
+                               NumberSeq& ns) {
+  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
+                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
+  if (ns.num() > 0) {
+    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
+                           prefix, ns.sd(), ns.maximum());
+  }
+}
+
+void ConcurrentMark::print_summary_info() {
+  gclog_or_tty->print_cr(" Concurrent marking:");
+  print_ms_time_info("  ", "init marks", _init_times);
+  print_ms_time_info("  ", "remarks", _remark_times);
+  {
+    print_ms_time_info("     ", "final marks", _remark_mark_times);
+    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
+  }
+  print_ms_time_info("  ", "cleanups", _cleanup_times);
+  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
+                         _total_counting_time,
+                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
+                          (double)_cleanup_times.num()
+                         : 0.0));
+  if (G1ScrubRemSets) {
+    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
+                           _total_rs_scrub_time,
+                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
+                            (double)_cleanup_times.num()
+                           : 0.0));
+  }
+  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
+                         (_init_times.sum() + _remark_times.sum() +
+                          _cleanup_times.sum())/1000.0);
+  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
+                "(%8.2f s marking, %8.2f s counting).",
+                cmThread()->vtime_accum(),
+                cmThread()->vtime_mark_accum(),
+                cmThread()->vtime_count_accum());
+}
+
+// Closures
+// XXX: there seems to be a lot of code duplication here;
+// should refactor and consolidate the shared code.
+
+// We take a break if someone is trying to stop the world.
+bool ConcurrentMark::do_yield_check(int worker_i) {
+  if (should_yield()) {
+    if (worker_i == 0)
+      _g1h->g1_policy()->record_concurrent_pause();
+    cmThread()->yield();
+    if (worker_i == 0)
+      _g1h->g1_policy()->record_concurrent_pause_end();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool ConcurrentMark::should_yield() {
+  return cmThread()->should_yield();
+}
+
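+// Map a heap address to its card index (its byte offset from the start of
+// the reserved region, shifted by the card size) and test the corresponding
+// bit in _card_bm.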
+bool ConcurrentMark::containing_card_is_marked(void* p) {
+  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
+  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
+}
+
+bool ConcurrentMark::containing_cards_are_marked(void* start,
+                                                 void* last) {
+  return
+    containing_card_is_marked(start) &&
+    containing_card_is_marked(last);
+}
+
+#ifndef PRODUCT
+// for debugging purposes
+void ConcurrentMark::print_finger() {
+  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
+                         _heap_start, _heap_end, _finger);
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
+  }
+  gclog_or_tty->print_cr("");
+}
+#endif
+
+// Closure for iteration over bitmaps
+class CMBitMapClosure : public BitMapClosure {
+private:
+  // the bitmap that is being iterated over
+  CMBitMap*                   _nextMarkBitMap;
+  ConcurrentMark*             _cm;
+  CMTask*                     _task;
+  // true if we're scanning a heap region claimed by the task (so that
+  // we move the finger along), false if we're not, i.e. currently when
+  // scanning a heap region popped from the region stack (so that we
+  // do not move the task finger along; it'd be a mistake if we did so).
+  bool                        _scanning_heap_region;
+
+public:
+  CMBitMapClosure(CMTask *task,
+                  ConcurrentMark* cm,
+                  CMBitMap* nextMarkBitMap)
+    :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
+
+  void set_scanning_heap_region(bool scanning_heap_region) {
+    _scanning_heap_region = scanning_heap_region;
+  }
+
+  bool do_bit(size_t offset) {
+    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
+    tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
+    tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
+
+    if (_scanning_heap_region) {
+      statsOnly( _task->increase_objs_found_on_bitmap() );
+      tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
+      // We move that task's local finger along.
+      _task->move_finger_to(addr);
+    } else {
+      // We move the task's region finger along.
+      _task->move_region_finger_to(addr);
+    }
+
+    _task->scan_object(oop(addr));
+    // we only partially drain the local queue and global stack
+    _task->drain_local_queue(true);
+    _task->drain_global_stack(true);
+
+    // if the has_aborted flag has been raised, we need to bail out of
+    // the iteration
+    return !_task->has_aborted();
+  }
+};
+
+// Closure for iterating over objects, currently only used for
+// processing SATB buffers.
+class CMObjectClosure : public ObjectClosure {
+private:
+  CMTask* _task;
+
+public:
+  void do_object(oop obj) {
+    _task->deal_with_reference(obj);
+  }
+
+  CMObjectClosure(CMTask* task) : _task(task) { }
+};
+
+// Closure for iterating over object fields
+class CMOopClosure : public OopClosure {
+private:
+  G1CollectedHeap*   _g1h;
+  ConcurrentMark*    _cm;
+  CMTask*            _task;
+
+public:
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
+
+    oop obj = *p;
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] we're looking at location "
+                             "*"PTR_FORMAT" = "PTR_FORMAT,
+                             _task->task_id(), p, (void*) obj);
+    _task->deal_with_reference(obj);
+  }
+
+  CMOopClosure(G1CollectedHeap* g1h,
+               ConcurrentMark* cm,
+               CMTask* task)
+    : _g1h(g1h), _cm(cm), _task(task) { }
+};
+
+void CMTask::setup_for_region(HeapRegion* hr) {
+  tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
+      "claim_region() should have filtered out continues humongous regions" );
+
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
+                           _task_id, hr);
+
+  _curr_region  = hr;
+  _finger       = hr->bottom();
+  update_region_limit();
+}
+
+void CMTask::update_region_limit() {
+  HeapRegion* hr            = _curr_region;
+  HeapWord* bottom          = hr->bottom();
+  HeapWord* limit           = hr->next_top_at_mark_start();
+
+  if (limit == bottom) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] found an empty region "
+                             "["PTR_FORMAT", "PTR_FORMAT")",
+                             _task_id, bottom, limit);
+    // The region was collected underneath our feet.
+    // We set the finger to bottom to ensure that the bitmap
+    // iteration that will follow this will not do anything.
+    // (this is not a condition that holds when we set the region up,
+    // as the region is not supposed to be empty in the first place)
+    _finger = bottom;
+  } else if (limit >= _region_limit) {
+    tmp_guarantee_CM( limit >= _finger, "peace of mind" );
+  } else {
+    tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
+    // This can happen under some pretty unusual circumstances.  An
+    // evacuation pause empties the region underneath our feet (NTAMS
+    // at bottom). We then do some allocation in the region (NTAMS
+    // stays at bottom), followed by the region being used as a GC
+    // alloc region (NTAMS will move to top() and the objects
+    // originally below it will be grayed). All objects now marked in
+    // the region are explicitly grayed, if below the global finger,
+    // and we do not need in fact to scan anything else. So, we simply
+    // set _finger to be limit to ensure that the bitmap iteration
+    // doesn't do anything.
+    _finger = limit;
+  }
+
+  _region_limit = limit;
+}
+
+void CMTask::giveup_current_region() {
+  tmp_guarantee_CM( _curr_region != NULL, "invariant" );
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
+                           _task_id, _curr_region);
+  clear_region_fields();
+}
+
+void CMTask::clear_region_fields() {
+  // Values for these three fields that indicate that we're not
+  // holding on to a region.
+  _curr_region   = NULL;
+  _finger        = NULL;
+  _region_limit  = NULL;
+
+  _region_finger = NULL;
+}
+
+void CMTask::reset(CMBitMap* nextMarkBitMap) {
+  guarantee( nextMarkBitMap != NULL, "invariant" );
+
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] resetting", _task_id);
+
+  _nextMarkBitMap                = nextMarkBitMap;
+  clear_region_fields();
+
+  _calls                         = 0;
+  _elapsed_time_ms               = 0.0;
+  _termination_time_ms           = 0.0;
+  _termination_start_time_ms     = 0.0;
+
+#if _MARKING_STATS_
+  _local_pushes                  = 0;
+  _local_pops                    = 0;
+  _local_max_size                = 0;
+  _objs_scanned                  = 0;
+  _global_pushes                 = 0;
+  _global_pops                   = 0;
+  _global_max_size               = 0;
+  _global_transfers_to           = 0;
+  _global_transfers_from         = 0;
+  _region_stack_pops             = 0;
+  _regions_claimed               = 0;
+  _objs_found_on_bitmap          = 0;
+  _satb_buffers_processed        = 0;
+  _steal_attempts                = 0;
+  _steals                        = 0;
+  _aborted                       = 0;
+  _aborted_overflow              = 0;
+  _aborted_cm_aborted            = 0;
+  _aborted_yield                 = 0;
+  _aborted_timed_out             = 0;
+  _aborted_satb                  = 0;
+  _aborted_termination           = 0;
+#endif // _MARKING_STATS_
+}
+
+bool CMTask::should_exit_termination() {
+  regular_clock_call();
+  // This is called when we are in the termination protocol. We should
+  // quit if, for some reason, this task wants to abort or the global
+  // stack is not empty (this means that we can get work from it).
+  return !_cm->mark_stack_empty() || has_aborted();
+}
+
+// This determines whether the method below will check both the local
+// and global fingers when determining whether to push on the stack a
+// gray object (value 1) or whether it will only check the global one
+// (value 0). The tradeoffs are that the former will be a bit more
+// accurate and possibly push less on the stack, but it might also be
+// a little bit slower.
+
+#define _CHECK_BOTH_FINGERS_      1
+
+void CMTask::deal_with_reference(oop obj) {
+  if (_cm->verbose_high())
+    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
+                           _task_id, (void*) obj);
+
+  ++_refs_reached;
+
+  HeapWord* objAddr = (HeapWord*) obj;
+  if (_g1h->is_in_g1_reserved(objAddr)) {
+    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    HeapRegion* hr =  _g1h->heap_region_containing(obj);
+    if (_g1h->is_obj_ill(obj, hr)) {
+      if (_cm->verbose_high())
+        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
+                               _task_id, (void*) obj);
+
+      // we need to mark it first
+      if (_nextMarkBitMap->parMark(objAddr)) {
+        // No OrderAccess::store_load() is needed. It is implicit in the
+        // CAS done in parMark(objAddr) above
+        HeapWord* global_finger = _cm->finger();
+
+#if _CHECK_BOTH_FINGERS_
+        // we will check both the local and global fingers
+
+        if (_finger != NULL && objAddr < _finger) {
+          if (_cm->verbose_high())
+            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
+                                   "pushing it", _task_id, _finger);
+          push(obj);
+        } else if (_curr_region != NULL && objAddr < _region_limit) {
+          // do nothing
+        } else if (objAddr < global_finger) {
+          // Notice that the global finger might be moving forward
+          // concurrently. This is not a problem. In the worst case, we
+          // mark the object while it is above the global finger and, by
+          // the time we read the global finger, it has moved forward
+          // past this object. In this case, the object will probably
+          // be visited when a task is scanning the region and will also
+          // be pushed on the stack. So, some duplicate work, but no
+          // correctness problems.
+
+          if (_cm->verbose_high())
+            gclog_or_tty->print_cr("[%d] below the global finger "
+                                   "("PTR_FORMAT"), pushing it",
+                                   _task_id, global_finger);
+          push(obj);
+        } else {
+          // do nothing
+        }
+#else // _CHECK_BOTH_FINGERS_
+      // we will only check the global finger
+
+        if (objAddr < global_finger) {
+          // see long comment above
+
+          if (_cm->verbose_high())
+            gclog_or_tty->print_cr("[%d] below the global finger "
+                                   "("PTR_FORMAT"), pushing it",
+                                   _task_id, global_finger);
+          push(obj);
+        }
+#endif // _CHECK_BOTH_FINGERS_
+      }
+    }
+  }
+}
+
+void CMTask::push(oop obj) {
+  HeapWord* objAddr = (HeapWord*) obj;
+  tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
+  tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
+  tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
+
+  if (_cm->verbose_high())
+    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
+
+  if (!_task_queue->push(obj)) {
+    // The local task queue looks full. We need to push some entries
+    // to the global stack.
+
+    if (_cm->verbose_medium())
+      gclog_or_tty->print_cr("[%d] task queue overflow, "
+                             "moving entries to the global stack",
+                             _task_id);
+    move_entries_to_global_stack();
+
+    // this should succeed since, even if we overflow the global
+    // stack, we should have definitely removed some entries from the
+    // local queue. So, there must be space on it.
+    bool success = _task_queue->push(obj);
+    tmp_guarantee_CM( success, "invariant" );
+  }
+
+  statsOnly( int tmp_size = _task_queue->size();
+             if (tmp_size > _local_max_size)
+               _local_max_size = tmp_size;
+             ++_local_pushes );
+}
+
+void CMTask::reached_limit() {
+  tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
+                    _refs_reached >= _refs_reached_limit,
+                 "shouldn't have been called otherwise" );
+  regular_clock_call();
+}
+
+void CMTask::regular_clock_call() {
+  if (has_aborted())
+    return;
+
+  // First, we need to recalculate the words scanned and refs reached
+  // limits for the next clock call.
+  recalculate_limits();
+
+  // During the regular clock call we do the following
+
+  // (1) If an overflow has been flagged, then we abort.
+  if (_cm->has_overflown()) {
+    set_has_aborted();
+    return;
+  }
+
+  // If we are not concurrent (i.e. we're doing remark) we don't need
+  // to check anything else. The other steps are only needed during
+  // the concurrent marking phase.
+  if (!concurrent())
+    return;
+
+  // (2) If marking has been aborted for Full GC, then we also abort.
+  if (_cm->has_aborted()) {
+    set_has_aborted();
+    statsOnly( ++_aborted_cm_aborted );
+    return;
+  }
+
+  double curr_time_ms = os::elapsedVTime() * 1000.0;
+
+  // (3) If marking stats are enabled, then we update the step history.
+#if _MARKING_STATS_
+  if (_words_scanned >= _words_scanned_limit)
+    ++_clock_due_to_scanning;
+  if (_refs_reached >= _refs_reached_limit)
+    ++_clock_due_to_marking;
+
+  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
+  _interval_start_time_ms = curr_time_ms;
+  _all_clock_intervals_ms.add(last_interval_ms);
+
+  if (_cm->verbose_medium()) {
+    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
+                           "scanned = %d%s, refs reached = %d%s",
+                           _task_id, last_interval_ms,
+                           _words_scanned,
+                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
+                           _refs_reached,
+                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
+  }
+#endif // _MARKING_STATS_
+
+  // (4) We check whether we should yield. If we have to, then we abort.
+  if (_cm->should_yield()) {
+    // We should yield. To do this we abort the task. The caller is
+    // responsible for yielding.
+    set_has_aborted();
+    statsOnly( ++_aborted_yield );
+    return;
+  }
+
+  // (5) We check whether we've reached our time quota. If we have,
+  // then we abort.
+  double elapsed_time_ms = curr_time_ms - _start_time_ms;
+  if (elapsed_time_ms > _time_target_ms) {
+    set_has_aborted();
+    _has_aborted_timed_out = true;
+    statsOnly( ++_aborted_timed_out );
+    return;
+  }
+
+  // (6) Finally, we check whether there are enough completed SATB
+  // buffers available for processing. If there are, we abort.
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
+                             _task_id);
+    // we do need to process SATB buffers, we'll abort and restart
+    // the marking task to do so
+    set_has_aborted();
+    statsOnly( ++_aborted_satb );
+    return;
+  }
+}
+
+void CMTask::recalculate_limits() {
+  _real_words_scanned_limit = _words_scanned + words_scanned_period;
+  _words_scanned_limit      = _real_words_scanned_limit;
+
+  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
+  _refs_reached_limit       = _real_refs_reached_limit;
+}
+
+void CMTask::decrease_limits() {
+  // This is called when we believe that we're going to do an infrequent
+  // operation which will increase the per byte scanned cost (i.e. move
+  // entries to/from the global stack). It basically tries to decrease the
+  // scanning limit so that the clock is called earlier.
+
+  if (_cm->verbose_medium())
+    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
+
+  _words_scanned_limit = _real_words_scanned_limit -
+    3 * words_scanned_period / 4;
+  _refs_reached_limit  = _real_refs_reached_limit -
+    3 * refs_reached_period / 4;
+}
+
+void CMTask::move_entries_to_global_stack() {
+  // local array where we'll store the entries that will be popped
+  // from the local queue
+  oop buffer[global_stack_transfer_size];
+
+  int n = 0;
+  oop obj;
+  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
+    buffer[n] = obj;
+    ++n;
+  }
+
+  if (n > 0) {
+    // we popped at least one entry from the local queue
+
+    statsOnly( ++_global_transfers_to; _local_pops += n );
+
+    if (!_cm->mark_stack_push(buffer, n)) {
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
+      set_has_aborted();
+    } else {
+      // the transfer was successful
+
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
+                               _task_id, n);
+      statsOnly( int tmp_size = _cm->mark_stack_size();
+                 if (tmp_size > _global_max_size)
+                   _global_max_size = tmp_size;
+                 _global_pushes += n );
+    }
+  }
+
+  // this operation was quite expensive, so decrease the limits
+  decrease_limits();
+}
+
+void CMTask::get_entries_from_global_stack() {
+  // local array where we'll store the entries that will be popped
+  // from the global stack.
+  oop buffer[global_stack_transfer_size];
+  int n;
+  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
+  tmp_guarantee_CM( n <= global_stack_transfer_size,
+                    "we should not pop more than the given limit" );
+  if (n > 0) {
+    // yes, we did actually pop at least one entry
+
+    statsOnly( ++_global_transfers_from; _global_pops += n );
+    if (_cm->verbose_medium())
+      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
+                             _task_id, n);
+    for (int i = 0; i < n; ++i) {
+      bool success = _task_queue->push(buffer[i]);
+      // We only call this when the local queue is empty or under a
+      // given target limit. So, we do not expect this push to fail.
+      tmp_guarantee_CM( success, "invariant" );
+    }
+
+    statsOnly( int tmp_size = _task_queue->size();
+               if (tmp_size > _local_max_size)
+                 _local_max_size = tmp_size;
+               _local_pushes += n );
+  }
+
+  // this operation was quite expensive, so decrease the limits
+  decrease_limits();
+}
+
+void CMTask::drain_local_queue(bool partially) {
+  if (has_aborted())
+    return;
+
+  // Decide what the target size is, depending whether we're going to
+  // drain it partially (so that other tasks can steal if they run out
+  // of things to do) or totally (at the very end).
+  size_t target_size;
+  if (partially)
+    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
+  else
+    target_size = 0;
+
+  if (_task_queue->size() > target_size) {
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
+                             _task_id, target_size);
+
+    oop obj;
+    bool ret = _task_queue->pop_local(obj);
+    while (ret) {
+      statsOnly( ++_local_pops );
+
+      if (_cm->verbose_high())
+        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
+                               (void*) obj);
+
+      tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
+                        "invariant" );
+
+      scan_object(obj);
+
+      if (_task_queue->size() <= target_size || has_aborted())
+        ret = false;
+      else
+        ret = _task_queue->pop_local(obj);
+    }
+
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
+                             _task_id, _task_queue->size());
+  }
+}
+
+void CMTask::drain_global_stack(bool partially) {
+  if (has_aborted())
+    return;
+
+  // We have a policy to drain the local queue before we attempt to
+  // drain the global stack.
+  tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
+
+  // Decide what the target size is, depending whether we're going to
+  // drain it partially (so that other tasks can steal if they run out
+  // of things to do) or totally (at the very end).  Notice that,
+  // because we move entries from the global stack in chunks or
+  // because another task might be doing the same, we might in fact
+  // drop below the target. But, this is not a problem.
+  size_t target_size;
+  if (partially)
+    target_size = _cm->partial_mark_stack_size_target();
+  else
+    target_size = 0;
+
+  if (_cm->mark_stack_size() > target_size) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
+                             _task_id, target_size);
+
+    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
+      get_entries_from_global_stack();
+      drain_local_queue(partially);
+    }
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
+                             _task_id, _cm->mark_stack_size());
+  }
+}
+
+// The SATB queue code makes assumptions about whether the par or non-par
+// versions of its methods are called; this is why some of the code below is
+// replicated. We should really get rid of the single-threaded version of
+// the code to simplify things.
+void CMTask::drain_satb_buffers() {
+  if (has_aborted())
+    return;
+
+  // We set this so that the regular clock knows that we're in the
+  // middle of draining buffers and doesn't set the abort flag when it
+  // notices that SATB buffers are available for draining. It'd be
+  // very counter productive if it did that. :-)
+  _draining_satb_buffers = true;
+
+  CMObjectClosure oc(this);
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  if (ParallelGCThreads > 0)
+    satb_mq_set.set_par_closure(_task_id, &oc);
+  else
+    satb_mq_set.set_closure(&oc);
+
+  // This keeps claiming and applying the closure to completed buffers
+  // until we run out of buffers or we need to abort.
+  if (ParallelGCThreads > 0) {
+    while (!has_aborted() &&
+           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
+      statsOnly( ++_satb_buffers_processed );
+      regular_clock_call();
+    }
+  } else {
+    while (!has_aborted() &&
+           satb_mq_set.apply_closure_to_completed_buffer()) {
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
+      statsOnly( ++_satb_buffers_processed );
+      regular_clock_call();
+    }
+  }
+
+  if (!concurrent() && !has_aborted()) {
+    // We should only do this during remark.
+    if (ParallelGCThreads > 0)
+      satb_mq_set.par_iterate_closure_all_threads(_task_id);
+    else
+      satb_mq_set.iterate_closure_all_threads();
+  }
+
+  _draining_satb_buffers = false;
+
+  tmp_guarantee_CM( has_aborted() ||
+                    concurrent() ||
+                    satb_mq_set.completed_buffers_num() == 0, "invariant" );
+
+  if (ParallelGCThreads > 0)
+    satb_mq_set.set_par_closure(_task_id, NULL);
+  else
+    satb_mq_set.set_closure(NULL);
+
+  // again, this was a potentially expensive operation, decrease the
+  // limits to get the regular clock call early
+  decrease_limits();
+}
+
+void CMTask::drain_region_stack(BitMapClosure* bc) {
+  if (has_aborted())
+    return;
+
+  tmp_guarantee_CM( _region_finger == NULL,
+                    "it should be NULL when we're not scanning a region" );
+
+  if (!_cm->region_stack_empty()) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
+                             _task_id, _cm->region_stack_size());
+
+    MemRegion mr = _cm->region_stack_pop();
+    // it returns MemRegion() if the pop fails
+    statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
+
+    while (mr.start() != NULL) {
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] we are scanning region "
+                               "["PTR_FORMAT", "PTR_FORMAT")",
+                               _task_id, mr.start(), mr.end());
+      tmp_guarantee_CM( mr.end() <= _cm->finger(),
+                        "otherwise the region shouldn't be on the stack" );
+      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
+      if (_nextMarkBitMap->iterate(bc, mr)) {
+        tmp_guarantee_CM( !has_aborted(),
+               "cannot abort the task without aborting the bitmap iteration" );
+
+        // We finished iterating over the region without aborting.
+        regular_clock_call();
+        if (has_aborted())
+          mr = MemRegion();
+        else {
+          mr = _cm->region_stack_pop();
+          // it returns MemRegion() if the pop fails
+          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
+        }
+      } else {
+        guarantee( has_aborted(), "currently the only way to do so" );
+
+        // The only way to abort the bitmap iteration is to return
+        // false from the do_bit() method. However, inside the
+        // do_bit() method we move the _region_finger to point to the
+        // object currently being looked at. So, if we bail out, we
+        // have definitely set _region_finger to something non-null.
+        guarantee( _region_finger != NULL, "invariant" );
+
+        // The iteration was actually aborted. So now _region_finger
+        // points to the address of the object we last scanned. If we
+        // leave it there, when we restart this task, we will rescan
+        // the object. It is easy to avoid this. We move the finger by
+        // enough to point to the next possible object header (the
+        // bitmap knows by how much we need to move it as it knows its
+        // granularity).
+        MemRegion newRegion =
+          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
+
+        if (!newRegion.is_empty()) {
+          if (_cm->verbose_low()) {
+            gclog_or_tty->print_cr("[%d] pushing unscanned region"
+                                   "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
+                                   _task_id,
+                                   newRegion.start(), newRegion.end());
+          }
+          // Now push the part of the region we didn't scan on the
+          // region stack to make sure a task scans it later.
+          _cm->region_stack_push(newRegion);
+        }
+        // break from while
+        mr = MemRegion();
+      }
+      _region_finger = NULL;
+    }
+
+    // We only push regions on the region stack during evacuation
+    // pauses. So if we come out of the above iteration because the
+    // region stack is empty, it will remain empty until the next yield
+    // point. So, the guarantee below is safe.
+    guarantee( has_aborted() || _cm->region_stack_empty(),
+               "only way to exit the loop" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
+                             _task_id, _cm->region_stack_size());
+  }
+}
+
+void CMTask::print_stats() {
+  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
+                         _task_id, _calls);
+  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
+                         _elapsed_time_ms, _termination_time_ms);
+  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
+                         _step_times_ms.num(), _step_times_ms.avg(),
+                         _step_times_ms.sd());
+  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
+                         _step_times_ms.maximum(), _step_times_ms.sum());
+
+#if _MARKING_STATS_
+  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
+                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
+                         _all_clock_intervals_ms.sd());
+  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
+                         _all_clock_intervals_ms.maximum(),
+                         _all_clock_intervals_ms.sum());
+  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
+                         _clock_due_to_scanning, _clock_due_to_marking);
+  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
+                         _objs_scanned, _objs_found_on_bitmap);
+  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
+                         _local_pushes, _local_pops, _local_max_size);
+  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
+                         _global_pushes, _global_pops, _global_max_size);
+  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
+                         _global_transfers_to, _global_transfers_from);
+  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
+                         _regions_claimed, _region_stack_pops);
+  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
+  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
+                         _steal_attempts, _steals);
+  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
+  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
+                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
+  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
+                         _aborted_timed_out, _aborted_satb, _aborted_termination);
+#endif // _MARKING_STATS_
+}
+
+/*****************************************************************************
+
+    The do_marking_step(time_target_ms) method is the building block
+    of the parallel marking framework. It can be called in parallel
+    with other invocations of do_marking_step() on different tasks
+    (but only one per task, obviously) and concurrently with the
+    mutator threads, or during remark, hence it eliminates the need
+    for two versions of the code. When called during remark, it will
+    pick up from where the task left off during the concurrent marking
+    phase. Interestingly, tasks are also claimable during evacuation
+    pauses, since do_marking_step() ensures that it aborts before
+    it needs to yield.
+
+    The data structures that it uses to do marking work are the
+    following:
+
+      (1) Marking Bitmap. If there are gray objects that appear only
+      on the bitmap (this happens either when dealing with an overflow
+      or when the initial marking phase has simply marked the roots
+      and didn't push them on the stack), then tasks claim heap
+      regions whose bitmap they then scan to find gray objects. A
+      global finger indicates where the end of the last claimed region
+      is. A local finger indicates how far into the region a task has
+      scanned. The two fingers are used to determine how to gray an
+      object (i.e. whether simply marking it is OK, as it will be
+      visited by a task in the future, or whether it needs to be also
+      pushed on a stack).
+
+      (2) Local Queue. The local queue of the task, which is accessed
+      reasonably efficiently by the task. Other tasks can steal from
+      it when they run out of work. Throughout the marking phase, a
+      task attempts to keep its local queue short but not totally
+      empty, so that entries are available for stealing by other
+      tasks. Only when there is no more work will a task totally
+      drain its local queue.
+
+      (3) Global Mark Stack. This handles local queue overflow. During
+      marking, only whole sets of entries are moved between it and the
+      local queues, as access to it requires a mutex and more
+      fine-grained interaction with it might cause contention. If it
+      overflows, then the marking phase should restart and iterate
+      over the bitmap to identify gray objects. Throughout the marking
+      phase, tasks attempt to keep the global mark stack short but not
+      totally empty, so that entries are available for popping by
+      other tasks. Only when there is no more work will tasks
+      totally drain the global mark stack.
+
+      (4) Global Region Stack. Entries on it correspond to areas of
+      the bitmap that need to be scanned since they contain gray
+      objects. Pushes on the region stack only happen during
+      evacuation pauses and typically correspond to areas covered by
+      GC LABS. If it overflows, then the marking phase should restart
+      and iterate over the bitmap to identify gray objects. Tasks will
+      try to totally drain the region stack as soon as possible.
+
+      (5) SATB Buffer Queue. This is where completed SATB buffers are
+      made available. Buffers are regularly removed from this queue
+      and scanned for roots, so that the queue doesn't get too
+      long. During remark, all completed buffers are processed, as
+      well as the filled in parts of any uncompleted buffers.
+
+    The do_marking_step() method tries to abort when the time target
+    has been reached. There are a few other cases when the
+    do_marking_step() method also aborts:
+
+      (1) When the marking phase has been aborted (after a Full GC).
+
+      (2) When a global overflow (either on the global stack or the
+      region stack) has been triggered. Before the task aborts, it
+      will actually sync up with the other tasks to ensure that all
+      the marking data structures (local queues, stacks, fingers etc.)
+      are re-initialised so that when do_marking_step() completes,
+      the marking phase can immediately restart.
+
+      (3) When enough completed SATB buffers are available. The
+      do_marking_step() method only tries to drain SATB buffers right
+      at the beginning. So, if enough buffers are available, the
+      marking step aborts and the SATB buffers are processed at
+      the beginning of the next invocation.
+
+      (4) To yield. When we have to yield, we abort and yield right
+      at the end of do_marking_step(). This saves us a lot of hassle,
+      as by yielding we might allow a Full GC. If that happens, then
+      objects will be compacted underneath our feet, the heap might
+      shrink, etc. We avoid having to check for all this by simply
+      aborting and doing the yield right at the end.
+
+    From the above it follows that the do_marking_step() method should
+    be called in a loop (or, otherwise, regularly) until it completes.
+
+    If a marking step completes without its has_aborted() flag being
+    true, it means it has completed the current marking phase (and
+    also all other marking tasks have done so and have all synced up).
+
+    A method called regular_clock_call() is invoked "regularly" (in
+    sub ms intervals) throughout marking. It is this clock method that
+    checks all the abort conditions which were mentioned above and
+    decides when the task should abort. A work-based scheme is used to
+    trigger this clock method: it is invoked when the number of object
+    words the marking phase has scanned or the number of references it
+    has visited reaches a given limit. Additional invocations of the
+    clock method have been planted in a few other strategic places
+    too. The initial reason for the clock method was to avoid calling
+    vtime too often, as it is quite expensive. Once it was in place,
+    it was natural to piggy-back all the other abort conditions on it
+    too, rather than checking them constantly throughout the code.
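+
+    As a rough illustration of the calling pattern described above (a
+    sketch for exposition only, not the actual caller; the names used
+    here are made up):
+
+      double target_ms = 10.0;          // per-step time budget
+      do {
+        task->do_marking_step(target_ms);
+        // if the step aborted in order to yield, the caller yields
+        // here before calling do_marking_step() again; on overflow,
+        // the two sync barriers have already re-initialised all the
+        // marking data structures by the time the step returns
+      } while (task->has_aborted());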
+
+ *****************************************************************************/
+
+void CMTask::do_marking_step(double time_target_ms) {
+  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
+  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
+
+  guarantee( concurrent() || _cm->region_stack_empty(),
+             "the region stack should have been cleared before remark" );
+  guarantee( _region_finger == NULL,
+             "this should be non-null only when a region is being scanned" );
+
+  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
+  guarantee( _task_queues != NULL, "invariant" );
+  guarantee( _task_queue != NULL,  "invariant" );
+  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
+
+  guarantee( !_claimed,
+             "only one thread should claim this task at any one time" );
+
+  // OK, this doesn't safeguard against all possible scenarios, as it is
+  // possible for two threads to set the _claimed flag at the same
+  // time. But it is only for debugging purposes anyway and it will
+  // catch most problems.
+  _claimed = true;
+
+  _start_time_ms = os::elapsedVTime() * 1000.0;
+  statsOnly( _interval_start_time_ms = _start_time_ms );
+
+  double diff_prediction_ms =
+    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
+  _time_target_ms = time_target_ms - diff_prediction_ms;
+
+  // set up the variables that are used in the work-based scheme to
+  // call the regular clock method
+  _words_scanned = 0;
+  _refs_reached  = 0;
+  recalculate_limits();
+
+  // clear all flags
+  clear_has_aborted();
+  _has_aborted_timed_out = false;
+  _draining_satb_buffers = false;
+
+  ++_calls;
+
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
+                           "target = %1.2lfms >>>>>>>>>>",
+                           _task_id, _calls, _time_target_ms);
+
+  // Set up the bitmap and oop closures. Anything that uses them is
+  // eventually called from this method, so it is OK to allocate these
+  // statically.
+  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
+  CMOopClosure    oop_closure(_g1h, _cm, this);
+  set_oop_closure(&oop_closure);
+
+  if (_cm->has_overflown()) {
+    // This can happen if the region stack or the mark stack overflows
+    // during a GC pause and this task, after a yield point,
+    // restarts. We have to abort as we need to get into the overflow
+    // protocol which happens right at the end of this task.
+    set_has_aborted();
+  }
+
+  // First drain any available SATB buffers. After this, we will not
+  // look at SATB buffers before the next invocation of this method.
+  // If enough completed SATB buffers are queued up, the regular clock
+  // will abort this task so that it restarts.
+  drain_satb_buffers();
+  // ...then partially drain the local queue and the global stack
+  drain_local_queue(true);
+  drain_global_stack(true);
+
+  // Then totally drain the region stack.  We will not look at
+  // it again before the next invocation of this method. Entries on
+  // the region stack are only added during evacuation pauses, for
+  // which we have to yield. When we do, we abort the task anyway so
+  // it will look at the region stack again when it restarts.
+  bitmap_closure.set_scanning_heap_region(false);
+  drain_region_stack(&bitmap_closure);
+  // ...then partially drain the local queue and the global stack
+  drain_local_queue(true);
+  drain_global_stack(true);
+
+  do {
+    if (!has_aborted() && _curr_region != NULL) {
+      // This means that we're already holding on to a region.
+      tmp_guarantee_CM( _finger != NULL,
+                        "if region is not NULL, then the finger "
+                        "should not be NULL either" );
+
+      // We might have restarted this task after an evacuation pause
+      // which might have evacuated the region we're holding on to
+      // underneath our feet. Let's read its limit again to make sure
+      // that we do not iterate over a region of the heap that
+      // contains garbage (update_region_limit() will also move
+      // _finger to the start of the region if it is found empty).
+      update_region_limit();
+      // We will start from _finger not from the start of the region,
+      // as we might be restarting this task after aborting half-way
+      // through scanning this region. In this case, _finger points to
+      // the address where we last found a marked object. If this is a
+      // fresh region, _finger points to start().
+      MemRegion mr = MemRegion(_finger, _region_limit);
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] we're scanning part "
+                               "["PTR_FORMAT", "PTR_FORMAT") "
+                               "of region "PTR_FORMAT,
+                               _task_id, _finger, _region_limit, _curr_region);
+
+      // Let's iterate over the bitmap of the part of the
+      // region that is left.
+      bitmap_closure.set_scanning_heap_region(true);
+      if (mr.is_empty() ||
+          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
+        // We successfully completed iterating over the region. Now,
+        // let's give up the region.
+        giveup_current_region();
+        regular_clock_call();
+      } else {
+        guarantee( has_aborted(), "currently the only way to do so" );
+        // The only way to abort the bitmap iteration is to return
+        // false from the do_bit() method. However, inside the
+        // do_bit() method we move the _finger to point to the
+        // object currently being looked at. So, if we bail out, we
+        // have definitely set _finger to something non-null.
+        guarantee( _finger != NULL, "invariant" );
+
+        // Region iteration was actually aborted. So now _finger
+        // points to the address of the object we last scanned. If we
+        // leave it there, when we restart this task, we will rescan
+        // the object. It is easy to avoid this. We move the finger by
+        // enough to point to the next possible object header (the
+        // bitmap knows by how much we need to move it as it knows its
+        // granularity).
+        move_finger_to(_nextMarkBitMap->nextWord(_finger));
+      }
+    }
+    // At this point we have either completed iterating over the
+    // region we were holding on to, or we have aborted.
+
+    // We then partially drain the local queue and the global stack.
+    // (Do we really need this?)
+    drain_local_queue(true);
+    drain_global_stack(true);
+
+    // Read the note on the claim_region() method on why it might
+    // return NULL with potentially more regions available for
+    // claiming and why we have to check out_of_regions() to determine
+    // whether we're done or not.
+    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
+      // We are going to try to claim a new region. We should have
+      // given up on the previous one.
+      tmp_guarantee_CM( _curr_region  == NULL &&
+                        _finger       == NULL &&
+                        _region_limit == NULL, "invariant" );
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
+      HeapRegion* claimed_region = _cm->claim_region(_task_id);
+      if (claimed_region != NULL) {
+        // Yes, we managed to claim one
+        statsOnly( ++_regions_claimed );
+
+        if (_cm->verbose_low())
+          gclog_or_tty->print_cr("[%d] we successfully claimed "
+                                 "region "PTR_FORMAT,
+                                 _task_id, claimed_region);
+
+        setup_for_region(claimed_region);
+        tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
+      }
+      // It is important to call the regular clock here. It might take
+      // a while to claim a region if, for example, we hit a large
+      // block of empty regions. So we need to call the regular clock
+      // method once round the loop to make sure it's called
+      // frequently enough.
+      regular_clock_call();
+    }
+
+    if (!has_aborted() && _curr_region == NULL) {
+      tmp_guarantee_CM( _cm->out_of_regions(),
+                        "at this point we should be out of regions" );
+    }
+  } while ( _curr_region != NULL && !has_aborted());
+
+  if (!has_aborted()) {
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be pushing objects to it concurrently. We also cannot
+    // check if the region stack is empty because if a thread is aborting
+    // it can push a partially done region back.
+    tmp_guarantee_CM( _cm->out_of_regions(),
+                      "at this point we should be out of regions" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
+
+    // Try to reduce the number of available SATB buffers so that
+    // remark has less work to do.
+    drain_satb_buffers();
+  }
+
+  // Since we've done everything else, we can now totally drain the
+  // local queue and global stack.
+  drain_local_queue(false);
+  drain_global_stack(false);
+
+  // Attempt at work stealing from other task's queues.
+  if (!has_aborted()) {
+    // We have not aborted. This means that we have finished all that
+    // we could. Let's try to do some stealing...
+
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be pushing objects to it concurrently. We also cannot
+    // check if the region stack is empty because if a thread is aborting
+    // it can push a partially done region back.
+    guarantee( _cm->out_of_regions() &&
+               _task_queue->size() == 0, "only way to reach here" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
+
+    while (!has_aborted()) {
+      oop obj;
+      statsOnly( ++_steal_attempts );
+
+      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
+        if (_cm->verbose_medium())
+          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
+                                 _task_id, (void*) obj);
+
+        statsOnly( ++_steals );
+
+        tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
+                          "any stolen object should be marked" );
+        scan_object(obj);
+
+        // And since we're towards the end, let's totally drain the
+        // local queue and global stack.
+        drain_local_queue(false);
+        drain_global_stack(false);
+      } else {
+        break;
+      }
+    }
+  }
+
+  // We still haven't aborted. Now, let's try to get into the
+  // termination protocol.
+  if (!has_aborted()) {
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be concurrently pushing objects on it. We also cannot
+    // check if the region stack is empty because if a thread is aborting
+    // it can push a partially done region back.
+    guarantee( _cm->out_of_regions() &&
+               _task_queue->size() == 0, "only way to reach here" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
+
+    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
+    // The CMTask class also extends the TerminatorTerminator class,
+    // hence its should_exit_termination() method will also decide
+    // whether to exit the termination protocol or not.
+    bool finished = _cm->terminator()->offer_termination(this);
+    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
+    _termination_time_ms +=
+      termination_end_time_ms - _termination_start_time_ms;
+
+    if (finished) {
+      // We're all done.
+
+      if (_task_id == 0) {
+        // let's allow task 0 to do this
+        if (concurrent()) {
+          guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
+          // we need to set this to false before the next
+          // safepoint. This way we ensure that the marking phase
+          // doesn't observe any more heap expansions.
+          _cm->clear_concurrent_marking_in_progress();
+        }
+      }
+
+      // We can now guarantee that the global stack is empty, since
+      // all other tasks have finished.
+      guarantee( _cm->out_of_regions() &&
+                 _cm->region_stack_empty() &&
+                 _cm->mark_stack_empty() &&
+                 _task_queue->size() == 0 &&
+                 !_cm->has_overflown() &&
+                 !_cm->mark_stack_overflow() &&
+                 !_cm->region_stack_overflow(),
+                 "only way to reach here" );
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
+    } else {
+      // Apparently there's more work to do. Let's abort this task. The
+      // caller will restart it and we can hopefully find more things to do.
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
+
+      set_has_aborted();
+      statsOnly( ++_aborted_termination );
+    }
+  }
+
+  // Mainly for debugging purposes to make sure that a pointer to the
+  // closure which was statically allocated in this frame doesn't
+  // escape it by accident.
+  set_oop_closure(NULL);
+  double end_time_ms = os::elapsedVTime() * 1000.0;
+  double elapsed_time_ms = end_time_ms - _start_time_ms;
+  // Update the step history.
+  _step_times_ms.add(elapsed_time_ms);
+
+  if (has_aborted()) {
+    // The task was aborted for some reason.
+
+    statsOnly( ++_aborted );
+
+    if (_has_aborted_timed_out) {
+      double diff_ms = elapsed_time_ms - _time_target_ms;
+      // Keep statistics of how well we did with respect to hitting
+      // our target only if we actually timed out (if we aborted for
+      // other reasons, then the results might get skewed).
+      _marking_step_diffs_ms.add(diff_ms);
+    }
+
+    if (_cm->has_overflown()) {
+      // This is the interesting one. We aborted because a global
+      // overflow was raised. This means we have to restart the
+      // marking phase and start iterating over regions. However, in
+      // order to do this we have to make sure that all tasks stop
+      // what they are doing and re-initialise in a safe manner. We
+      // will achieve this with the use of two barrier sync points.
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
+
+      _cm->enter_first_sync_barrier(_task_id);
+      // When we exit this sync barrier we know that all tasks have
+      // stopped doing marking work. So, it's now safe to
+      // re-initialise our data structures. At the end of this method,
+      // task 0 will clear the global data structures.
+
+      statsOnly( ++_aborted_overflow );
+
+      // We clear the local state of this task...
+      clear_region_fields();
+
+      // ...and enter the second barrier.
+      _cm->enter_second_sync_barrier(_task_id);
+      // At this point everything has been re-initialised and we're
+      // ready to restart.
+    }
+
+    if (_cm->verbose_low()) {
+      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
+                             "elapsed = %1.2lfms <<<<<<<<<<",
+                             _task_id, _time_target_ms, elapsed_time_ms);
+      if (_cm->has_aborted())
+        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
+                               _task_id);
+    }
+  } else {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
+                             "elapsed = %1.2lfms <<<<<<<<<<",
+                             _task_id, _time_target_ms, elapsed_time_ms);
+  }
+
+  _claimed = false;
+}
+
+CMTask::CMTask(int task_id,
+               ConcurrentMark* cm,
+               CMTaskQueue* task_queue,
+               CMTaskQueueSet* task_queues)
+  : _g1h(G1CollectedHeap::heap()),
+    _co_tracker(G1CMGroup),
+    _task_id(task_id), _cm(cm),
+    _claimed(false),
+    _nextMarkBitMap(NULL), _hash_seed(17),
+    _task_queue(task_queue),
+    _task_queues(task_queues),
+    _oop_closure(NULL) {
+  guarantee( task_queue != NULL, "invariant" );
+  guarantee( task_queues != NULL, "invariant" );
+
+  statsOnly( _clock_due_to_scanning = 0;
+             _clock_due_to_marking  = 0 );
+
+  _marking_step_diffs_ms.add(0.5);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,1049 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class G1CollectedHeap;
+class CMTask;
+typedef GenericTaskQueue<oop> CMTaskQueue;
+typedef GenericTaskQueueSet<oop> CMTaskQueueSet;
+
+// A generic CM bit map.  This is essentially a wrapper around the BitMap
+// class, with one bit per (1<<_shifter) HeapWords.
+
+class CMBitMapRO {
+ protected:
+  HeapWord* _bmStartWord;      // base address of range covered by map
+  size_t    _bmWordSize;       // map size (in #HeapWords covered)
+  const int _shifter;          // map to char or bit
+  VirtualSpace _virtual_space; // underlying the bit map
+  BitMap    _bm;               // the bit map itself
+
+ public:
+  // constructor
+  CMBitMapRO(ReservedSpace rs, int shifter);
+
+  enum { do_yield = true };
+
+  // inquiries
+  HeapWord* startWord()   const { return _bmStartWord; }
+  size_t    sizeInWords() const { return _bmWordSize;  }
+  // the following is one past the last word in space
+  HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }
+
+  // read marks
+
+  bool isMarked(HeapWord* addr) const {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.at(heapWordToOffset(addr));
+  }
+
+  // iteration
+  bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
+  bool iterate(BitMapClosure* cl, MemRegion mr);
+
+  // Return the address corresponding to the next marked bit at or after
+  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
+  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
+  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
+                                     HeapWord* limit = NULL) const;
+  // Return the address corresponding to the next unmarked bit at or after
+  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
+  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
+  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
+                                       HeapWord* limit = NULL) const;
+
+  // conversion utilities
+  // XXX Fix these so that offsets are size_t's...
+  HeapWord* offsetToHeapWord(size_t offset) const {
+    return _bmStartWord + (offset << _shifter);
+  }
+  size_t heapWordToOffset(HeapWord* addr) const {
+    return pointer_delta(addr, _bmStartWord) >> _shifter;
+  }
+  int heapWordDiffToOffsetDiff(size_t diff) const;
+  HeapWord* nextWord(HeapWord* addr) {
+    return offsetToHeapWord(heapWordToOffset(addr) + 1);
+  }
+
+  void mostly_disjoint_range_union(BitMap*   from_bitmap,
+                                   size_t    from_start_index,
+                                   HeapWord* to_start_word,
+                                   size_t    word_num);
+
+  // debugging
+  NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
+};
+
+class CMBitMap : public CMBitMapRO {
+
+ public:
+  // constructor
+  CMBitMap(ReservedSpace rs, int shifter) :
+    CMBitMapRO(rs, shifter) {}
+
+  // write marks
+  void mark(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    _bm.at_put(heapWordToOffset(addr), true);
+  }
+  void clear(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    _bm.at_put(heapWordToOffset(addr), false);
+  }
+  bool parMark(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.par_at_put(heapWordToOffset(addr), true);
+  }
+  bool parClear(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.par_at_put(heapWordToOffset(addr), false);
+  }
+  void markRange(MemRegion mr);
+  void clearAll();
+  void clearRange(MemRegion mr);
+
+  // Starting at the bit corresponding to "addr" (inclusive), find the next
+  // "1" bit, if any.  This bit starts some run of consecutive "1"'s; find
+  // the end of this run (stopping at "end_addr").  Return the MemRegion
+  // covering from the start of the region corresponding to the first bit
+  // of the run to the end of the region corresponding to the last bit of
+  // the run.  If there is no "1" bit at or after "addr", return an empty
+  // MemRegion.
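+  // Example (illustrative only; "process" and the bounds are hypothetical):
+  //   MemRegion run = bm->getAndClearMarkedRegion(scan_start, scan_end);
+  //   while (!run.is_empty()) {
+  //     process(run);
+  //     run = bm->getAndClearMarkedRegion(run.end(), scan_end);
+  //   }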
+  MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
+};
+
+// Represents a marking stack used by the CM collector.
+// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
+class CMMarkStack {
+  ConcurrentMark* _cm;
+  oop*   _base;      // bottom of stack
+  jint   _index;     // one more than last occupied index
+  jint   _capacity;  // max #elements
+  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
+
+  bool   _overflow;
+  DEBUG_ONLY(bool _drain_in_progress;)
+  DEBUG_ONLY(bool _drain_in_progress_yields;)
+
+ public:
+  CMMarkStack(ConcurrentMark* cm);
+  ~CMMarkStack();
+
+  void allocate(size_t size);
+
+  oop pop() {
+    if (!isEmpty()) {
+      return _base[--_index] ;
+    }
+    return NULL;
+  }
+
+  // If overflow happens, don't do the push, and record the overflow.
+  // *Requires* that "ptr" is already marked.
+  void push(oop ptr) {
+    if (isFull()) {
+      // Record overflow.
+      _overflow = true;
+      return;
+    } else {
+      _base[_index++] = ptr;
+      NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
+    }
+  }
+  // Non-block impl.  Note: concurrency is allowed only with other
+  // "par_push" operations, not with "pop" or "drain".  We would need
+  // parallel versions of them if such concurrency was desired.
+  void par_push(oop ptr);
+
+  // Pushes the first "n" elements of "ptr_arr" on the stack.
+  // Non-block impl.  Note: concurrency is allowed only with other
+  // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
+  void par_adjoin_arr(oop* ptr_arr, int n);
+
+  // Pushes the first "n" elements of "ptr_arr" on the stack.
+  // Locking impl: concurrency is allowed only with
+  // "par_push_arr" and/or "par_pop_arr" operations, which use the same
+  // locking strategy.
+  void par_push_arr(oop* ptr_arr, int n);
+
+  // If returns false, the array was empty.  Otherwise, removes up to "max"
+  // elements from the stack, and transfers them to "ptr_arr" in an
+  // unspecified order.  The actual number transferred is given in "n" ("n
+  // == 0" is deliberately redundant with the return value.)  Locking impl:
+  // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
+  // operations, which use the same locking strategy.
+  bool par_pop_arr(oop* ptr_arr, int max, int* n);
+
+  // Drain the mark stack, applying the given closure to all fields of
+  // objects on the stack.  (That is, continue until the stack is empty,
+  // even if closure applications add entries to the stack.)  The "bm"
+  // argument, if non-null, may be used to verify that only marked objects
+  // are on the mark stack.  If "yield_after" is "true", then the
+  // concurrent marker performing the drain offers to yield after
+  // processing each object.  If a yield occurs, stops the drain operation
+  // and returns false.  Otherwise, returns true.
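+  // For example (illustrative only), with some closure object cl and
+  // the next mark bitmap bm:
+  //   bool completed = stack->drain(&cl, bm, true /* yield_after */);
+  //   // completed is false iff the drain stopped early to yield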
+  template<class OopClosureClass>
+  bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
+
+  bool isEmpty()    { return _index == 0; }
+  bool isFull()     { return _index == _capacity; }
+  int maxElems()    { return _capacity; }
+
+  bool overflow() { return _overflow; }
+  void clear_overflow() { _overflow = false; }
+
+  int  size() { return _index; }
+
+  void setEmpty()   { _index = 0; clear_overflow(); }
+
+  // Record the current size; a subsequent "oops_do" will iterate only over
+  // indices valid at the time of this call.
+  void set_oops_do_bound(jint bound = -1) {
+    if (bound == -1) {
+      _oops_do_bound = _index;
+    } else {
+      _oops_do_bound = bound;
+    }
+  }
+  jint oops_do_bound() { return _oops_do_bound; }
+  // iterate over the oops in the mark stack, up to the bound recorded via
+  // the call above.
+  void oops_do(OopClosure* f);
+};
+
+class CMRegionStack {
+  MemRegion* _base;
+  jint _capacity;
+  jint _index;
+  jint _oops_do_bound;
+  bool _overflow;
+public:
+  CMRegionStack();
+  ~CMRegionStack();
+  void allocate(size_t size);
+
+  // This is lock-free; assumes that it will only be called in parallel
+  // with other "push" operations (no pops).
+  void push(MemRegion mr);
+
+  // Lock-free; assumes that it will only be called in parallel
+  // with other "pop" operations (no pushes).
+  MemRegion pop();
+
+  bool isEmpty()    { return _index == 0; }
+  bool isFull()     { return _index == _capacity; }
+
+  bool overflow() { return _overflow; }
+  void clear_overflow() { _overflow = false; }
+
+  int  size() { return _index; }
+
+  // It iterates over the entries in the region stack and it
+  // invalidates (i.e. assigns MemRegion()) the ones that point to
+  // regions in the collection set.
+  bool invalidate_entries_into_cset();
+
+  // This gives the upper bound that the iteration in
+  // invalidate_entries_into_cset() will reach. This prevents
+  // newly-added entries from being unnecessarily scanned.
+  void set_oops_do_bound() {
+    _oops_do_bound = _index;
+  }
+
+  void setEmpty()   { _index = 0; clear_overflow(); }
+};
+
+// this will enable a variety of different statistics per GC task
+#define _MARKING_STATS_       0
+// this will enable the higher verbose levels
+#define _MARKING_VERBOSE_     0
+
+#if _MARKING_STATS_
+#define statsOnly(statement)  \
+do {                          \
+  statement ;                 \
+} while (0)
+#else // _MARKING_STATS_
+#define statsOnly(statement)  \
+do {                          \
+} while (0)
+#endif // _MARKING_STATS_
+
+// Some extra guarantees that I like to also enable in optimised mode
+// when debugging. If you want to enable them, comment out the assert
+// macro and uncomment the guarantee macro.
+// #define tmp_guarantee_CM(expr, str) guarantee(expr, str)
+#define tmp_guarantee_CM(expr, str) assert(expr, str)
+
+typedef enum {
+  no_verbose  = 0,   // verbose turned off
+  stats_verbose,     // only prints stats at the end of marking
+  low_verbose,       // low verbose, mostly per region and per major event
+  medium_verbose,    // a bit more detailed than low
+  high_verbose       // per object verbose
+} CMVerboseLevel;
+
+
+class ConcurrentMarkThread;
+
+class ConcurrentMark {
+  friend class ConcurrentMarkThread;
+  friend class CMTask;
+  friend class CMBitMapClosure;
+  friend class CSMarkOopClosure;
+  friend class CMGlobalObjectClosure;
+  friend class CMRemarkTask;
+  friend class CMConcurrentMarkingTask;
+  friend class G1ParNoteEndTask;
+  friend class CalcLiveObjectsClosure;
+
+protected:
+  ConcurrentMarkThread* _cmThread;   // the thread doing the work
+  G1CollectedHeap*      _g1h;        // the heap.
+  size_t                _parallel_marking_threads; // the number of marking
+                                                   // threads we'll use
+  double                _sleep_factor; // how much we have to sleep, with
+                                       // respect to the work we just did, to
+                                       // meet the marking overhead goal
+  double                _marking_task_overhead; // marking target overhead for
+                                                // a single task
+
+  // same as the two above, but for the cleanup task
+  double                _cleanup_sleep_factor;
+  double                _cleanup_task_overhead;
+
+  // Stuff related to age cohort processing.
+  struct ParCleanupThreadState {
+    char _pre[64];
+    UncleanRegionList list;
+    char _post[64];
+  };
+  ParCleanupThreadState** _par_cleanup_thread_state;
+
+  // CMS marking support structures
+  CMBitMap                _markBitMap1;
+  CMBitMap                _markBitMap2;
+  CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
+  CMBitMap*               _nextMarkBitMap; // under-construction mark bitmap
+  bool                    _at_least_one_mark_complete;
+
+  BitMap                  _region_bm;
+  BitMap                  _card_bm;
+
+  // Heap bounds
+  HeapWord*               _heap_start;
+  HeapWord*               _heap_end;
+
+  // For gray objects
+  CMMarkStack             _markStack; // Grey objects behind global finger.
+  CMRegionStack           _regionStack; // Grey regions behind global finger.
+  HeapWord* volatile      _finger;  // the global finger, region aligned,
+                                    // always points to the end of the
+                                    // last claimed region
+
+  // marking tasks
+  size_t                  _max_task_num; // maximum task number
+  size_t                  _active_tasks; // task num currently active
+  CMTask**                _tasks;        // task queue array (max_task_num len)
+  CMTaskQueueSet*         _task_queues;  // task queue set
+  ParallelTaskTerminator  _terminator;   // for termination
+
+  // Two sync barriers that are used to synchronise tasks when an
+  // overflow occurs. The algorithm is the following. All tasks enter
+  // the first one to ensure that they have all stopped manipulating
+  // the global data structures. After they exit it, they re-initialise
+  // their data structures and task 0 re-initialises the global data
+  // structures. Then, they enter the second sync barrier. This
+  // ensures that no task starts doing work before all data
+  // structures (local and global) have been re-initialised. When they
+  // exit it, they are free to start working again.
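+  // From a single task's point of view, the sequence is roughly the
+  // following (a sketch for exposition; see CMTask::do_marking_step()):
+  //   _cm->enter_first_sync_barrier(_task_id);  // everyone stops marking
+  //   clear_region_fields();                    // re-initialise local state
+  //   _cm->enter_second_sync_barrier(_task_id); // now safe to restart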
+  WorkGangBarrierSync     _first_overflow_barrier_sync;
+  WorkGangBarrierSync     _second_overflow_barrier_sync;
+
+
+  // this is set by any task, when an overflow on the global data
+  // structures is detected.
+  volatile bool           _has_overflown;
+  // true: marking is concurrent, false: we're in remark
+  volatile bool           _concurrent;
+  // set at the end of a Full GC so that marking aborts
+  volatile bool           _has_aborted;
+  // used when remark aborts due to an overflow to indicate that
+  // another concurrent marking phase should start
+  volatile bool           _restart_for_overflow;
+
+  // This is true from the very start of concurrent marking until the
+  // point when all the tasks complete their work. It is really used
+  // to identify the period between the end of concurrent marking and
+  // the time of remark.
+  volatile bool           _concurrent_marking_in_progress;
+
+  // verbose level
+  CMVerboseLevel          _verbose_level;
+
+  COTracker               _cleanup_co_tracker;
+
+  // These two fields are used to implement the optimisation that
+  // avoids pushing objects on the global/region stack if there are
+  // no collection set regions above the lowest finger.
+
+  // This is the lowest finger (among the global and local fingers),
+  // which is calculated before a new collection set is chosen.
+  HeapWord* _min_finger;
+  // If this flag is true, objects/regions that are marked below the
+  // finger should be pushed on the stack(s). If this flag is
+  // false, it is safe not to push them on the stack(s).
+  bool      _should_gray_objects;
+
+  // All of these times are in ms.
+  NumberSeq _init_times;
+  NumberSeq _remark_times;
+  NumberSeq   _remark_mark_times;
+  NumberSeq   _remark_weak_ref_times;
+  NumberSeq _cleanup_times;
+  double    _total_counting_time;
+  double    _total_rs_scrub_time;
+
+  double*   _accum_task_vtime;   // accumulated task vtime
+
+  WorkGang* _parallel_workers;
+
+  void weakRefsWork(bool clear_all_soft_refs);
+
+  void swapMarkBitMaps();
+
+  // It resets the global marking data structures, as well as the
+  // task local ones; should be called during initial mark.
+  void reset();
+  // It resets all the marking data structures.
+  void clear_marking_state();
+
+  // It should be called to indicate which phase we're in (concurrent
+  // mark or remark) and how many threads are currently active.
+  void set_phase(size_t active_tasks, bool concurrent);
+  // We do this after we're done with marking so that the marking data
+  // structures are initialised to a sensible and predictable state.
+  void set_non_marking_state();
+
+  // prints all gathered CM-related statistics
+  void print_stats();
+
+  // accessor methods
+  size_t parallel_marking_threads() { return _parallel_marking_threads; }
+  double sleep_factor()             { return _sleep_factor; }
+  double marking_task_overhead()    { return _marking_task_overhead;}
+  double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
+  double cleanup_task_overhead()    { return _cleanup_task_overhead;}
+
+  HeapWord*               finger()        { return _finger;   }
+  bool                    concurrent()    { return _concurrent; }
+  size_t                  active_tasks()  { return _active_tasks; }
+  ParallelTaskTerminator* terminator()    { return &_terminator; }
+
+  // It claims the next available region to be scanned by a marking
+  // task. It might return NULL if the next region is empty or we have
+  // run out of regions. In the latter case, out_of_regions()
+  // determines whether we've really run out of regions or the task
+  // should call claim_region() again.  This might seem a bit
+  // awkward. Originally, the code was written so that claim_region()
+  // either successfully returned with a non-empty region or there
+  // were no more regions to be claimed. The problem with this was
+  // that, in certain circumstances, it iterated over large chunks of
+  // the heap finding only empty regions and, while it was working, it
+  // was preventing the calling task from calling its regular clock
+  // method. So, this way, each task will spend very little time in
+  // claim_region() and is allowed to call the regular clock method
+  // frequently.
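+  // The intended calling pattern, as sketched from
+  // CMTask::do_marking_step():
+  //   while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
+  //     HeapRegion* claimed_region = _cm->claim_region(_task_id);
+  //     if (claimed_region != NULL) setup_for_region(claimed_region);
+  //     regular_clock_call();  // keep the clock ticking while we search
+  //   }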
+  HeapRegion* claim_region(int task);
+
+  // It determines whether we've run out of regions to scan.
+  bool        out_of_regions() { return _finger == _heap_end; }
+
+  // Returns the task with the given id
+  CMTask* task(int id) {
+    guarantee( 0 <= id && id < (int) _active_tasks, "task id not within "
+               "active bounds" );
+    return _tasks[id];
+  }
+
+  // Returns the task queue with the given id
+  CMTaskQueue* task_queue(int id) {
+    guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within "
+               "active bounds" );
+    return (CMTaskQueue*) _task_queues->queue(id);
+  }
+
+  // Returns the task queue set
+  CMTaskQueueSet* task_queues()  { return _task_queues; }
+
+  // Access / manipulation of the overflow flag which is set to
+  // indicate that the global stack or region stack has overflown
+  bool has_overflown()           { return _has_overflown; }
+  void set_has_overflown()       { _has_overflown = true; }
+  void clear_has_overflown()     { _has_overflown = false; }
+
+  bool has_aborted()             { return _has_aborted; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
+
+  // Methods to enter the two overflow sync barriers
+  void enter_first_sync_barrier(int task_num);
+  void enter_second_sync_barrier(int task_num);
+
+public:
+  // Manipulation of the global mark stack.
+  // Notice that the first mark_stack_push is CAS-based, whereas the
+  // two below are Mutex-based. This is OK since the first one is only
+  // called during evacuation pauses and doesn't compete with the
+  // other two (which are called by the marking tasks during
+  // concurrent marking or remark).
+  bool mark_stack_push(oop p) {
+    _markStack.par_push(p);
+    if (_markStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  bool mark_stack_push(oop* arr, int n) {
+    _markStack.par_push_arr(arr, n);
+    if (_markStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  void mark_stack_pop(oop* arr, int max, int* n) {
+    _markStack.par_pop_arr(arr, max, n);
+  }
+  size_t mark_stack_size()              { return _markStack.size(); }
+  size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
+  bool mark_stack_overflow()            { return _markStack.overflow(); }
+  bool mark_stack_empty()               { return _markStack.isEmpty(); }
+
+  // Manipulation of the region stack
+  bool region_stack_push(MemRegion mr) {
+    _regionStack.push(mr);
+    if (_regionStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  MemRegion region_stack_pop()          { return _regionStack.pop(); }
+  int region_stack_size()               { return _regionStack.size(); }
+  bool region_stack_overflow()          { return _regionStack.overflow(); }
+  bool region_stack_empty()             { return _regionStack.isEmpty(); }
+
+  bool concurrent_marking_in_progress() {
+    return _concurrent_marking_in_progress;
+  }
+  void set_concurrent_marking_in_progress() {
+    _concurrent_marking_in_progress = true;
+  }
+  void clear_concurrent_marking_in_progress() {
+    _concurrent_marking_in_progress = false;
+  }
+
+  void update_accum_task_vtime(int i, double vtime) {
+    _accum_task_vtime[i] += vtime;
+  }
+
+  double all_task_accum_vtime() {
+    double ret = 0.0;
+    for (int i = 0; i < (int)_max_task_num; ++i)
+      ret += _accum_task_vtime[i];
+    return ret;
+  }
+
+  // Attempts to steal an object from the task queues of other tasks
+  bool try_stealing(int task_num, int* hash_seed, oop& obj) {
+    return _task_queues->steal(task_num, hash_seed, obj);
+  }
+
+  // It grays an object by first marking it. Then, if it's behind the
+  // global finger, it also pushes it on the global stack.
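+  // Roughly (an illustrative sketch, not the exact implementation):
+  //   if (!_nextMarkBitMap->isMarked((HeapWord*) obj)) {
+  //     _nextMarkBitMap->parMark((HeapWord*) obj);
+  //     if ((HeapWord*) obj < _finger) mark_stack_push(obj);
+  //   }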
+  void deal_with_reference(oop obj);
+
+  ConcurrentMark(ReservedSpace rs, int max_regions);
+  ~ConcurrentMark();
+  ConcurrentMarkThread* cmThread() { return _cmThread; }
+
+  CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
+  CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
+
+  // The following three are interaction between CM and
+  // G1CollectedHeap
+
+  // This notifies CM that a root during initial-mark needs to be
+  // grayed and it's MT-safe. Currently, we just mark it. But, in the
+  // future, we can experiment with pushing it on the stack and we can
+  // do this without changing G1CollectedHeap.
+  void grayRoot(oop p);
+  // It's used during evacuation pauses to gray a region, if
+  // necessary, and it's MT-safe. It assumes that the caller has
+  // marked any objects on that region. If _should_gray_objects is
+  // true and we're still doing concurrent marking, the region is
+  // pushed on the region stack, if it is located below the global
+  // finger, otherwise we do nothing.
+  void grayRegionIfNecessary(MemRegion mr);
+  // It's used during evacuation pauses to mark and, if necessary,
+  // gray a single object and it's MT-safe. It assumes the caller did
+  // not mark the object. If _should_gray_objects is true and we're
+  // still doing concurrent marking, the object is pushed on the
+  // global stack, if it is located below the global finger, otherwise
+  // we do nothing.
+  void markAndGrayObjectIfNecessary(oop p);
+
+  // This iterates over the bitmap of the previous marking and prints
+  // out all objects that are marked on the bitmap and indicates
+  // whether what they point to is also marked or not.
+  void print_prev_bitmap_reachable();
+
+  // Clear the next marking bitmap (will be called concurrently).
+  void clearNextBitmap();
+
+  // main CMS steps and related support
+  void checkpointRootsInitial();
+
+  // These two do the work that needs to be done before and after the
+  // initial root checkpoint. Since this checkpoint can be done at two
+  // different points (i.e. an explicit pause or piggy-backed on a
+  // young collection), it's nice to be able to easily share the
+  // pre/post code. It might be the case that we can put everything in
+  // the post method. TP
+  void checkpointRootsInitialPre();
+  void checkpointRootsInitialPost();
+
+  // Do concurrent phase of marking, to a tentative transitive closure.
+  void markFromRoots();
+
+  // Process all unprocessed SATB buffers. It is called at the
+  // beginning of an evacuation pause.
+  void drainAllSATBBuffers();
+
+  void checkpointRootsFinal(bool clear_all_soft_refs);
+  void checkpointRootsFinalWork();
+  void calcDesiredRegions();
+  void cleanup();
+  void completeCleanup();
+
+  // Mark in the previous bitmap.  NB: this is usually read-only, so use
+  // this carefully!
+  void markPrev(oop p);
+  void clear(oop p);
+  // Clears marks for all objects in the given range, for both prev and
+  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
+  // this carefully!
+  void clearRangeBothMaps(MemRegion mr);
+
+  // Record the current top of the mark and region stacks; a
+  // subsequent oops_do() on the mark stack and
+  // invalidate_entries_into_cset() on the region stack will iterate
+  // only over indices valid at the time of this call.
+  void set_oops_do_bound() {
+    _markStack.set_oops_do_bound();
+    _regionStack.set_oops_do_bound();
+  }
+  // Iterate over the oops in the mark stack and all local queues. It
+  // also calls invalidate_entries_into_cset() on the region stack.
+  void oops_do(OopClosure* f);
+  // It is called at the end of an evacuation pause during marking so
+  // that CM is notified of where the new end of the heap is. It
+  // doesn't do anything if concurrent_marking_in_progress() is false,
+  // unless the force parameter is true.
+  void update_g1_committed(bool force = false);
+
+  void complete_marking_in_collection_set();
+
+  // It indicates that a new collection set is being chosen.
+  void newCSet();
+  // It registers a collection set heap region with CM. This is used
+  // to determine whether any heap regions are located above the finger.
+  void registerCSetRegion(HeapRegion* hr);
+
+  // Returns "true" if at least one mark has been completed.
+  bool at_least_one_mark_complete() { return _at_least_one_mark_complete; }
+
+  bool isMarked(oop p) const {
+    assert(p != NULL && p->is_oop(), "expected an oop");
+    HeapWord* addr = (HeapWord*)p;
+    assert(addr >= _nextMarkBitMap->startWord() &&
+           addr < _nextMarkBitMap->endWord(), "in a region");
+
+    return _nextMarkBitMap->isMarked(addr);
+  }
+
+  inline bool not_yet_marked(oop p) const;
+
+  // XXX Debug code
+  bool containing_card_is_marked(void* p);
+  bool containing_cards_are_marked(void* start, void* last);
+
+  bool isPrevMarked(oop p) const {
+    assert(p != NULL && p->is_oop(), "expected an oop");
+    HeapWord* addr = (HeapWord*)p;
+    assert(addr >= _prevMarkBitMap->startWord() &&
+           addr < _prevMarkBitMap->endWord(), "in a region");
+
+    return _prevMarkBitMap->isMarked(addr);
+  }
+
+  inline bool do_yield_check(int worker_i = 0);
+  inline bool should_yield();
+
+  // Called to abort the marking cycle after a Full GC takes place.
+  void abort();
+
+  void disable_co_trackers();
+
+  // This prints the global/local fingers. It is used for debugging.
+  NOT_PRODUCT(void print_finger();)
+
+  void print_summary_info();
+
+  // The following indicate whether a given verbose level has been
+  // set. Notice that anything above stats is conditional on
+  // _MARKING_VERBOSE_ having been set to 1.
+  bool verbose_stats()
+    { return _verbose_level >= stats_verbose; }
+  bool verbose_low()
+    { return _MARKING_VERBOSE_ && _verbose_level >= low_verbose; }
+  bool verbose_medium()
+    { return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose; }
+  bool verbose_high()
+    { return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; }
+};
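+
+// Illustrative usage sketch (an assumption about typical call sites, not
+// something this header prescribes): verbose output is expected to be
+// guarded by the level predicates above, e.g.
+//
+//   if (cm->verbose_low()) {
+//     gclog_or_tty->print_cr("concurrent mark: noteworthy event");
+//   }
+//
+// Checking the level first keeps the formatting cost out of builds where
+// _MARKING_VERBOSE_ is 0.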
+
+// A class representing a marking task.
+class CMTask : public TerminatorTerminator {
+private:
+  enum PrivateConstants {
+    // the regular clock call is called once the number of scanned
+    // words reaches this limit
+    words_scanned_period          = 12*1024,
+    // the regular clock call is called once the number of visited
+    // references reaches this limit
+    refs_reached_period           = 384,
+    // initial value for the hash seed, used in the work stealing code
+    init_hash_seed                = 17,
+    // how many entries will be transferred between global stack and
+    // local queues
+    global_stack_transfer_size    = 16
+  };
+
+  int                         _task_id;
+  G1CollectedHeap*            _g1h;
+  ConcurrentMark*             _cm;
+  CMBitMap*                   _nextMarkBitMap;
+  // the task queue of this task
+  CMTaskQueue*                _task_queue;
+  // the task queue set---needed for stealing
+  CMTaskQueueSet*             _task_queues;
+  // indicates whether the task has been claimed---this is only  for
+  // debugging purposes
+  bool                        _claimed;
+
+  // number of calls to this task
+  int                         _calls;
+
+  // concurrent overhead over a single CPU for this task
+  COTracker                   _co_tracker;
+
+  // when the virtual timer reaches this time, the marking step should
+  // exit
+  double                      _time_target_ms;
+  // the start time of the current marking step
+  double                      _start_time_ms;
+
+  // the oop closure used for iterations over oops
+  OopClosure*                 _oop_closure;
+
+  // the region this task is scanning, NULL if we're not scanning any
+  HeapRegion*                 _curr_region;
+  // the local finger of this task, NULL if we're not scanning a region
+  HeapWord*                   _finger;
+  // limit of the region this task is scanning, NULL if we're not scanning one
+  HeapWord*                   _region_limit;
+
+  // This is used only when we scan regions popped from the region
+  // stack. It records the last object we scanned on such a
+  // region. It is used to ensure that, if we abort region
+  // iteration, we do not rescan the first part of the region. This
+  // should be NULL when we're not scanning a region from the region
+  // stack.
+  HeapWord*                   _region_finger;
+
+  // the number of words this task has scanned
+  size_t                      _words_scanned;
+  // When _words_scanned reaches this limit, the regular clock is
+  // called. Notice that this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t                      _words_scanned_limit;
+  // the initial value of _words_scanned_limit (i.e. what it was
+  // before it was decreased).
+  size_t                      _real_words_scanned_limit;
+
+  // the number of references this task has visited
+  size_t                      _refs_reached;
+  // When _refs_reached reaches this limit, the regular clock is
+  // called. Notice that this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t                      _refs_reached_limit;
+  // the initial value of _refs_reached_limit (i.e. what it was before
+  // it was decreased).
+  size_t                      _real_refs_reached_limit;
+
+  // used by the work stealing stuff
+  int                         _hash_seed;
+  // if this is true, then the task has aborted for some reason
+  bool                        _has_aborted;
+  // set when the task aborts because it has met its time quota
+  bool                        _has_aborted_timed_out;
+  // true when we're draining SATB buffers; this avoids the task
+  // aborting due to SATB buffers being available (as we're already
+  // dealing with them)
+  bool                        _draining_satb_buffers;
+
+  // number sequence of past step times
+  NumberSeq                   _step_times_ms;
+  // elapsed time of this task
+  double                      _elapsed_time_ms;
+  // termination time of this task
+  double                      _termination_time_ms;
+  // when this task got into the termination protocol
+  double                      _termination_start_time_ms;
+
+  // true when the task is in a concurrent phase, false when it is
+  // in the remark phase (so, in the latter case, we do not have to
+  // check all the things that we have to check during the concurrent
+  // phase, i.e. SATB buffer availability...)
+  bool                        _concurrent;
+
+  TruncatedSeq                _marking_step_diffs_ms;
+
+  // LOTS of statistics related with this task
+#if _MARKING_STATS_
+  NumberSeq                   _all_clock_intervals_ms;
+  double                      _interval_start_time_ms;
+
+  int                         _aborted;
+  int                         _aborted_overflow;
+  int                         _aborted_cm_aborted;
+  int                         _aborted_yield;
+  int                         _aborted_timed_out;
+  int                         _aborted_satb;
+  int                         _aborted_termination;
+
+  int                         _steal_attempts;
+  int                         _steals;
+
+  int                         _clock_due_to_marking;
+  int                         _clock_due_to_scanning;
+
+  int                         _local_pushes;
+  int                         _local_pops;
+  int                         _local_max_size;
+  int                         _objs_scanned;
+
+  int                         _global_pushes;
+  int                         _global_pops;
+  int                         _global_max_size;
+
+  int                         _global_transfers_to;
+  int                         _global_transfers_from;
+
+  int                         _region_stack_pops;
+
+  int                         _regions_claimed;
+  int                         _objs_found_on_bitmap;
+
+  int                         _satb_buffers_processed;
+#endif // _MARKING_STATS_
+
+  // it updates the local fields after this task has claimed
+  // a new region to scan
+  void setup_for_region(HeapRegion* hr);
+  // it brings up-to-date the limit of the region
+  void update_region_limit();
+  // it resets the local fields after a task has finished scanning a
+  // region
+  void giveup_current_region();
+
+  // called when either the words scanned or the refs visited limit
+  // has been reached
+  void reached_limit();
+  // recalculates the words scanned and refs visited limits
+  void recalculate_limits();
+  // decreases the words scanned and refs visited limits when we reach
+  // an expensive operation
+  void decrease_limits();
+  // it checks whether the words scanned or refs visited reached their
+  // respective limit and calls reached_limit() if they have
+  void check_limits() {
+    if (_words_scanned >= _words_scanned_limit ||
+        _refs_reached >= _refs_reached_limit)
+      reached_limit();
+  }
+  // this is supposed to be called regularly during a marking step as
+  // it checks a bunch of conditions that might cause the marking step
+  // to abort
+  void regular_clock_call();
+  bool concurrent() { return _concurrent; }
+
+public:
+  // It resets the task; it should be called right at the beginning of
+  // a marking phase.
+  void reset(CMBitMap* _nextMarkBitMap);
+  // it clears all the fields that correspond to a claimed region.
+  void clear_region_fields();
+
+  void set_concurrent(bool concurrent) { _concurrent = concurrent; }
+
+  void enable_co_tracker() {
+    guarantee( !_co_tracker.enabled(), "invariant" );
+    _co_tracker.enable();
+  }
+  void disable_co_tracker() {
+    guarantee( _co_tracker.enabled(), "invariant" );
+    _co_tracker.disable();
+  }
+  bool co_tracker_enabled() {
+    return _co_tracker.enabled();
+  }
+  void reset_co_tracker(double starting_conc_overhead = 0.0) {
+    _co_tracker.reset(starting_conc_overhead);
+  }
+  void start_co_tracker() {
+    _co_tracker.start();
+  }
+  void update_co_tracker(bool force_end = false) {
+    _co_tracker.update(force_end);
+  }
+
+  // The main method of this class which performs a marking step
+  // trying not to exceed the given duration. However, it might exit
+  // prematurely, according to some conditions (i.e. SATB buffers are
+  // available for processing).
+  void do_marking_step(double target_ms);
+
+  // These two calls start and stop the timer
+  void record_start_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0;
+  }
+  void record_end_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
+  }
+
+  // returns the task ID
+  int task_id() { return _task_id; }
+
+  // From TerminatorTerminator. It determines whether this task should
+  // exit the termination protocol after it's entered it.
+  virtual bool should_exit_termination();
+
+  HeapWord* finger()            { return _finger; }
+
+  bool has_aborted()            { return _has_aborted; }
+  void set_has_aborted()        { _has_aborted = true; }
+  void clear_has_aborted()      { _has_aborted = false; }
+  bool claimed() { return _claimed; }
+
+  void set_oop_closure(OopClosure* oop_closure) {
+    _oop_closure = oop_closure;
+  }
+
+  // It grays the object by marking it and, if necessary, pushing it
+  // on the local queue
+  void deal_with_reference(oop obj);
+
+  // It scans an object and visits its children.
+  void scan_object(oop obj) {
+    tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
+                      "invariant" );
+
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
+                             _task_id, (void*) obj);
+
+    size_t obj_size = obj->size();
+    _words_scanned += obj_size;
+
+    obj->oop_iterate(_oop_closure);
+    statsOnly( ++_objs_scanned );
+    check_limits();
+  }
+
+  // It pushes an object on the local queue.
+  void push(oop obj);
+
+  // These two move entries to/from the global stack.
+  void move_entries_to_global_stack();
+  void get_entries_from_global_stack();
+
+  // It pops and scans objects from the local queue. If partially is
+  // true, then it stops when the queue size drops to a given limit. If
+  // partially is false, then it stops when the queue is empty.
+  void drain_local_queue(bool partially);
+  // It moves entries from the global stack to the local queue and
+  // drains the local queue. If partially is true, then it stops when
+  // both the global stack and the local queue reach a given size. If
+  // partially is false, it tries to empty them totally.
+  void drain_global_stack(bool partially);
+  // It keeps picking SATB buffers and processing them until no SATB
+  // buffers are available.
+  void drain_satb_buffers();
+  // It keeps popping regions from the region stack and processing
+  // them until the region stack is empty.
+  void drain_region_stack(BitMapClosure* closure);
+
+  // moves the local finger to a new location
+  inline void move_finger_to(HeapWord* new_finger) {
+    tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit,
+                   "invariant" );
+    _finger = new_finger;
+  }
+
+  // moves the region finger to a new location
+  inline void move_region_finger_to(HeapWord* new_finger) {
+    tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" );
+    _region_finger = new_finger;
+  }
+
+  CMTask(int task_num, ConcurrentMark *cm,
+         CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
+
+  // it prints statistics associated with this task
+  void print_stats();
+
+#if _MARKING_STATS_
+  void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
+#endif // _MARKING_STATS_
+};
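+
+// Illustrative driver sketch (hedged): a marking worker might drive a task
+// in bounded slices roughly as follows. The 10.0 ms target and the retry
+// policy are assumptions for illustration, not the actual calling
+// convention.
+//
+//   CMTask* task = ...;                  // owned by the ConcurrentMark instance
+//   task->set_concurrent(true);
+//   task->do_marking_step(10.0 /* target time in ms */);
+//   if (task->has_aborted()) {
+//     // the step ran out of time or was asked to yield; come back later
+//   }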
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,336 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentMarkThread.cpp.incl"
+
+// ======= Concurrent Mark Thread ========
+
+// The CM thread is created when the G1 garbage collector is used
+
+SurrogateLockerThread*
+     ConcurrentMarkThread::_slt = NULL;
+
+ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) :
+  ConcurrentGCThread(),
+  _cm(cm),
+  _started(false),
+  _in_progress(false),
+  _vtime_accum(0.0),
+  _vtime_mark_accum(0.0),
+  _vtime_count_accum(0.0)
+{
+  create_and_start();
+}
+
+class CMCheckpointRootsInitialClosure: public VoidClosure {
+
+  ConcurrentMark* _cm;
+public:
+
+  CMCheckpointRootsInitialClosure(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->checkpointRootsInitial();
+  }
+};
+
+class CMCheckpointRootsFinalClosure: public VoidClosure {
+
+  ConcurrentMark* _cm;
+public:
+
+  CMCheckpointRootsFinalClosure(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->checkpointRootsFinal(false); // !clear_all_soft_refs
+  }
+};
+
+class CMCleanUp: public VoidClosure {
+  ConcurrentMark* _cm;
+public:
+
+  CMCleanUp(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->cleanup();
+  }
+};
+
+
+
+void ConcurrentMarkThread::run() {
+  initialize_in_thread();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1_policy = g1->g1_policy();
+  G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker();
+  Thread *current_thread = Thread::current();
+
+  while (!_should_terminate) {
+    // wait until started is set.
+    sleepBeforeNextCycle();
+    {
+      ResourceMark rm;
+      HandleMark   hm;
+      double cycle_start = os::elapsedVTime();
+      double mark_start_sec = os::elapsedTime();
+      char verbose_str[128];
+
+      if (PrintGC) {
+        gclog_or_tty->date_stamp(PrintGCDateStamps);
+        gclog_or_tty->stamp(PrintGCTimeStamps);
+        gclog_or_tty->print_cr("[GC concurrent-mark-start]");
+      }
+
+      if (!g1_policy->in_young_gc_mode()) {
+        // this ensures the flag is not set if we bail out of the marking
+        // cycle; normally the flag is cleared immediately after cleanup
+        g1->set_marking_complete();
+
+        if (g1_policy->adaptive_young_list_length()) {
+          double now = os::elapsedTime();
+          double init_prediction_ms = g1_policy->predict_init_time_ms();
+          jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms);
+          os::sleep(current_thread, sleep_time_ms, false);
+        }
+
+        // We don't have to skip here if we've been asked to restart, because
+        // in the worst case we just enqueue a new VM operation to start a
+        // marking.  Note that the init operation resets has_aborted()
+        CMCheckpointRootsInitialClosure init_cl(_cm);
+        strcpy(verbose_str, "GC initial-mark");
+        VM_CGC_Operation op(&init_cl, verbose_str);
+        VMThread::execute(&op);
+      }
+
+      int iter = 0;
+      do {
+        iter++;
+        if (!cm()->has_aborted()) {
+          _cm->markFromRoots();
+        } else {
+          if (TraceConcurrentMark)
+            gclog_or_tty->print_cr("CM-skip-mark-from-roots");
+        }
+
+        double mark_end_time = os::elapsedVTime();
+        double mark_end_sec = os::elapsedTime();
+        _vtime_mark_accum += (mark_end_time - cycle_start);
+        if (!cm()->has_aborted()) {
+          if (g1_policy->adaptive_young_list_length()) {
+            double now = os::elapsedTime();
+            double remark_prediction_ms = g1_policy->predict_remark_time_ms();
+            jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms);
+            os::sleep(current_thread, sleep_time_ms, false);
+          }
+
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
+                                      mark_end_sec - mark_start_sec);
+          }
+
+          CMCheckpointRootsFinalClosure final_cl(_cm);
+          sprintf(verbose_str, "GC remark");
+          VM_CGC_Operation op(&final_cl, verbose_str);
+          VMThread::execute(&op);
+        } else {
+          if (TraceConcurrentMark)
+            gclog_or_tty->print_cr("CM-skip-remark");
+        }
+        if (cm()->restart_for_overflow() &&
+            G1TraceMarkStackOverflow) {
+          gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
+                                 "in remark (restart #%d).", iter);
+        }
+
+        if (cm()->restart_for_overflow()) {
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
+          }
+        }
+      } while (cm()->restart_for_overflow());
+      double counting_start_time = os::elapsedVTime();
+
+      // YSR: These look dubious (i.e. redundant) !!! FIX ME
+      slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
+      slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
+
+      if (!cm()->has_aborted()) {
+        double count_start_sec = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-count-start]");
+        }
+
+        _sts.join();
+        _cm->calcDesiredRegions();
+        _sts.leave();
+
+        if (!cm()->has_aborted()) {
+          double count_end_sec = os::elapsedTime();
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
+                                   count_end_sec - count_start_sec);
+          }
+        }
+      } else {
+        if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-end-game");
+      }
+      double end_time = os::elapsedVTime();
+      _vtime_count_accum += (end_time - counting_start_time);
+      // Update the total virtual time before doing this, since it will try
+      // to measure it to get the vtime for this marking.  We purposely
+      // neglect the presumably-short "completeCleanup" phase here.
+      _vtime_accum = (end_time - _vtime_start);
+      if (!cm()->has_aborted()) {
+        if (g1_policy->adaptive_young_list_length()) {
+          double now = os::elapsedTime();
+          double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms();
+          jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms);
+          os::sleep(current_thread, sleep_time_ms, false);
+        }
+
+        CMCleanUp cl_cl(_cm);
+        sprintf(verbose_str, "GC cleanup");
+        VM_CGC_Operation op(&cl_cl, verbose_str);
+        VMThread::execute(&op);
+      } else {
+        if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-cleanup");
+        G1CollectedHeap::heap()->set_marking_complete();
+      }
+
+      if (!cm()->has_aborted()) {
+        double cleanup_start_sec = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
+        }
+
+        // Now do the remainder of the cleanup operation.
+        _sts.join();
+        _cm->completeCleanup();
+        if (!cm()->has_aborted()) {
+          g1_policy->record_concurrent_mark_cleanup_completed();
+
+          double cleanup_end_sec = os::elapsedTime();
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
+                                   cleanup_end_sec - cleanup_start_sec);
+          }
+        }
+        _sts.leave();
+      }
+      // We're done: no more unclean regions coming.
+      G1CollectedHeap::heap()->set_unclean_regions_coming(false);
+
+      if (cm()->has_aborted()) {
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
+        }
+      }
+
+      _sts.join();
+      _cm->disable_co_trackers();
+      _sts.leave();
+
+      // we now want to allow clearing of the marking bitmap to be
+      // suspended by a collection pause.
+      _sts.join();
+      _cm->clearNextBitmap();
+      _sts.leave();
+    }
+  }
+  assert(_should_terminate, "just checking");
+
+  terminate();
+}
+
+
+void ConcurrentMarkThread::yield() {
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield");
+  _sts.yield("Concurrent Mark");
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield-end");
+}
+
+void ConcurrentMarkThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  MutexLockerEx mu(Terminator_lock);
+  _should_terminate = true;
+  while (!_has_terminated) {
+    Terminator_lock->wait();
+  }
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-stop");
+}
+
+void ConcurrentMarkThread::print() {
+  gclog_or_tty->print("\"Concurrent Mark GC Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+void ConcurrentMarkThread::sleepBeforeNextCycle() {
+  clear_in_progress();
+  // We join here because we don't want to do the "shouldConcurrentMark()"
+  // below while the world is otherwise stopped.
+  MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+  while (!started()) {
+    if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-sleeping");
+    CGC_lock->wait(Mutex::_no_safepoint_check_flag);
+  }
+  set_in_progress();
+  clear_started();
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
+
+  return;
+}
+
+// Note: this method, although exported by the ConcurrentMarkThread,
+// which is a non-JavaThread, can only be called by a JavaThread.
+// Currently this is done at vm creation time (post-vm-init) by the
+// main/Primordial (Java)Thread.
+// XXX Consider changing this in the future to allow the CM thread
+// itself to create this thread?
+void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
+  assert(_slt == NULL, "SLT already created");
+  _slt = SurrogateLockerThread::make(THREAD);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The Concurrent Mark GC Thread (could be several in the future).
+// This is copied from the Concurrent Mark Sweep GC Thread
+// Still under construction.
+
+class ConcurrentMark;
+
+class ConcurrentMarkThread: public ConcurrentGCThread {
+  friend class VMStructs;
+
+  double _vtime_start;  // Initial virtual time.
+  double _vtime_accum;  // Accumulated virtual time.
+
+  double _vtime_mark_accum;
+  double _vtime_count_accum;
+
+ public:
+  virtual void run();
+
+ private:
+  ConcurrentMark*                  _cm;
+  bool                             _started;
+  bool                             _in_progress;
+
+  void sleepBeforeNextCycle();
+
+  static SurrogateLockerThread*         _slt;
+
+ public:
+  // Constructor
+  ConcurrentMarkThread(ConcurrentMark* cm);
+
+  static void makeSurrogateLockerThread(TRAPS);
+  static SurrogateLockerThread* slt() { return _slt; }
+
+  // Printing
+  void print();
+
+  // Total virtual time so far.
+  double vtime_accum();
+  // Marking virtual time so far
+  double vtime_mark_accum();
+  // Counting virtual time so far.
+  double vtime_count_accum() { return _vtime_count_accum; }
+
+  ConcurrentMark* cm()                           { return _cm;     }
+
+  void            set_started()                  { _started = true;   }
+  void            clear_started()                { _started = false;  }
+  bool            started()                      { return _started;   }
+
+  void            set_in_progress()              { _in_progress = true;   }
+  void            clear_in_progress()            { _in_progress = false;  }
+  bool            in_progress()                  { return _in_progress;   }
+
+  // Yield for GC
+  void            yield();
+
+  // shutdown
+  static void stop();
+};
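+
+// Illustrative sketch (hedged): given that sleepBeforeNextCycle() waits on
+// CGC_lock until started() is set, a caller that wants to kick off a cycle
+// would presumably do something like the following. The cm_thread variable
+// and the notify pattern are assumptions for illustration.
+//
+//   {
+//     MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+//     cm_thread->set_started();
+//     CGC_lock->notify();
+//   }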
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Total virtual time so far.
+inline double ConcurrentMarkThread::vtime_accum() {
+  return _vtime_accum + _cm->all_task_accum_vtime();
+}
+
+// Marking virtual time so far
+inline double ConcurrentMarkThread::vtime_mark_accum() {
+  return _vtime_mark_accum + _cm->all_task_accum_vtime();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentZFThread.cpp.incl"
+
+// ======= Concurrent Zero-Fill Thread ========
+
+// The concurrent zero-fill (ZF) thread is created when the G1 garbage collector is used
+
+int ConcurrentZFThread::_region_allocs = 0;
+int ConcurrentZFThread::_sync_zfs = 0;
+int ConcurrentZFThread::_zf_waits = 0;
+int ConcurrentZFThread::_regions_filled = 0;
+
+ConcurrentZFThread::ConcurrentZFThread() :
+  ConcurrentGCThread(),
+  _co_tracker(G1ZFGroup)
+{
+  create_and_start();
+}
+
+void ConcurrentZFThread::wait_for_ZF_completed(HeapRegion* hr) {
+  assert(ZF_mon->owned_by_self(), "Precondition.");
+  note_zf_wait();
+  while (hr->zero_fill_state() == HeapRegion::ZeroFilling) {
+    ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+void ConcurrentZFThread::processHeapRegion(HeapRegion* hr) {
+  assert(!Universe::heap()->is_gc_active(),
+         "This should not happen during GC.");
+  assert(hr != NULL, "Precondition");
+  // These are unlocked reads, but if this test is successful, then no
+  // other thread will attempt this zero filling.  Only a GC thread can
+  // modify the ZF state of a region whose state is zero-filling, and this
+  // should only happen while the ZF thread is locking out GC.
+  if (hr->zero_fill_state() == HeapRegion::ZeroFilling
+      && hr->zero_filler() == Thread::current()) {
+    assert(hr->top() == hr->bottom(), "better be empty!");
+    assert(!hr->isHumongous(), "Only free regions on unclean list.");
+    Copy::fill_to_words(hr->bottom(), hr->capacity()/HeapWordSize);
+    note_region_filled();
+  }
+}
+
+void ConcurrentZFThread::run() {
+  initialize_in_thread();
+  Thread* thr_self = Thread::current();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+  _co_tracker.enable();
+  _co_tracker.start();
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  _sts.join();
+  while (!_should_terminate) {
+    _sts.leave();
+
+    {
+      MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+
+      // This local variable will hold a region being zero-filled.  This
+      // region will be on neither the unclean nor the zero-filled lists,
+      // and will not be available for allocation; thus, an allocation
+      // might fail and cause a full GC, but this is a price we are
+      // willing to pay.  (In future, we might want to make the fact
+      // that there's a region being zero-filled apparent to the G1 heap,
+      // which could then wait for it in this extreme case...)
+      HeapRegion* to_fill;
+
+      while (!g1->should_zf()
+             || (to_fill = g1->pop_unclean_region_list_locked()) == NULL)
+        ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+      while (to_fill->zero_fill_state() == HeapRegion::ZeroFilling)
+        ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+
+      // So now to_fill is non-NULL and is not ZeroFilling.  It might be
+      // Allocated or ZeroFilled.  (The latter could happen if this thread
+      // starts the zero-filling of a region, but a GC intervenes and
+      // pushes new regions needing filling on the front of the
+      // list.)
+
+      switch (to_fill->zero_fill_state()) {
+      case HeapRegion::Allocated:
+        to_fill = NULL;
+        break;
+
+      case HeapRegion::NotZeroFilled:
+        to_fill->set_zero_fill_in_progress(thr_self);
+
+        ZF_mon->unlock();
+        _sts.join();
+        processHeapRegion(to_fill);
+        _sts.leave();
+        ZF_mon->lock_without_safepoint_check();
+
+        if (to_fill->zero_fill_state() == HeapRegion::ZeroFilling
+            && to_fill->zero_filler() == thr_self) {
+          to_fill->set_zero_fill_complete();
+          (void)g1->put_free_region_on_list_locked(to_fill);
+        }
+        break;
+
+      case HeapRegion::ZeroFilled:
+        (void)g1->put_free_region_on_list_locked(to_fill);
+        break;
+
+      case HeapRegion::ZeroFilling:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+    _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    _sts.join();
+
+    _co_tracker.update();
+  }
+  _co_tracker.update(false);
+  _sts.leave();
+
+  assert(_should_terminate, "just checking");
+  terminate();
+}
+
+bool ConcurrentZFThread::offer_yield() {
+  if (_sts.should_yield()) {
+    _sts.yield("Concurrent ZF");
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void ConcurrentZFThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  MutexLockerEx mu(Terminator_lock);
+  _should_terminate = true;
+  while (!_has_terminated) {
+    Terminator_lock->wait();
+  }
+}
+
+void ConcurrentZFThread::print() {
+  gclog_or_tty->print("\"Concurrent ZF Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+
+double ConcurrentZFThread::_vtime_accum;
+
+void ConcurrentZFThread::print_summary_info() {
+  gclog_or_tty->print("\nConcurrent Zero-Filling:\n");
+  gclog_or_tty->print("  Filled %d regions, used %5.2fs.\n",
+                      _regions_filled,
+                      vtime_accum());
+  gclog_or_tty->print("  Of %d region allocs, %d (%5.2f%%) required sync ZF,\n",
+                      _region_allocs, _sync_zfs,
+                      (_region_allocs > 0 ?
+                       (float)_sync_zfs/(float)_region_allocs*100.0 :
+                       0.0));
+  gclog_or_tty->print("     and %d (%5.2f%%) required a ZF wait.\n",
+                      _zf_waits,
+                      (_region_allocs > 0 ?
+                       (float)_zf_waits/(float)_region_allocs*100.0 :
+                       0.0));
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The Concurrent ZF Thread.  Performs concurrent zero-filling.
+
+class ConcurrentZFThread: public ConcurrentGCThread {
+  friend class VMStructs;
+  friend class ZeroFillRegionClosure;
+
+ private:
+
+  // Zero fill the heap region.
+  void processHeapRegion(HeapRegion* r);
+
+  // Stats
+  //   Allocation (protected by heap lock).
+  static int _region_allocs;  // Number of regions allocated
+  static int _sync_zfs;       // Synchronous zero-fills.
+  static int _zf_waits;       // Waits for concurrent zero-fill completion.
+
+  // Number of regions the ZF thread fills.
+  static int _regions_filled;
+
+  COTracker _co_tracker;
+
+  double _vtime_start;  // Initial virtual time.
+
+  // These are static because the "print_summary_info" method is, and
+  // it currently assumes there is only one ZF thread.  We'll change when
+  // we need to.
+  static double _vtime_accum;  // Accumulated virtual time.
+  static double vtime_accum() { return _vtime_accum; }
+
+  // Offer yield for GC.  Returns true if yield occurred.
+  bool offer_yield();
+
+ public:
+  // Constructor
+  ConcurrentZFThread();
+
+  // Main loop.
+  virtual void run();
+
+  // Printing
+  void print();
+
+  // Waits until "r" has been zero-filled.  Requires caller to hold the
+  // ZF_mon.
+  static void wait_for_ZF_completed(HeapRegion* r);
+
+  // Get or clear the current unclean region.  Should be done
+  // while holding the ZF_needed_mon lock.
+
+  // shutdown
+  static void stop();
+
+  // Stats
+  static void note_region_alloc() {_region_allocs++; }
+  static void note_sync_zfs() { _sync_zfs++; }
+  static void note_zf_wait() { _zf_waits++; }
+  static void note_region_filled() { _regions_filled++; }
+
+  static void print_summary_info();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_dirtyCardQueue.cpp.incl"
+
+bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl,
+                                   bool consume,
+                                   size_t worker_i) {
+  bool res = true;
+  if (_buf != NULL) {
+    res = apply_closure_to_buffer(cl, _buf, _index, _sz,
+                                  consume,
+                                  (int) worker_i);
+    if (res && consume) _index = _sz;
+  }
+  return res;
+}
+
+bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl,
+                                             void** buf,
+                                             size_t index, size_t sz,
+                                             bool consume,
+                                             int worker_i) {
+  if (cl == NULL) return true;
+  for (size_t i = index; i < sz; i += oopSize) {
+    int ind = byte_index_to_index((int)i);
+    jbyte* card_ptr = (jbyte*)buf[ind];
+    if (card_ptr != NULL) {
+      // Set the entry to null, so we don't do it again (via the test
+      // above) if we reconsider this buffer.
+      if (consume) buf[ind] = NULL;
+      if (!cl->do_card_ptr(card_ptr, worker_i)) return false;
+    }
+  }
+  return true;
+}
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+DirtyCardQueueSet::DirtyCardQueueSet() :
+  PtrQueueSet(true /*notify_when_complete*/),
+  _closure(NULL),
+  _shared_dirty_card_queue(this, true /*perm*/),
+  _free_ids(NULL),
+  _processed_buffers_mut(0), _processed_buffers_rs_thread(0)
+{
+  _all_active = true;
+}
+
+size_t DirtyCardQueueSet::num_par_ids() {
+  return MAX2(ParallelGCThreads, (size_t)2);
+}
+
+
+void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                   int max_completed_queue,
+                                   Mutex* lock) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+  set_buffer_size(DCQBarrierQueueBufferSize);
+  set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
+
+  _shared_dirty_card_queue.set_lock(lock);
+  _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
+  bool b = _free_ids->claim_perm_id(0);
+  guarantee(b, "Must reserve id zero for concurrent refinement thread.");
+}
+
+void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  t->dirty_card_queue().handle_zero_index();
+}
+
+void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) {
+  _closure = closure;
+}
+
+void DirtyCardQueueSet::iterate_closure_all_threads(bool consume,
+                                                    size_t worker_i) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    bool b = t->dirty_card_queue().apply_closure(_closure, consume);
+    guarantee(b, "Should not be interrupted.");
+  }
+  bool b = shared_dirty_card_queue()->apply_closure(_closure,
+                                                    consume,
+                                                    worker_i);
+  guarantee(b, "Should not be interrupted.");
+}
+
+bool DirtyCardQueueSet::mut_process_buffer(void** buf) {
+
+  // Used to determine if we had already claimed a par_id
+  // before entering this method.
+  bool already_claimed = false;
+
+  // We grab the current JavaThread.
+  JavaThread* thread = JavaThread::current();
+
+  // We get the number of any par_id that this thread
+  // might have already claimed.
+  int worker_i = thread->get_claimed_par_id();
+
+  // If worker_i is not -1 then the thread has already claimed
+  // a par_id. We make note of it using the already_claimed value
+  if (worker_i != -1) {
+    already_claimed = true;
+  } else {
+
+    // Otherwise we need to claim a par id
+    worker_i = _free_ids->claim_par_id();
+
+    // And store the par_id value in the thread
+    thread->set_claimed_par_id(worker_i);
+  }
+
+  bool b = false;
+  if (worker_i != -1) {
+    b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0,
+                                                _sz, true, worker_i);
+    if (b) Atomic::inc(&_processed_buffers_mut);
+
+    // If we had not claimed an id before entering the method
+    // then we must release the id.
+    if (!already_claimed) {
+
+      // we release the id
+      _free_ids->release_par_id(worker_i);
+
+      // and set the claimed_id in the thread to -1
+      thread->set_claimed_par_id(-1);
+    }
+  }
+  return b;
+}
+
+DirtyCardQueueSet::CompletedBufferNode*
+DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) {
+  CompletedBufferNode* nd = NULL;
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+
+  if ((int)_n_completed_buffers <= stop_at) {
+    _process_completed = false;
+    return NULL;
+  }
+
+  if (_completed_buffers_head != NULL) {
+    nd = _completed_buffers_head;
+    _completed_buffers_head = nd->next;
+    if (_completed_buffers_head == NULL)
+      _completed_buffers_tail = NULL;
+    _n_completed_buffers--;
+  }
+  debug_only(assert_completed_buffer_list_len_correct_locked());
+  return nd;
+}
+
+// We only do this in contexts where there is no concurrent enqueueing.
+DirtyCardQueueSet::CompletedBufferNode*
+DirtyCardQueueSet::get_completed_buffer_CAS() {
+  CompletedBufferNode* nd = _completed_buffers_head;
+
+  while (nd != NULL) {
+    CompletedBufferNode* next = nd->next;
+    CompletedBufferNode* result =
+      (CompletedBufferNode*)Atomic::cmpxchg_ptr(next,
+                                                &_completed_buffers_head,
+                                                nd);
+    if (result == nd) {
+      return result;
+    } else {
+      nd = _completed_buffers_head;
+    }
+  }
+  assert(_completed_buffers_head == NULL, "Loop post");
+  _completed_buffers_tail = NULL;
+  return NULL;
+}
+
+bool DirtyCardQueueSet::
+apply_closure_to_completed_buffer_helper(int worker_i,
+                                         CompletedBufferNode* nd) {
+  if (nd != NULL) {
+    bool b =
+      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf,
+                                              nd->index, _sz,
+                                              true, worker_i);
+    void** buf = nd->buf;
+    size_t index = nd->index;
+    delete nd;
+    if (b) {
+      deallocate_buffer(buf);
+      return true;  // In normal case, go on to next buffer.
+    } else {
+      enqueue_complete_buffer(buf, index, true);
+      return false;
+    }
+  } else {
+    return false;
+  }
+}
+
+bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
+                                                          int stop_at,
+                                                          bool with_CAS)
+{
+  CompletedBufferNode* nd = NULL;
+  if (with_CAS) {
+    guarantee(stop_at == 0, "Precondition");
+    nd = get_completed_buffer_CAS();
+  } else {
+    nd = get_completed_buffer_lock(stop_at);
+  }
+  bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
+  if (res) _processed_buffers_rs_thread++;
+  return res;
+}
+
+void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
+  CompletedBufferNode* nd = _completed_buffers_head;
+  while (nd != NULL) {
+    bool b =
+      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz,
+                                              false);
+    guarantee(b, "Should not stop early.");
+    nd = nd->next;
+  }
+}
+
+void DirtyCardQueueSet::abandon_logs() {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  CompletedBufferNode* buffers_to_delete = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    while (_completed_buffers_head != NULL) {
+      CompletedBufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      nd->next = buffers_to_delete;
+      buffers_to_delete = nd;
+    }
+    _n_completed_buffers = 0;
+    _completed_buffers_tail = NULL;
+    debug_only(assert_completed_buffer_list_len_correct_locked());
+  }
+  while (buffers_to_delete != NULL) {
+    CompletedBufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next;
+    deallocate_buffer(nd->buf);
+    delete nd;
+  }
+  // Since abandon is done only at safepoints, we can safely manipulate
+  // these queues.
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->dirty_card_queue().reset();
+  }
+  shared_dirty_card_queue()->reset();
+}
+
+
+void DirtyCardQueueSet::concatenate_logs() {
+  // Iterate over all the threads, if we find a partial log add it to
+  // the global list of logs.  Temporarily turn off the limit on the number
+  // of outstanding buffers.
+  int save_max_completed_queue = _max_completed_queue;
+  _max_completed_queue = max_jint;
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    DirtyCardQueue& dcq = t->dirty_card_queue();
+    if (dcq.size() != 0) {
+      void **buf = t->dirty_card_queue().get_buf();
+      // We must NULL out the unused entries, then enqueue.
+      for (size_t i = 0; i < t->dirty_card_queue().get_index(); i += oopSize) {
+        buf[PtrQueue::byte_index_to_index((int)i)] = NULL;
+      }
+      enqueue_complete_buffer(dcq.get_buf(), dcq.get_index());
+      dcq.reinitialize();
+    }
+  }
+  if (_shared_dirty_card_queue.size() != 0) {
+    enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(),
+                            _shared_dirty_card_queue.get_index());
+    _shared_dirty_card_queue.reinitialize();
+  }
+  // Restore the completed buffer queue limit.
+  _max_completed_queue = save_max_completed_queue;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class FreeIdSet;
+
+// A closure class for processing card table entries.  Note that we don't
+// require these closure objects to be stack-allocated.
+class CardTableEntryClosure: public CHeapObj {
+public:
+  // Process the card whose card table entry is "card_ptr".  If it returns
+  // "false", terminate the iteration early.
+  virtual bool do_card_ptr(jbyte* card_ptr, int worker_i = 0) = 0;
+};
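+
+// Illustrative sketch (hedged): a trivial closure that merely counts the
+// non-NULL card pointers it is handed. The class name and counter are
+// assumptions for illustration, not part of this interface.
+//
+//   class CountCardsClosure: public CardTableEntryClosure {
+//     jint _cards;
+//   public:
+//     CountCardsClosure() : _cards(0) {}
+//     bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+//       _cards++;
+//       return true;   // never terminate the iteration early
+//     }
+//     jint cards() const { return _cards; }
+//   };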
+
+// A PtrQueue whose elements are card table entry addresses ("card pointers").
+class DirtyCardQueue: public PtrQueue {
+public:
+  DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) :
+    PtrQueue(qset_, perm)
+  {
+    // Dirty card queues are always active.
+    _active = true;
+  }
+  // Apply the closure to all elements, and reset the index to make the
+  // buffer empty.  If a closure application returns "false", return
+  // "false" immediately, halting the iteration.  If "consume" is true,
+  // deletes processed entries from logs.
+  bool apply_closure(CardTableEntryClosure* cl,
+                     bool consume = true,
+                     size_t worker_i = 0);
+
+  // Apply the closure to all elements of "buf", down to "index"
+  // (inclusive).  If it returns "false", then a closure application returned
+  // "false", and we return immediately.  If "consume" is true, entries are
+  // set to NULL as they are processed, so they will not be processed again
+  // later.
+  static bool apply_closure_to_buffer(CardTableEntryClosure* cl,
+                                      void** buf, size_t index, size_t sz,
+                                      bool consume = true,
+                                      int worker_i = 0);
+  void **get_buf() { return _buf;}
+  void set_buf(void **buf) {_buf = buf;}
+  size_t get_index() { return _index;}
+  void reinitialize() { _buf = 0; _sz = 0; _index = 0;}
+};
+
+
+
+class DirtyCardQueueSet: public PtrQueueSet {
+  CardTableEntryClosure* _closure;
+
+  DirtyCardQueue _shared_dirty_card_queue;
+
+  // Override.
+  bool mut_process_buffer(void** buf);
+
+  // Protected by the _cbl_mon.
+  FreeIdSet* _free_ids;
+
+  // The number of completed buffers processed by mutator and rs thread,
+  // respectively.
+  jint _processed_buffers_mut;
+  jint _processed_buffers_rs_thread;
+
+public:
+  DirtyCardQueueSet();
+
+  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                  int max_completed_queue = 0,
+                  Mutex* lock = NULL);
+
+  // The number of parallel ids that can be claimed to allow collector or
+  // mutator threads to do card-processing work.
+  static size_t num_par_ids();
+
+  static void handle_zero_index_for_thread(JavaThread* t);
+
+  // Register "blk" as "the closure" for all queues.  Only one such closure
+  // is allowed.  The "apply_closure_to_completed_buffer" method will apply
+  // this closure to a completed buffer, and "iterate_closure_all_threads"
+  // applies it to partially-filled buffers (the latter should only be done
+  // with the world stopped).
+  void set_closure(CardTableEntryClosure* closure);
+
+  // If there is a registered closure for buffers, apply it to all entries
+  // in all currently-active buffers.  This should only be applied at a
+  // safepoint.  (Currently must not be called in parallel; this should
+  // change in the future.)  If "consume" is true, processed entries are
+  // discarded.
+  void iterate_closure_all_threads(bool consume = true,
+                                   size_t worker_i = 0);
+
+  // If there exists some completed buffer, pop it, then apply the
+  // registered closure to all its elements, nulling out those elements
+  // processed.  If all elements are processed, returns "true".  If no
+  // completed buffers exist, returns false.  If a completed buffer exists,
+  // but is only partially completed before a "yield" happens, the
+  // partially completed buffer (with its processed elements set to NULL)
+  // is returned to the completed buffer set, and this call returns false.
+  bool apply_closure_to_completed_buffer(int worker_i = 0,
+                                         int stop_at = 0,
+                                         bool with_CAS = false);
+  bool apply_closure_to_completed_buffer_helper(int worker_i,
+                                                CompletedBufferNode* nd);
+
+  CompletedBufferNode* get_completed_buffer_CAS();
+  CompletedBufferNode* get_completed_buffer_lock(int stop_at);
+  // Applies the current closure to all completed buffers,
+  // non-consumptively.
+  void apply_closure_to_all_completed_buffers();
+
+  DirtyCardQueue* shared_dirty_card_queue() {
+    return &_shared_dirty_card_queue;
+  }
+
+  // If a full collection is happening, reset partial logs, and ignore
+  // completed ones: the full collection will make them all irrelevant.
+  void abandon_logs();
+
+  // If any threads have partial logs, add them to the global list of logs.
+  void concatenate_logs();
+  void clear_n_completed_buffers() { _n_completed_buffers = 0;}
+
+  jint processed_buffers_mut() {
+    return _processed_buffers_mut;
+  }
+  jint processed_buffers_rs_thread() {
+    return _processed_buffers_rs_thread;
+  }
+
+};
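+
+// Typical use, sketched here only for illustration ("dcqs", "cl" and
+// "worker_i" are assumed names, not part of this change):
+//
+//   dcqs.set_closure(cl);              // register the one allowed closure
+//   // e.g. a refinement thread drains completed buffers until none remain:
+//   while (dcqs.apply_closure_to_completed_buffer(worker_i)) {
+//     // each call pops one completed buffer and applies "cl" to its entries
+//   }
+//   // At a safepoint, partially filled per-thread buffers may be processed:
+//   //   dcqs.iterate_closure_all_threads();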
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,628 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1BlockOffsetTable.cpp.incl"
+
+//////////////////////////////////////////////////////////////////////
+// G1BlockOffsetSharedArray
+//////////////////////////////////////////////////////////////////////
+
+G1BlockOffsetSharedArray::G1BlockOffsetSharedArray(MemRegion reserved,
+                                                   size_t init_word_size) :
+  _reserved(reserved), _end(NULL)
+{
+  size_t size = compute_size(reserved.word_size());
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(size));
+  if (!rs.is_reserved()) {
+    vm_exit_during_initialization("Could not reserve enough space for heap offset array");
+  }
+  if (!_vs.initialize(rs, 0)) {
+    vm_exit_during_initialization("Could not reserve enough space for heap offset array");
+  }
+  _offset_array = (u_char*)_vs.low_boundary();
+  resize(init_word_size);
+  if (TraceBlockOffsetTable) {
+    gclog_or_tty->print_cr("G1BlockOffsetSharedArray::G1BlockOffsetSharedArray: ");
+    gclog_or_tty->print_cr("  "
+                  "  rs.base(): " INTPTR_FORMAT
+                  "  rs.size(): " INTPTR_FORMAT
+                  "  rs end(): " INTPTR_FORMAT,
+                  rs.base(), rs.size(), rs.base() + rs.size());
+    gclog_or_tty->print_cr("  "
+                  "  _vs.low_boundary(): " INTPTR_FORMAT
+                  "  _vs.high_boundary(): " INTPTR_FORMAT,
+                  _vs.low_boundary(),
+                  _vs.high_boundary());
+  }
+}
+
+void G1BlockOffsetSharedArray::resize(size_t new_word_size) {
+  assert(new_word_size <= _reserved.word_size(), "Resize larger than reserved");
+  size_t new_size = compute_size(new_word_size);
+  size_t old_size = _vs.committed_size();
+  size_t delta;
+  char* high = _vs.high();
+  _end = _reserved.start() + new_word_size;
+  if (new_size > old_size) {
+    delta = ReservedSpace::page_align_size_up(new_size - old_size);
+    assert(delta > 0, "just checking");
+    if (!_vs.expand_by(delta)) {
+      // Do better than this for Merlin
+      vm_exit_out_of_memory(delta, "offset table expansion");
+    }
+    assert(_vs.high() == high + delta, "invalid expansion");
+    // Initialization of the contents is left to the
+    // G1BlockOffsetArray that uses it.
+  } else {
+    delta = ReservedSpace::page_align_size_down(old_size - new_size);
+    if (delta == 0) return;
+    _vs.shrink_by(delta);
+    assert(_vs.high() == high - delta, "invalid shrinkage");
+  }
+}
+
+bool G1BlockOffsetSharedArray::is_card_boundary(HeapWord* p) const {
+  assert(p >= _reserved.start(), "just checking");
+  size_t delta = pointer_delta(p, _reserved.start());
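+  // "p" is card-aligned iff its word offset from the reserved start is a
+  // multiple of N_words, i.e. the low LogN_words bits of the delta are zero.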
+  return (delta & right_n_bits(LogN_words)) == (size_t)NoBits;
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// G1BlockOffsetArray
+//////////////////////////////////////////////////////////////////////
+
+G1BlockOffsetArray::G1BlockOffsetArray(G1BlockOffsetSharedArray* array,
+                                       MemRegion mr, bool init_to_zero) :
+  G1BlockOffsetTable(mr.start(), mr.end()),
+  _unallocated_block(_bottom),
+  _array(array), _csp(NULL),
+  _init_to_zero(init_to_zero) {
+  assert(_bottom <= _end, "arguments out of order");
+  if (!_init_to_zero) {
+    // initialize cards to point back to mr.start()
+    set_remainder_to_point_to_start(mr.start() + N_words, mr.end());
+    _array->set_offset_array(0, 0);  // set first card to 0
+  }
+}
+
+void G1BlockOffsetArray::set_space(Space* sp) {
+  _sp = sp;
+  _csp = sp->toContiguousSpace();
+}
+
+// The arguments follow the normal convention of denoting
+// a right-open interval: [start, end)
+void
+G1BlockOffsetArray::set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) {
+
+  if (start >= end) {
+    // The start address is equal to the end address (or to
+    // the right of the end address), so there are no cards
+    // that need to be updated.
+    return;
+  }
+
+  // Write the backskip value for each region.
+  //
+  //    offset
+  //    card             2nd                       3rd
+  //     | +- 1st        |                         |
+  //     v v             v                         v
+  //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+     +-+-+-+-+-+-+-+-+-+-+-
+  //    |x|0|0|0|0|0|0|0|1|1|1|1|1|1| ... |1|1|1|1|2|2|2|2|2|2| ...
+  //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+     +-+-+-+-+-+-+-+-+-+-+-
+  //    11              19                        75
+  //      12
+  //
+  //    offset card is the card that points to the start of an object
+  //      x - offset value of offset card
+  //    1st - start of first logarithmic region
+  //      0 corresponds to logarithmic value N_words + 0 and 2**(3 * 0) = 1
+  //    2nd - start of second logarithmic region
+  //      1 corresponds to logarithmic value N_words + 1 and 2**(3 * 1) = 8
+  //    3rd - start of third logarithmic region
+  //      2 corresponds to logarithmic value N_words + 2 and 2**(3 * 2) = 64
+  //
+  //    integer below the block offset entry is an example of
+  //    the index of the entry
+  //
+  //    Given an address,
+  //      Find the index for the address
+  //      Find the block offset table entry
+  //      Convert the entry to a back slide
+  //        (e.g., with today's encoding, offset = 0x81 =>
+  //          back skip = 2**(3*(0x81 - N_words)) = 2**3 = 8)
+  //      Move back N (e.g., 8) entries and repeat with the
+  //        value of the new entry
+  //
+  size_t start_card = _array->index_for(start);
+  size_t end_card = _array->index_for(end-1);
+  assert(start ==_array->address_for_index(start_card), "Precondition");
+  assert(end ==_array->address_for_index(end_card)+N_words, "Precondition");
+  set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval
+}
+
+// Unlike the normal convention in this code, the argument here denotes
+// a closed, inclusive interval: [start_card, end_card], cf set_remainder_to_point_to_start()
+// above.
+void
+G1BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) {
+  if (start_card > end_card) {
+    return;
+  }
+  assert(start_card > _array->index_for(_bottom), "Cannot be first card");
+  assert(_array->offset_array(start_card-1) <= N_words,
+    "Offset card has an unexpected value");
+  size_t start_card_for_region = start_card;
+  u_char offset = max_jubyte;
+  for (int i = 0; i < BlockOffsetArray::N_powers; i++) {
+    // -1 so that the card with the actual offset is counted.  Another -1
+    // so that the reach ends in this region and not at the start
+    // of the next.
+    size_t reach = start_card - 1 + (BlockOffsetArray::power_to_cards_back(i+1) - 1);
+    offset = N_words + i;
+    if (reach >= end_card) {
+      _array->set_offset_array(start_card_for_region, end_card, offset);
+      start_card_for_region = reach + 1;
+      break;
+    }
+    _array->set_offset_array(start_card_for_region, reach, offset);
+    start_card_for_region = reach + 1;
+  }
+  assert(start_card_for_region > end_card, "Sanity check");
+  DEBUG_ONLY(check_all_cards(start_card, end_card);)
+}
+
+// The block [blk_start, blk_end) has been allocated;
+// adjust the block offset table to represent this information;
+// right-open interval: [blk_start, blk_end)
+void
+G1BlockOffsetArray::alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
+  mark_block(blk_start, blk_end);
+  allocated(blk_start, blk_end);
+}
+
+// Adjust BOT to show that a previously whole block has been split
+// into two.
+void G1BlockOffsetArray::split_block(HeapWord* blk, size_t blk_size,
+                                     size_t left_blk_size) {
+  // Verify that the BOT shows [blk, blk + blk_size) to be one block.
+  verify_single_block(blk, blk_size);
+  // Update the BOT to indicate that [blk + left_blk_size, blk + blk_size)
+  // is one single block.
+  mark_block(blk + left_blk_size, blk + blk_size);
+}
+
+
+// Action_mark - update the BOT for the block [blk_start, blk_end).
+//               Current typical use is for splitting a block.
+// Action_single - update the BOT for an allocation.
+// Action_verify - BOT verification.
+void G1BlockOffsetArray::do_block_internal(HeapWord* blk_start,
+                                           HeapWord* blk_end,
+                                           Action action) {
+  assert(Universe::heap()->is_in_reserved(blk_start),
+         "reference must be into the heap");
+  assert(Universe::heap()->is_in_reserved(blk_end-1),
+         "limit must be within the heap");
+  // This is optimized to make the test fast, assuming we only rarely
+  // cross boundaries.
+  uintptr_t end_ui = (uintptr_t)(blk_end - 1);
+  uintptr_t start_ui = (uintptr_t)blk_start;
+  // Calculate the last card boundary preceding end of blk
+  intptr_t boundary_before_end = (intptr_t)end_ui;
+  clear_bits(boundary_before_end, right_n_bits(LogN));
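+  // Clearing the low LogN bits rounds the address down to a multiple of
+  // N_bytes, the card size.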
+  if (start_ui <= (uintptr_t)boundary_before_end) {
+    // blk starts at or crosses a boundary
+    // Calculate index of card on which blk begins
+    size_t    start_index = _array->index_for(blk_start);
+    // Index of card on which blk ends
+    size_t    end_index   = _array->index_for(blk_end - 1);
+    // Start address of card on which blk begins
+    HeapWord* boundary    = _array->address_for_index(start_index);
+    assert(boundary <= blk_start, "blk should start at or after boundary");
+    if (blk_start != boundary) {
+      // blk starts strictly after boundary
+      // adjust card boundary and start_index forward to next card
+      boundary += N_words;
+      start_index++;
+    }
+    assert(start_index <= end_index, "monotonicity of index_for()");
+    assert(boundary <= (HeapWord*)boundary_before_end, "tautology");
+    switch (action) {
+      case Action_mark: {
+        if (init_to_zero()) {
+          _array->set_offset_array(start_index, boundary, blk_start);
+          break;
+        } // Else fall through to the next case
+      }
+      case Action_single: {
+        _array->set_offset_array(start_index, boundary, blk_start);
+        // We have finished marking the "offset card". We need to now
+        // mark the subsequent cards that this blk spans.
+        if (start_index < end_index) {
+          HeapWord* rem_st = _array->address_for_index(start_index) + N_words;
+          HeapWord* rem_end = _array->address_for_index(end_index) + N_words;
+          set_remainder_to_point_to_start(rem_st, rem_end);
+        }
+        break;
+      }
+      case Action_check: {
+        _array->check_offset_array(start_index, boundary, blk_start);
+        // We have finished checking the "offset card". We need to now
+        // check the subsequent cards that this blk spans.
+        check_all_cards(start_index + 1, end_index);
+        break;
+      }
+      default:
+        ShouldNotReachHere();
+    }
+  }
+}
+
+// The card-interval [start_card, end_card] is a closed interval; this
+// is an expensive check -- use with care and only under protection of
+// suitable flag.
+void G1BlockOffsetArray::check_all_cards(size_t start_card, size_t end_card) const {
+
+  if (end_card < start_card) {
+    return;
+  }
+  guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card");
+  for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) {
+    u_char entry = _array->offset_array(c);
+    if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) {
+      guarantee(entry > N_words, "Should be in logarithmic region");
+    }
+    size_t backskip = BlockOffsetArray::entry_to_cards_back(entry);
+    size_t landing_card = c - backskip;
+    guarantee(landing_card >= (start_card - 1), "Inv");
+    if (landing_card >= start_card) {
+      guarantee(_array->offset_array(landing_card) <= entry, "monotonicity");
+    } else {
+      guarantee(landing_card == start_card - 1, "Tautology");
+      guarantee(_array->offset_array(landing_card) <= N_words, "Offset value");
+    }
+  }
+}
+
+// The range [blk_start, blk_end) represents a single contiguous block
+// of storage; modify the block offset table to represent this
+// information; Right-open interval: [blk_start, blk_end)
+// NOTE: this method does _not_ adjust _unallocated_block.
+void
+G1BlockOffsetArray::single_block(HeapWord* blk_start, HeapWord* blk_end) {
+  do_block_internal(blk_start, blk_end, Action_single);
+}
+
+// Mark the BOT such that if [blk_start, blk_end) straddles a card
+// boundary, the card following the first such boundary is marked
+// with the appropriate offset.
+// NOTE: this method does _not_ adjust _unallocated_block or
+// any cards subsequent to the first one.
+void
+G1BlockOffsetArray::mark_block(HeapWord* blk_start, HeapWord* blk_end) {
+  do_block_internal(blk_start, blk_end, Action_mark);
+}
+
+void G1BlockOffsetArray::join_blocks(HeapWord* blk1, HeapWord* blk2) {
+  HeapWord* blk1_start = Universe::heap()->block_start(blk1);
+  HeapWord* blk2_start = Universe::heap()->block_start(blk2);
+  assert(blk1 == blk1_start && blk2 == blk2_start,
+         "Must be block starts.");
+  assert(blk1 + _sp->block_size(blk1) == blk2, "Must be contiguous.");
+  size_t blk1_start_index = _array->index_for(blk1);
+  size_t blk2_start_index = _array->index_for(blk2);
+  assert(blk1_start_index <= blk2_start_index, "sanity");
+  HeapWord* blk2_card_start = _array->address_for_index(blk2_start_index);
+  if (blk2 == blk2_card_start) {
+    // blk2 starts a card.  Does blk1 start on the previous card, or further
+    // back?
+    assert(blk1_start_index < blk2_start_index, "must be lower card.");
+    if (blk1_start_index + 1 == blk2_start_index) {
+      // previous card; new value for blk2 card is size of blk1.
+      _array->set_offset_array(blk2_start_index, (u_char) _sp->block_size(blk1));
+    } else {
+      // Earlier card; go back a card.
+      _array->set_offset_array(blk2_start_index, N_words);
+    }
+  } else {
+    // blk2 does not start a card.  Does it cross a card?  If not, nothing
+    // to do.
+    size_t blk2_end_index =
+      _array->index_for(blk2 + _sp->block_size(blk2) - 1);
+    assert(blk2_end_index >= blk2_start_index, "sanity");
+    if (blk2_end_index > blk2_start_index) {
+      // Yes, it crosses a card.  The value for the next card must change.
+      if (blk1_start_index + 1 == blk2_start_index) {
+        // previous card; new value for second blk2 card is size of blk1.
+        _array->set_offset_array(blk2_start_index + 1,
+                                 (u_char) _sp->block_size(blk1));
+      } else {
+        // Earlier card; go back a card.
+        _array->set_offset_array(blk2_start_index + 1, N_words);
+      }
+    }
+  }
+}
+
+HeapWord* G1BlockOffsetArray::block_start_unsafe(const void* addr) {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  // Must read this exactly once because it can be modified by parallel
+  // allocation.
+  HeapWord* ub = _unallocated_block;
+  if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
+    assert(ub < _end, "tautology (see above)");
+    return ub;
+  }
+  // Otherwise, find the block start using the table.
+  HeapWord* q = block_at_or_preceding(addr, false, 0);
+  return forward_to_block_containing_addr(q, addr);
+}
+
+// This duplicates a little code from the above: unavoidable.
+HeapWord*
+G1BlockOffsetArray::block_start_unsafe_const(const void* addr) const {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  // Must read this exactly once because it can be modified by parallel
+  // allocation.
+  HeapWord* ub = _unallocated_block;
+  if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
+    assert(ub < _end, "tautology (see above)");
+    return ub;
+  }
+  // Otherwise, find the block start using the table.
+  HeapWord* q = block_at_or_preceding(addr, false, 0);
+  HeapWord* n = q + _sp->block_size(q);
+  return forward_to_block_containing_addr_const(q, n, addr);
+}
+
+
+HeapWord*
+G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
+                                                          HeapWord* n,
+                                                          const void* addr) {
+  // We're not in the normal case.  We need to handle an important subcase
+  // here: LAB allocation.  An allocation previously recorded in the
+  // offset table was actually a lab allocation, and was divided into
+  // several objects subsequently.  Fix this situation as we answer the
+  // query, by updating entries as we cross them.
+
+  // If the first object's end n is at the card boundary, start refining
+  // with the corresponding card (the value of the entry will basically
+  // be set to 0). If the object crosses the boundary, start from the next card.
+  size_t next_index = _array->index_for(n) + !_array->is_card_boundary(n);
+  HeapWord* next_boundary = _array->address_for_index(next_index);
+  if (csp() != NULL) {
+    if (addr >= csp()->top()) return csp()->top();
+    while (next_boundary < addr) {
+      while (n <= next_boundary) {
+        q = n;
+        oop obj = oop(q);
+        if (obj->klass() == NULL) return q;
+        n += obj->size();
+      }
+      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
+      // [q, n) is the block that crosses the boundary.
+      alloc_block_work2(&next_boundary, &next_index, q, n);
+    }
+  } else {
+    while (next_boundary < addr) {
+      while (n <= next_boundary) {
+        q = n;
+        oop obj = oop(q);
+        if (obj->klass() == NULL) return q;
+        n += _sp->block_size(q);
+      }
+      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
+      // [q, n) is the block that crosses the boundary.
+      alloc_block_work2(&next_boundary, &next_index, q, n);
+    }
+  }
+  return forward_to_block_containing_addr_const(q, n, addr);
+}
+
+HeapWord* G1BlockOffsetArray::block_start_careful(const void* addr) const {
+  assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
+
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  // Must read this exactly once because it can be modified by parallel
+  // allocation.
+  HeapWord* ub = _unallocated_block;
+  if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
+    assert(ub < _end, "tautology (see above)");
+    return ub;
+  }
+
+  // Otherwise, find the block start using the table, but taking
+  // care (cf block_start_unsafe() above) not to parse any objects/blocks
+  // on the cards themselves.
+  size_t index = _array->index_for(addr);
+  assert(_array->address_for_index(index) == addr,
+         "arg should be start of card");
+
+  HeapWord* q = (HeapWord*)addr;
+  uint offset;
+  do {
+    offset = _array->offset_array(index--);
+    q -= offset;
+  } while (offset == N_words);
+  assert(q <= addr, "block start should be to left of arg");
+  return q;
+}
+
+// Note that the committed size of the covered space may have changed,
+// so the table size might also wish to change.
+void G1BlockOffsetArray::resize(size_t new_word_size) {
+  HeapWord* new_end = _bottom + new_word_size;
+  if (_end < new_end && !init_to_zero()) {
+    // verify that the old and new boundaries are also card boundaries
+    assert(_array->is_card_boundary(_end),
+           "_end not a card boundary");
+    assert(_array->is_card_boundary(new_end),
+           "new _end would not be a card boundary");
+    // set all the newly added cards
+    _array->set_offset_array(_end, new_end, N_words);
+  }
+  _end = new_end;  // update _end
+}
+
+void G1BlockOffsetArray::set_region(MemRegion mr) {
+  _bottom = mr.start();
+  _end = mr.end();
+}
+
+//
+//              threshold_
+//              |   _index_
+//              v   v
+//      +-------+-------+-------+-------+-------+
+//      | i-1   |   i   | i+1   | i+2   | i+3   |
+//      +-------+-------+-------+-------+-------+
+//       ( ^    ]
+//         block-start
+//
+void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_,
+                                           HeapWord* blk_start, HeapWord* blk_end) {
+  // For efficiency, do copy-in/copy-out.
+  HeapWord* threshold = *threshold_;
+  size_t    index = *index_;
+
+  assert(blk_start != NULL && blk_end > blk_start,
+         "phantom block");
+  assert(blk_end > threshold, "should be past threshold");
+  assert(blk_start <= threshold, "blk_start should be at or before threshold");
+  assert(pointer_delta(threshold, blk_start) <= N_words,
+         "offset should be <= BlockOffsetSharedArray::N");
+  assert(Universe::heap()->is_in_reserved(blk_start),
+         "reference must be into the heap");
+  assert(Universe::heap()->is_in_reserved(blk_end-1),
+         "limit must be within the heap");
+  assert(threshold == _array->_reserved.start() + index*N_words,
+         "index must agree with threshold");
+
+  DEBUG_ONLY(size_t orig_index = index;)
+
+  // Mark the card that holds the offset into the block.  Note
+  // that _next_offset_index and _next_offset_threshold are not
+  // updated until the end of this method.
+  _array->set_offset_array(index, threshold, blk_start);
+
+  // We need to now mark the subsequent cards that this blk spans.
+
+  // Index of card on which blk ends.
+  size_t end_index   = _array->index_for(blk_end - 1);
+
+  // Are there more cards left to be updated?
+  if (index + 1 <= end_index) {
+    HeapWord* rem_st  = _array->address_for_index(index + 1);
+    // Calculate rem_end this way because end_index
+    // may be the last valid index in the covered region.
+    HeapWord* rem_end = _array->address_for_index(end_index) +  N_words;
+    set_remainder_to_point_to_start(rem_st, rem_end);
+  }
+
+  index = end_index + 1;
+  // Calculate threshold_ this way because end_index
+  // may be the last valid index in the covered region.
+  threshold = _array->address_for_index(end_index) + N_words;
+  assert(threshold >= blk_end, "Incorrect offset threshold");
+
+  // index_ and threshold_ updated here.
+  *threshold_ = threshold;
+  *index_ = index;
+
+#ifdef ASSERT
+  // The offset can be 0 if the block starts on a boundary.  That
+  // is checked by an assertion above.
+  size_t start_index = _array->index_for(blk_start);
+  HeapWord* boundary    = _array->address_for_index(start_index);
+  assert((_array->offset_array(orig_index) == 0 &&
+          blk_start == boundary) ||
+          (_array->offset_array(orig_index) > 0 &&
+         _array->offset_array(orig_index) <= N_words),
+         "offset array should have been set");
+  for (size_t j = orig_index + 1; j <= end_index; j++) {
+    assert(_array->offset_array(j) > 0 &&
+           _array->offset_array(j) <=
+             (u_char) (N_words+BlockOffsetArray::N_powers-1),
+           "offset array should have been set");
+  }
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// G1BlockOffsetArrayContigSpace
+//////////////////////////////////////////////////////////////////////
+
+HeapWord*
+G1BlockOffsetArrayContigSpace::block_start_unsafe(const void* addr) {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
+  return forward_to_block_containing_addr(q, addr);
+}
+
+HeapWord*
+G1BlockOffsetArrayContigSpace::
+block_start_unsafe_const(const void* addr) const {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
+  HeapWord* n = q + _sp->block_size(q);
+  return forward_to_block_containing_addr_const(q, n, addr);
+}
+
+G1BlockOffsetArrayContigSpace::
+G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array,
+                              MemRegion mr) :
+  G1BlockOffsetArray(array, mr, true)
+{
+  _next_offset_threshold = NULL;
+  _next_offset_index = 0;
+}
+
+HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() {
+  assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
+         "just checking");
+  _next_offset_index = _array->index_for(_bottom);
+  _next_offset_index++;
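+  // The first threshold is the start of the card following the one that
+  // contains _bottom.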
+  _next_offset_threshold =
+    _array->address_for_index(_next_offset_index);
+  return _next_offset_threshold;
+}
+
+void G1BlockOffsetArrayContigSpace::zero_bottom_entry() {
+  assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
+         "just checking");
+  size_t bottom_index = _array->index_for(_bottom);
+  assert(_array->address_for_index(bottom_index) == _bottom,
+         "Precondition of call");
+  _array->set_offset_array(bottom_index, 0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,487 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The CollectedHeap type requires subtypes to implement a method
+// "block_start".  For some subtypes, notably generational
+// systems using card-table-based write barriers, the efficiency of this
+// operation may be important.  Implementations of the "BlockOffsetArray"
+// class may be useful in providing such efficient implementations.
+//
+// While generally mirroring the structure of the BOT for GenCollectedHeap,
+// the following types are tailored more towards G1's uses; these should,
+// however, be merged back into a common BOT to avoid code duplication
+// and reduce maintenance overhead.
+//
+//    G1BlockOffsetTable (abstract)
+//    -- G1BlockOffsetArray                (uses G1BlockOffsetSharedArray)
+//       -- G1BlockOffsetArrayContigSpace
+//
+// A main impediment to the consolidation of this code might be the
+// effect of making some of the block_start*() calls non-const as
+// below. Whether that might adversely affect performance optimizations
+// that compilers might normally perform in the case of non-G1
+// collectors needs to be carefully investigated prior to any such
+// consolidation.
+
+// Forward declarations
+class ContiguousSpace;
+class G1BlockOffsetSharedArray;
+
+class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC {
+  friend class VMStructs;
+protected:
+  // These members describe the region covered by the table.
+
+  // The space this table is covering.
+  HeapWord* _bottom;    // == reserved.start
+  HeapWord* _end;       // End of currently allocated region.
+
+public:
+  // Initialize the table to cover the given space.
+  // The contents of the initial table are undefined.
+  G1BlockOffsetTable(HeapWord* bottom, HeapWord* end) :
+    _bottom(bottom), _end(end)
+    {
+      assert(_bottom <= _end, "arguments out of order");
+    }
+
+  // Note that the committed size of the covered space may have changed,
+  // so the table size might also wish to change.
+  virtual void resize(size_t new_word_size) = 0;
+
+  virtual void set_bottom(HeapWord* new_bottom) {
+    assert(new_bottom <= _end, "new_bottom > _end");
+    _bottom = new_bottom;
+    resize(pointer_delta(_end, _bottom));
+  }
+
+  // Requires "addr" to be contained by a block, and returns the address of
+  // the start of that block.  (May have side effects, namely updating of
+  // shared array entries that "point" too far backwards.  This can occur,
+  // for example, when LAB allocation is used in a space covered by the
+  // table.)
+  virtual HeapWord* block_start_unsafe(const void* addr) = 0;
+  // Same as above, but does not have any of the possible side effects
+  // discussed above.
+  virtual HeapWord* block_start_unsafe_const(const void* addr) const = 0;
+
+  // Returns the address of the start of the block containing "addr", or
+  // else "null" if it is covered by no block.  (May have side effects,
+  // namely updating of shared array entries that "point" too far
+  // backwards.  This can occur, for example, when lab allocation is used
+  // in a space covered by the table.)
+  inline HeapWord* block_start(const void* addr);
+  // Same as above, but does not have any of the possible side effects
+  // discussed above.
+  inline HeapWord* block_start_const(const void* addr) const;
+};
+
+// This implementation of "G1BlockOffsetTable" divides the covered region
+// into "N"-word subregions (where "N" = 2^"LogN").  An array with an entry
+// for each such subregion indicates how far back one must go to find the
+// start of the chunk that includes the first word of the subregion.
+//
+// Each BlockOffsetArray is owned by a Space.  However, the actual array
+// may be shared by several BlockOffsetArrays; this is useful
+// when a single resizable area (such as a generation) is divided up into
+// several spaces in which contiguous allocation takes place,
+// such as, for example, in G1 or in the train generation.
+
+// Here is the shared array type.
+
+class G1BlockOffsetSharedArray: public CHeapObj {
+  friend class G1BlockOffsetArray;
+  friend class G1BlockOffsetArrayContigSpace;
+  friend class VMStructs;
+
+private:
+  // The reserved region covered by the shared array.
+  MemRegion _reserved;
+
+  // End of the current committed region.
+  HeapWord* _end;
+
+  // Array for keeping offsets for retrieving object start fast given an
+  // address.
+  VirtualSpace _vs;
+  u_char* _offset_array;          // byte array keeping backwards offsets
+
+  // Bounds checking accessors:
+  // For performance these have to devolve to array accesses in product builds.
+  u_char offset_array(size_t index) const {
+    assert(index < _vs.committed_size(), "index out of range");
+    return _offset_array[index];
+  }
+
+  void set_offset_array(size_t index, u_char offset) {
+    assert(index < _vs.committed_size(), "index out of range");
+    assert(offset <= N_words, "offset too large");
+    _offset_array[index] = offset;
+  }
+
+  void set_offset_array(size_t index, HeapWord* high, HeapWord* low) {
+    assert(index < _vs.committed_size(), "index out of range");
+    assert(high >= low, "addresses out of order");
+    assert(pointer_delta(high, low) <= N_words, "offset too large");
+    _offset_array[index] = (u_char) pointer_delta(high, low);
+  }
+
+  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
+    assert(index_for(right - 1) < _vs.committed_size(),
+           "right address out of range");
+    assert(left  < right, "Heap addresses out of order");
+    size_t num_cards = pointer_delta(right, left) >> LogN_words;
+    memset(&_offset_array[index_for(left)], offset, num_cards);
+  }
+
+  void set_offset_array(size_t left, size_t right, u_char offset) {
+    assert(right < _vs.committed_size(), "right address out of range");
+    assert(left  <= right, "indexes out of order");
+    size_t num_cards = right - left + 1;
+    memset(&_offset_array[left], offset, num_cards);
+  }
+
+  void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
+    assert(index < _vs.committed_size(), "index out of range");
+    assert(high >= low, "addresses out of order");
+    assert(pointer_delta(high, low) <= N_words, "offset too large");
+    assert(_offset_array[index] == pointer_delta(high, low),
+           "Wrong offset");
+  }
+
+  bool is_card_boundary(HeapWord* p) const;
+
+  // Return the number of slots needed for an offset array
+  // that covers mem_region_words words.
+  // We always add an extra slot because if an object
+  // ends on a card boundary we put a 0 in the next
+  // offset array slot, so we want that slot always
+  // to be reserved.
+
+  size_t compute_size(size_t mem_region_words) {
+    size_t number_of_slots = (mem_region_words / N_words) + 1;
+    return ReservedSpace::page_align_size_up(number_of_slots);
+  }
+
+public:
+  enum SomePublicConstants {
+    LogN = 9,
+    LogN_words = LogN - LogHeapWordSize,
+    N_bytes = 1 << LogN,
+    N_words = 1 << LogN_words
+  };
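+  // With LogN = 9 each card covers N_bytes = 512 bytes; on an LP64 VM
+  // (LogHeapWordSize == 3) that is N_words == 64 words per card.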
+
+  // Initialize the table to cover from "base" to (at least)
+  // "base + init_word_size".  In the future, the table may be expanded
+  // (see "resize" below) up to the size of "_reserved" (which must be at
+  // least "init_word_size".) The contents of the initial table are
+  // undefined; it is the responsibility of the constituent
+  // G1BlockOffsetTable(s) to initialize cards.
+  G1BlockOffsetSharedArray(MemRegion reserved, size_t init_word_size);
+
+  // Notes a change in the committed size of the region covered by the
+  // table.  The "new_word_size" may not be larger than the size of the
+  // reserved region this table covers.
+  void resize(size_t new_word_size);
+
+  void set_bottom(HeapWord* new_bottom);
+
+  // Updates all the BlockOffsetArray's sharing this shared array to
+  // reflect the current "top"'s of their spaces.
+  void update_offset_arrays();
+
+  // Return the appropriate index into "_offset_array" for "p".
+  inline size_t index_for(const void* p) const;
+
+  // Return the address indicating the start of the region corresponding to
+  // "index" in "_offset_array".
+  inline HeapWord* address_for_index(size_t index) const;
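+
+  // Note: for any committed index "i", index_for(address_for_index(i)) == i,
+  // and for any covered address "p", address_for_index(index_for(p)) <= p.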
+};
+
+// And here is the G1BlockOffsetTable subtype that uses the array.
+
+class G1BlockOffsetArray: public G1BlockOffsetTable {
+  friend class G1BlockOffsetSharedArray;
+  friend class G1BlockOffsetArrayContigSpace;
+  friend class VMStructs;
+private:
+  enum SomePrivateConstants {
+    N_words = G1BlockOffsetSharedArray::N_words,
+    LogN    = G1BlockOffsetSharedArray::LogN
+  };
+
+  // The following enums are used by do_block_helper
+  enum Action {
+    Action_single,      // BOT records a single block (see single_block())
+    Action_mark,        // BOT marks the start of a block (see mark_block())
+    Action_check        // Check that BOT records block correctly
+                        // (see verify_single_block()).
+  };
+
+  // This is the array, which can be shared by several BlockOffsetArrays
+  // servicing different parts of the covered region.
+  G1BlockOffsetSharedArray* _array;
+
+  // The space that owns this subregion.
+  Space* _sp;
+
+  // If "_sp" is a contiguous space, the field below is the view of "_sp"
+  // as a contiguous space, else NULL.
+  ContiguousSpace* _csp;
+
+  // If true, array entries are initialized to 0; otherwise, they are
+  // initialized to point backwards to the beginning of the covered region.
+  bool _init_to_zero;
+
+  // The portion [_unallocated_block, _sp.end()) of the space that
+  // is a single block known not to contain any objects.
+  // NOTE: See BlockOffsetArrayUseUnallocatedBlock flag.
+  HeapWord* _unallocated_block;
+
+  // Sets the entries
+  // corresponding to the cards starting at "start" and ending at "end"
+  // to point back to the card before "start": the interval [start, end)
+  // is right-open.
+  void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end);
+  // Same as above, except that the args here are a card _index_ interval
+  // that is closed: [start_index, end_index]
+  void set_remainder_to_point_to_start_incl(size_t start, size_t end);
+
+  // A helper function for BOT adjustment/verification work
+  void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action);
+
+protected:
+
+  ContiguousSpace* csp() const { return _csp; }
+
+  // Returns the address of a block whose start is at most "addr".
+  // If "has_max_index" is true, assumes "max_index" is the last valid one
+  // in the array.
+  inline HeapWord* block_at_or_preceding(const void* addr,
+                                         bool has_max_index,
+                                         size_t max_index) const;
+
+  // "q" is a block boundary that is <= "addr"; "n" is the address of the
+  // next block (or the end of the space.)  Return the address of the
+  // beginning of the block that contains "addr".  Does so without side
+  // effects (see, e.g., spec of  block_start.)
+  inline HeapWord*
+  forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
+                                         const void* addr) const;
+
+  // "q" is a block boundary that is <= "addr"; return the address of the
+  // beginning of the block that contains "addr".  May have side effects
+  // on "this", by updating imprecise entries.
+  inline HeapWord* forward_to_block_containing_addr(HeapWord* q,
+                                                    const void* addr);
+
+  // "q" is a block boundary that is <= "addr"; "n" is the address of the
+  // next block (or the end of the space.)  Return the address of the
+  // beginning of the block that contains "addr".  May have side effects
+  // on "this", by updating imprecise entries.
+  HeapWord* forward_to_block_containing_addr_slow(HeapWord* q,
+                                                  HeapWord* n,
+                                                  const void* addr);
+
+  // Requires that "*threshold_" be the first array entry boundary at or
+  // above "blk_start", and that "*index_" be the corresponding array
+  // index.  If the block starts at or crosses "*threshold_", records
+  // "blk_start" as the appropriate block start for the array index
+  // starting at "*threshold_", and for any other indices crossed by the
+  // block.  Updates "*threshold_" and "*index_" to correspond to the first
+  // index after the block end.
+  void alloc_block_work2(HeapWord** threshold_, size_t* index_,
+                         HeapWord* blk_start, HeapWord* blk_end);
+
+public:
+  // The space may not have its bottom and top set yet, which is why the
+  // region is passed as a parameter.  If "init_to_zero" is true, the
+  // elements of the array are initialized to zero.  Otherwise, they are
+  // initialized to point backwards to the beginning.
+  G1BlockOffsetArray(G1BlockOffsetSharedArray* array, MemRegion mr,
+                     bool init_to_zero);
+
+  // Note: this ought to be part of the constructor, but that would require
+  // "this" to be passed as a parameter to a member constructor for
+  // the containing concrete subtype of Space.
+  // This would be legal C++, but MS VC++ doesn't allow it.
+  void set_space(Space* sp);
+
+  // Resets the covered region to the given "mr".
+  void set_region(MemRegion mr);
+
+  // Resets the covered region to one with the same _bottom as before but
+  // the "new_word_size".
+  void resize(size_t new_word_size);
+
+  // These must be guaranteed to work properly (i.e., do nothing)
+  // when "blk_start" ("blk" for second version) is "NULL".
+  virtual void alloc_block(HeapWord* blk_start, HeapWord* blk_end);
+  virtual void alloc_block(HeapWord* blk, size_t size) {
+    alloc_block(blk, blk + size);
+  }
+
+  // The following methods are useful and optimized for a
+  // general, non-contiguous space.
+
+  // The given arguments are required to be the starts of adjacent ("blk1"
+  // before "blk2") well-formed blocks covered by "this".  After this call,
+  // they should be considered to form one block.
+  virtual void join_blocks(HeapWord* blk1, HeapWord* blk2);
+
+  // Given a block [blk_start, blk_start + full_blk_size), and
+  // a left_blk_size < full_blk_size, adjust the BOT to show two
+  // blocks [blk_start, blk_start + left_blk_size) and
+  // [blk_start + left_blk_size, blk_start + full_blk_size).
+  // It is assumed (and verified in the non-product VM) that the
+  // BOT was correct for the original block.
+  void split_block(HeapWord* blk_start, size_t full_blk_size,
+                           size_t left_blk_size);
+
+  // Adjust the BOT to show that it has a single block in the
+  // range [blk_start, blk_start + size). All necessary BOT
+  // cards are adjusted, but _unallocated_block isn't.
+  void single_block(HeapWord* blk_start, HeapWord* blk_end);
+  void single_block(HeapWord* blk, size_t size) {
+    single_block(blk, blk + size);
+  }
+
+  // Adjust BOT to show that it has a block in the range
+  // [blk_start, blk_start + size). Only the first card
+  // of BOT is touched. It is assumed (and verified in the
+  // non-product VM) that the remaining cards of the block
+  // are correct.
+  void mark_block(HeapWord* blk_start, HeapWord* blk_end);
+  void mark_block(HeapWord* blk, size_t size) {
+    mark_block(blk, blk + size);
+  }
+
+  // Adjust _unallocated_block to indicate that a particular
+  // block has been newly allocated or freed. It is assumed (and
+  // verified in the non-product VM) that the BOT is correct for
+  // the given block.
+  inline void allocated(HeapWord* blk_start, HeapWord* blk_end) {
+    // Verify that the BOT shows [blk, blk + blk_size) to be one block.
+    verify_single_block(blk_start, blk_end);
+    if (BlockOffsetArrayUseUnallocatedBlock) {
+      _unallocated_block = MAX2(_unallocated_block, blk_end);
+    }
+  }
+
+  inline void allocated(HeapWord* blk, size_t size) {
+    allocated(blk, blk + size);
+  }
+
+  inline void freed(HeapWord* blk_start, HeapWord* blk_end);
+
+  inline void freed(HeapWord* blk, size_t size);
+
+  virtual HeapWord* block_start_unsafe(const void* addr);
+  virtual HeapWord* block_start_unsafe_const(const void* addr) const;
+
+  // Requires "addr" to be the start of a card and returns the
+  // start of the block that contains the given address.
+  HeapWord* block_start_careful(const void* addr) const;
+
+  // If true, initialize array slots with no allocated blocks to zero.
+  // Otherwise, make them point back to the front.
+  bool init_to_zero() { return _init_to_zero; }
+
+  // Verification & debugging - ensure that the offset table reflects the fact
+  // that the block [blk_start, blk_end) or [blk, blk + size) is a
+  // single block of storage. NOTE: can't const this because of
+  // call to non-const do_block_internal() below.
+  inline void verify_single_block(HeapWord* blk_start, HeapWord* blk_end) {
+    if (VerifyBlockOffsetArray) {
+      do_block_internal(blk_start, blk_end, Action_check);
+    }
+  }
+
+  inline void verify_single_block(HeapWord* blk, size_t size) {
+    verify_single_block(blk, blk + size);
+  }
+
+  // Verify that the given block is before _unallocated_block
+  inline void verify_not_unallocated(HeapWord* blk_start,
+                                     HeapWord* blk_end) const {
+    if (BlockOffsetArrayUseUnallocatedBlock) {
+      assert(blk_start < blk_end, "Block inconsistency?");
+      assert(blk_end <= _unallocated_block, "_unallocated_block problem");
+    }
+  }
+
+  inline void verify_not_unallocated(HeapWord* blk, size_t size) const {
+    verify_not_unallocated(blk, blk + size);
+  }
+
+  void check_all_cards(size_t left_card, size_t right_card) const;
+};
+
+// A subtype of BlockOffsetArray that takes advantage of the fact
+// that its underlying space is a ContiguousSpace, so that its "active"
+// region can be more efficiently tracked (than for a non-contiguous space).
+class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray {
+  friend class VMStructs;
+
+  // allocation boundary at which offset array must be updated
+  HeapWord* _next_offset_threshold;
+  size_t    _next_offset_index;      // index corresponding to that boundary
+
+  // Work function to be called when allocation start crosses the next
+  // threshold in the contig space.
+  void alloc_block_work1(HeapWord* blk_start, HeapWord* blk_end) {
+    alloc_block_work2(&_next_offset_threshold, &_next_offset_index,
+                      blk_start, blk_end);
+  }
+
+
+ public:
+  G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr);
+
+  // Initialize the threshold to reflect the first boundary after the
+  // bottom of the covered region.
+  HeapWord* initialize_threshold();
+
+  // Zero out the entry for _bottom (offset will be zero).
+  void      zero_bottom_entry();
+
+  // Return the next threshold, the point at which the table should be
+  // updated.
+  HeapWord* threshold() const { return _next_offset_threshold; }
+
+  // These must be guaranteed to work properly (i.e., do nothing)
+  // when "blk_start" ("blk" for second version) is "NULL".  In this
+  // implementation, that's true because NULL is represented as 0, and thus
+  // never exceeds the "_next_offset_threshold".
+  void alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
+    if (blk_end > _next_offset_threshold)
+      alloc_block_work1(blk_start, blk_end);
+  }
+  void alloc_block(HeapWord* blk, size_t size) {
+     alloc_block(blk, blk+size);
+  }
+
+  HeapWord* block_start_unsafe(const void* addr);
+  HeapWord* block_start_unsafe_const(const void* addr) const;
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
+  if (addr >= _bottom && addr < _end) {
+    return block_start_unsafe(addr);
+  } else {
+    return NULL;
+  }
+}
+
+inline HeapWord*
+G1BlockOffsetTable::block_start_const(const void* addr) const {
+  if (addr >= _bottom && addr < _end) {
+    return block_start_unsafe_const(addr);
+  } else {
+    return NULL;
+  }
+}
+
+inline size_t G1BlockOffsetSharedArray::index_for(const void* p) const {
+  char* pc = (char*)p;
+  assert(pc >= (char*)_reserved.start() &&
+         pc <  (char*)_reserved.end(),
+         "p not in range.");
+  size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char));
+  size_t result = delta >> LogN;
+  assert(result < _vs.committed_size(), "bad index from address");
+  return result;
+}
+
+inline HeapWord*
+G1BlockOffsetSharedArray::address_for_index(size_t index) const {
+  assert(index < _vs.committed_size(), "bad index");
+  HeapWord* result = _reserved.start() + (index << LogN_words);
+  assert(result >= _reserved.start() && result < _reserved.end(),
+         "bad address from index");
+  return result;
+}
+
+inline HeapWord*
+G1BlockOffsetArray::block_at_or_preceding(const void* addr,
+                                          bool has_max_index,
+                                          size_t max_index) const {
+  assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
+  size_t index = _array->index_for(addr);
+  // We must make sure that the offset table entry we use is valid.  If
+  // "addr" is past the end, start at the last known one and go forward.
+  if (has_max_index) {
+    index = MIN2(index, max_index);
+  }
+  HeapWord* q = _array->address_for_index(index);
+
+  uint offset = _array->offset_array(index);  // Extend u_char to uint.
+  while (offset >= N_words) {
+    // The excess of the offset from N_words indicates a power of Base
+    // to go back by.
+    size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset);
+    q -= (N_words * n_cards_back);
+    assert(q >= _sp->bottom(), "Went below bottom!");
+    index -= n_cards_back;
+    offset = _array->offset_array(index);
+  }
+  assert(offset < N_words, "offset too large");
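+  // "offset" is now a direct word offset from "q" back to the start of a
+  // block at or preceding "addr".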
+  q -= offset;
+  return q;
+}
+
+inline HeapWord*
+G1BlockOffsetArray::
+forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
+                                       const void* addr) const {
+  if (csp() != NULL) {
+    if (addr >= csp()->top()) return csp()->top();
+    while (n <= addr) {
+      q = n;
+      oop obj = oop(q);
+      if (obj->klass() == NULL) return q;
+      n += obj->size();
+    }
+  } else {
+    while (n <= addr) {
+      q = n;
+      oop obj = oop(q);
+      if (obj->klass() == NULL) return q;
+      n += _sp->block_size(q);
+    }
+  }
+  assert(q <= n, "wrong order for q and n");
+  assert(addr < n, "wrong order for addr and n");
+  return q;
+}
+
+inline HeapWord*
+G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
+                                                     const void* addr) {
+  if (oop(q)->klass() == NULL) return q;
+  HeapWord* n = q + _sp->block_size(q);
+  // In the normal case, where the query "addr" is a card boundary, and the
+  // offset table chunks are the same size as cards, the block starting at
+  // "q" will contain addr, so the test below will fail, and we'll fall
+  // through quickly.
+  if (n <= addr) {
+    q = forward_to_block_containing_addr_slow(q, n, addr);
+  }
+  assert(q <= addr, "wrong order for current and arg");
+  return q;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// BlockOffsetArrayNonContigSpace inlines
+//////////////////////////////////////////////////////////////////////////
+inline void G1BlockOffsetArray::freed(HeapWord* blk_start, HeapWord* blk_end) {
+  // Verify that the BOT shows [blk_start, blk_end) to be one block.
+  verify_single_block(blk_start, blk_end);
+  // adjust _unallocated_block upward or downward
+  // as appropriate
+  if (BlockOffsetArrayUseUnallocatedBlock) {
+    assert(_unallocated_block <= _end,
+           "Inconsistent value for _unallocated_block");
+    if (blk_end >= _unallocated_block && blk_start <= _unallocated_block) {
+      // CMS-specific note: a block abutting _unallocated_block to
+      // its left is being freed, a new block is being added or
+      // we are resetting following a compaction
+      _unallocated_block = blk_start;
+    }
+  }
+}
+
+inline void G1BlockOffsetArray::freed(HeapWord* blk, size_t size) {
+  freed(blk, blk + size);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,5497 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1CollectedHeap.cpp.incl"
+
+// turn it on so that the contents of the young list (scan-only /
+// to-be-collected) are printed at "strategic" points before / during
+// / after the collection --- this is useful for debugging
+#define SCAN_ONLY_VERBOSE 0
+// CURRENT STATUS
+// This file is under construction.  Search for "FIXME".
+
+// INVARIANTS/NOTES
+//
+// All allocation activity covered by the G1CollectedHeap interface is
+//   serialized by acquiring the HeapLock.  This happens in
+//   mem_allocate_work, which all such allocation functions call.
+//   (Note that this does not apply to TLAB allocation, which is not part
+//   of this interface: it is done by clients of this interface.)
+
+// Local to this file.
+
+// Finds the first HeapRegion.
+// No longer used, but might be handy someday.
+
+class FindFirstRegionClosure: public HeapRegionClosure {
+  HeapRegion* _a_region;
+public:
+  FindFirstRegionClosure() : _a_region(NULL) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _a_region = r;
+    return true;
+  }
+  HeapRegion* result() { return _a_region; }
+};
+
+
+class RefineCardTableEntryClosure: public CardTableEntryClosure {
+  SuspendibleThreadSet* _sts;
+  G1RemSet* _g1rs;
+  ConcurrentG1Refine* _cg1r;
+  bool _concurrent;
+public:
+  RefineCardTableEntryClosure(SuspendibleThreadSet* sts,
+                              G1RemSet* g1rs,
+                              ConcurrentG1Refine* cg1r) :
+    _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true)
+  {}
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    _g1rs->concurrentRefineOneCard(card_ptr, worker_i);
+    if (_concurrent && _sts->should_yield()) {
+      // Caller will actually yield.
+      return false;
+    }
+    // Otherwise, we finished successfully; return true.
+    return true;
+  }
+  void set_concurrent(bool b) { _concurrent = b; }
+};
+
+
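+// Debugging closure used by check_ct_logs_at_safepoint(): for each logged
+// card that maps into the heap, it records a histogram of the card values
+// seen and then resets the card to the clean value.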
+class ClearLoggedCardTableEntryClosure: public CardTableEntryClosure {
+  int _calls;
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _ctbs;
+  int _histo[256];
+public:
+  ClearLoggedCardTableEntryClosure() :
+    _calls(0)
+  {
+    _g1h = G1CollectedHeap::heap();
+    _ctbs = (CardTableModRefBS*)_g1h->barrier_set();
+    for (int i = 0; i < 256; i++) _histo[i] = 0;
+  }
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) {
+      _calls++;
+      unsigned char* ujb = (unsigned char*)card_ptr;
+      int ind = (int)(*ujb);
+      _histo[ind]++;
+      *card_ptr = -1;
+    }
+    return true;
+  }
+  int calls() { return _calls; }
+  void print_histo() {
+    gclog_or_tty->print_cr("Card table value histogram:");
+    for (int i = 0; i < 256; i++) {
+      if (_histo[i] != 0) {
+        gclog_or_tty->print_cr("  %d: %d", i, _histo[i]);
+      }
+    }
+  }
+};
+
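+// Counterpart of the closure above: re-dirties each logged card that maps
+// into the heap and counts how many cards were touched.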
+class RedirtyLoggedCardTableEntryClosure: public CardTableEntryClosure {
+  int _calls;
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _ctbs;
+public:
+  RedirtyLoggedCardTableEntryClosure() :
+    _calls(0)
+  {
+    _g1h = G1CollectedHeap::heap();
+    _ctbs = (CardTableModRefBS*)_g1h->barrier_set();
+  }
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) {
+      _calls++;
+      *card_ptr = 0;
+    }
+    return true;
+  }
+  int calls() { return _calls; }
+};
+
+YoungList::YoungList(G1CollectedHeap* g1h)
+  : _g1h(g1h), _head(NULL),
+    _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL),
+    _length(0), _scan_only_length(0),
+    _last_sampled_rs_lengths(0),
+    _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0)
+{
+  guarantee( check_list_empty(false), "just making sure..." );
+}
+
+void YoungList::push_region(HeapRegion *hr) {
+  assert(!hr->is_young(), "should not already be young");
+  assert(hr->get_next_young_region() == NULL, "cause it should!");
+
+  hr->set_next_young_region(_head);
+  _head = hr;
+
+  hr->set_young();
+  double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length);
+  ++_length;
+}
+
+void YoungList::add_survivor_region(HeapRegion* hr) {
+  assert(!hr->is_survivor(), "should not already be a survivor");
+  assert(hr->get_next_young_region() == NULL, "cause it should!");
+
+  hr->set_next_young_region(_survivor_head);
+  if (_survivor_head == NULL) {
+    _survivors_tail = hr;
+  }
+  _survivor_head = hr;
+
+  hr->set_survivor();
+  ++_survivor_length;
+}
+
+HeapRegion* YoungList::pop_region() {
+  while (_head != NULL) {
+    assert( length() > 0, "list should not be empty" );
+    HeapRegion* ret = _head;
+    _head = ret->get_next_young_region();
+    ret->set_next_young_region(NULL);
+    --_length;
+    assert(ret->is_young(), "region should be very young");
+
+    // Replace 'Survivor' region type with 'Young'. So the region will
+    // be treated as a young region and will not be 'confused' with
+    // newly created survivor regions.
+    if (ret->is_survivor()) {
+      ret->set_young();
+    }
+
+    if (!ret->is_scan_only()) {
+      return ret;
+    }
+
+    // scan-only, we'll add it to the scan-only list
+    if (_scan_only_tail == NULL) {
+      guarantee( _scan_only_head == NULL, "invariant" );
+
+      _scan_only_head = ret;
+      _curr_scan_only = ret;
+    } else {
+      guarantee( _scan_only_head != NULL, "invariant" );
+      _scan_only_tail->set_next_young_region(ret);
+    }
+    guarantee( ret->get_next_young_region() == NULL, "invariant" );
+    _scan_only_tail = ret;
+
+    // no need to be tagged as scan-only any more
+    ret->set_young();
+
+    ++_scan_only_length;
+  }
+  assert( length() == 0, "list should be empty" );
+  return NULL;
+}
+
+void YoungList::empty_list(HeapRegion* list) {
+  while (list != NULL) {
+    HeapRegion* next = list->get_next_young_region();
+    list->set_next_young_region(NULL);
+    list->uninstall_surv_rate_group();
+    list->set_not_young();
+    list = next;
+  }
+}
+
+void YoungList::empty_list() {
+  assert(check_list_well_formed(), "young list should be well formed");
+
+  empty_list(_head);
+  _head = NULL;
+  _length = 0;
+
+  empty_list(_scan_only_head);
+  _scan_only_head = NULL;
+  _scan_only_tail = NULL;
+  _scan_only_length = 0;
+  _curr_scan_only = NULL;
+
+  empty_list(_survivor_head);
+  _survivor_head = NULL;
+  _survivors_tail = NULL;
+  _survivor_length = 0;
+
+  _last_sampled_rs_lengths = 0;
+
+  assert(check_list_empty(false), "just making sure...");
+}
+
+bool YoungList::check_list_well_formed() {
+  bool ret = true;
+
+  size_t length = 0;
+  HeapRegion* curr = _head;
+  HeapRegion* last = NULL;
+  while (curr != NULL) {
+    if (!curr->is_young() || curr->is_scan_only()) {
+      gclog_or_tty->print_cr("### YOUNG REGION "PTR_FORMAT"-"PTR_FORMAT" "
+                             "incorrectly tagged (%d, %d)",
+                             curr->bottom(), curr->end(),
+                             curr->is_young(), curr->is_scan_only());
+      ret = false;
+    }
+    ++length;
+    last = curr;
+    curr = curr->get_next_young_region();
+  }
+  ret = ret && (length == _length);
+
+  if (!ret) {
+    gclog_or_tty->print_cr("### YOUNG LIST seems not well formed!");
+    gclog_or_tty->print_cr("###   list has %d entries, _length is %d",
+                           length, _length);
+  }
+
+  bool scan_only_ret = true;
+  length = 0;
+  curr = _scan_only_head;
+  last = NULL;
+  while (curr != NULL) {
+    if (!curr->is_young() || curr->is_scan_only()) {
+      gclog_or_tty->print_cr("### SCAN-ONLY REGION "PTR_FORMAT"-"PTR_FORMAT" "
+                             "incorrectly tagged (%d, %d)",
+                             curr->bottom(), curr->end(),
+                             curr->is_young(), curr->is_scan_only());
+      scan_only_ret = false;
+    }
+    ++length;
+    last = curr;
+    curr = curr->get_next_young_region();
+  }
+  scan_only_ret = scan_only_ret && (length == _scan_only_length);
+
+  if ( (last != _scan_only_tail) ||
+       (_scan_only_head == NULL && _scan_only_tail != NULL) ||
+       (_scan_only_head != NULL && _scan_only_tail == NULL) ) {
+    gclog_or_tty->print_cr("### _scan_only_tail is set incorrectly");
+    scan_only_ret = false;
+  }
+
+  if (_curr_scan_only != NULL && _curr_scan_only != _scan_only_head) {
+    gclog_or_tty->print_cr("### _curr_scan_only is set incorrectly");
+    scan_only_ret = false;
+  }
+
+  if (!scan_only_ret) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST seems not well formed!");
+    gclog_or_tty->print_cr("###   list has %d entries, _scan_only_length is %d",
+                  length, _scan_only_length);
+  }
+
+  return ret && scan_only_ret;
+}
+
+bool YoungList::check_list_empty(bool ignore_scan_only_list,
+                                 bool check_sample) {
+  bool ret = true;
+
+  if (_length != 0) {
+    gclog_or_tty->print_cr("### YOUNG LIST should have 0 length, not %d",
+                  _length);
+    ret = false;
+  }
+  if (check_sample && _last_sampled_rs_lengths != 0) {
+    gclog_or_tty->print_cr("### YOUNG LIST has non-zero last sampled RS lengths");
+    ret = false;
+  }
+  if (_head != NULL) {
+    gclog_or_tty->print_cr("### YOUNG LIST does not have a NULL head");
+    ret = false;
+  }
+  if (!ret) {
+    gclog_or_tty->print_cr("### YOUNG LIST does not seem empty");
+  }
+
+  if (ignore_scan_only_list)
+    return ret;
+
+  bool scan_only_ret = true;
+  if (_scan_only_length != 0) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST should have 0 length, not %d",
+                  _scan_only_length);
+    scan_only_ret = false;
+  }
+  if (_scan_only_head != NULL) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL head");
+    scan_only_ret = false;
+  }
+  if (_scan_only_tail != NULL) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL tail");
+    scan_only_ret = false;
+  }
+  if (!scan_only_ret) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not seem empty");
+  }
+
+  return ret && scan_only_ret;
+}
+
+void
+YoungList::rs_length_sampling_init() {
+  _sampled_rs_lengths = 0;
+  _curr               = _head;
+}
+
+bool
+YoungList::rs_length_sampling_more() {
+  return _curr != NULL;
+}
+
+void
+YoungList::rs_length_sampling_next() {
+  assert( _curr != NULL, "invariant" );
+  _sampled_rs_lengths += _curr->rem_set()->occupied();
+  _curr = _curr->get_next_young_region();
+  if (_curr == NULL) {
+    _last_sampled_rs_lengths = _sampled_rs_lengths;
+    // gclog_or_tty->print_cr("last sampled RS lengths = %d", _last_sampled_rs_lengths);
+  }
+}
+
+void
+YoungList::reset_auxilary_lists() {
+  // We could have just "moved" the scan-only list to the young list.
+  // However, the scan-only list is ordered according to the region
+  // age in descending order, so, by moving one entry at a time, we
+  // ensure that it is recreated in ascending order.
+
+  guarantee( is_empty(), "young list should be empty" );
+  assert(check_list_well_formed(), "young list should be well formed");
+
+  // Add survivor regions to SurvRateGroup.
+  _g1h->g1_policy()->note_start_adding_survivor_regions();
+  for (HeapRegion* curr = _survivor_head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    _g1h->g1_policy()->set_region_survivors(curr);
+  }
+  _g1h->g1_policy()->note_stop_adding_survivor_regions();
+
+  if (_survivor_head != NULL) {
+    _head           = _survivor_head;
+    _length         = _survivor_length + _scan_only_length;
+    _survivors_tail->set_next_young_region(_scan_only_head);
+  } else {
+    _head           = _scan_only_head;
+    _length         = _scan_only_length;
+  }
+
+  for (HeapRegion* curr = _scan_only_head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    curr->recalculate_age_in_surv_rate_group();
+  }
+  _scan_only_head   = NULL;
+  _scan_only_tail   = NULL;
+  _scan_only_length = 0;
+  _curr_scan_only   = NULL;
+
+  _survivor_head    = NULL;
+  _survivors_tail   = NULL;
+  _survivor_length  = 0;
+  _g1h->g1_policy()->finished_recalculating_age_indexes();
+
+  assert(check_list_well_formed(), "young list should be well formed");
+}
+
+void YoungList::print() {
+  HeapRegion* lists[] = {_head,   _scan_only_head, _survivor_head};
+  const char* names[] = {"YOUNG", "SCAN-ONLY",     "SURVIVOR"};
+
+  for (unsigned int list = 0; list < ARRAY_SIZE(lists); ++list) {
+    gclog_or_tty->print_cr("%s LIST CONTENTS", names[list]);
+    HeapRegion *curr = lists[list];
+    if (curr == NULL)
+      gclog_or_tty->print_cr("  empty");
+    while (curr != NULL) {
+      gclog_or_tty->print_cr("  [%08x-%08x], t: %08x, P: %08x, N: %08x, C: %08x, "
+                             "age: %4d, y: %d, s-o: %d, surv: %d",
+                             curr->bottom(), curr->end(),
+                             curr->top(),
+                             curr->prev_top_at_mark_start(),
+                             curr->next_top_at_mark_start(),
+                             curr->top_at_conc_mark_count(),
+                             curr->age_in_surv_rate_group_cond(),
+                             curr->is_young(),
+                             curr->is_scan_only(),
+                             curr->is_survivor());
+      curr = curr->get_next_young_region();
+    }
+  }
+
+  gclog_or_tty->print_cr("");
+}
+
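+// Stops the concurrent refinement, zero-fill and marking threads.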
+void G1CollectedHeap::stop_conc_gc_threads() {
+  _cg1r->cg1rThread()->stop();
+  _czft->stop();
+  _cmThread->stop();
+}
+
+
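+// Consistency check between the card table and the dirty card queues:
+// clear all logged cards, verify that the card table is then completely
+// clean, re-dirty the logged entries and check that the counts match.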
+void G1CollectedHeap::check_ct_logs_at_safepoint() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set();
+
+  // Count the dirty cards at the start.
+  CountNonCleanMemRegionClosure count1(this);
+  ct_bs->mod_card_iterate(&count1);
+  int orig_count = count1.n();
+
+  // First clear the logged cards.
+  ClearLoggedCardTableEntryClosure clear;
+  dcqs.set_closure(&clear);
+  dcqs.apply_closure_to_all_completed_buffers();
+  dcqs.iterate_closure_all_threads(false);
+  clear.print_histo();
+
+  // Now ensure that there's no dirty cards.
+  CountNonCleanMemRegionClosure count2(this);
+  ct_bs->mod_card_iterate(&count2);
+  if (count2.n() != 0) {
+    gclog_or_tty->print_cr("Card table has %d entries; %d originally",
+                           count2.n(), orig_count);
+  }
+  guarantee(count2.n() == 0, "Card table should be clean.");
+
+  RedirtyLoggedCardTableEntryClosure redirty;
+  JavaThread::dirty_card_queue_set().set_closure(&redirty);
+  dcqs.apply_closure_to_all_completed_buffers();
+  dcqs.iterate_closure_all_threads(false);
+  gclog_or_tty->print_cr("Log entries = %d, dirty cards = %d.",
+                         clear.calls(), orig_count);
+  guarantee(redirty.calls() == clear.calls(),
+            "Or else mechanism is broken.");
+
+  CountNonCleanMemRegionClosure count3(this);
+  ct_bs->mod_card_iterate(&count3);
+  if (count3.n() != orig_count) {
+    gclog_or_tty->print_cr("Should have restored them all: orig = %d, final = %d.",
+                           orig_count, count3.n());
+    guarantee(count3.n() >= orig_count, "Should have restored them all.");
+  }
+
+  JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl);
+}
+
+// Private class members.
+
+G1CollectedHeap* G1CollectedHeap::_g1h;
+
+// Private methods.
+
+// Finds a HeapRegion that can be used to allocate a block of the given size.
+
+HeapRegion* G1CollectedHeap::newAllocRegion_work(size_t word_size,
+                                                 bool do_expand,
+                                                 bool zero_filled) {
+  ConcurrentZFThread::note_region_alloc();
+  HeapRegion* res = alloc_free_region_from_lists(zero_filled);
+  if (res == NULL && do_expand) {
+    expand(word_size * HeapWordSize);
+    res = alloc_free_region_from_lists(zero_filled);
+    assert(res == NULL ||
+           (!res->isHumongous() &&
+            (!zero_filled ||
+             res->zero_fill_state() == HeapRegion::Allocated)),
+           "Alloc Regions must be zero filled (and non-H)");
+  }
+  if (res != NULL && res->is_empty()) _free_regions--;
+  assert(res == NULL ||
+         (!res->isHumongous() &&
+          (!zero_filled ||
+           res->zero_fill_state() == HeapRegion::Allocated)),
+         "Non-young alloc Regions must be zero filled (and non-H)");
+
+  if (G1TraceRegions) {
+    if (res != NULL) {
+      gclog_or_tty->print_cr("new alloc region %d:["PTR_FORMAT", "PTR_FORMAT"], "
+                             "top "PTR_FORMAT,
+                             res->hrs_index(), res->bottom(), res->end(), res->top());
+    }
+  }
+
+  return res;
+}
+
+HeapRegion* G1CollectedHeap::newAllocRegionWithExpansion(int purpose,
+                                                         size_t word_size,
+                                                         bool zero_filled) {
+  HeapRegion* alloc_region = NULL;
+  if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
+    alloc_region = newAllocRegion_work(word_size, true, zero_filled);
+    if (purpose == GCAllocForSurvived && alloc_region != NULL) {
+      _young_list->add_survivor_region(alloc_region);
+    }
+    ++_gc_alloc_region_counts[purpose];
+  } else {
+    g1_policy()->note_alloc_region_limit_reached(purpose);
+  }
+  return alloc_region;
+}
+
+// If the allocation could fit into the free regions without expansion, try that.
+// Otherwise, if we can expand, do so.
+// Otherwise, if using the "ex" regions might help, try with the "ex" regions given back.
+HeapWord* G1CollectedHeap::humongousObjAllocate(size_t word_size) {
+  assert(regions_accounted_for(), "Region leakage!");
+
+  // We can't allocate H regions while cleanupComplete is running, since
+  // some of the regions we find to be empty might not yet be added to the
+  // unclean list.  (If we're already at a safepoint, this call is
+  // unnecessary, not to mention wrong.)
+  if (!SafepointSynchronize::is_at_safepoint())
+    wait_for_cleanup_complete();
+
+  size_t num_regions =
+    round_to(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords;
+
+  // Special case if < one region???
+
+  // Remember the ft size.
+  size_t x_size = expansion_regions();
+
+  HeapWord* res = NULL;
+  bool eliminated_allocated_from_lists = false;
+
+  // Can the allocation potentially fit in the free regions?
+  if (free_regions() >= num_regions) {
+    res = _hrs->obj_allocate(word_size);
+  }
+  if (res == NULL) {
+    // Try expansion.
+    size_t fs = _hrs->free_suffix();
+    if (fs + x_size >= num_regions) {
+      expand((num_regions - fs) * HeapRegion::GrainBytes);
+      res = _hrs->obj_allocate(word_size);
+      assert(res != NULL, "This should have worked.");
+    } else {
+      // Expansion won't help.  Are there enough free regions if we get rid
+      // of reservations?
+      size_t avail = free_regions();
+      if (avail >= num_regions) {
+        res = _hrs->obj_allocate(word_size);
+        if (res != NULL) {
+          remove_allocated_regions_from_lists();
+          eliminated_allocated_from_lists = true;
+        }
+      }
+    }
+  }
+  if (res != NULL) {
+    // Increment by the number of regions allocated.
+    // FIXME: Assumes regions all of size GrainBytes.
+#ifndef PRODUCT
+    mr_bs()->verify_clean_region(MemRegion(res, res + num_regions *
+                                           HeapRegion::GrainWords));
+#endif
+    if (!eliminated_allocated_from_lists)
+      remove_allocated_regions_from_lists();
+    _summary_bytes_used += word_size * HeapWordSize;
+    _free_regions -= num_regions;
+    _num_humongous_regions += (int) num_regions;
+  }
+  assert(regions_accounted_for(), "Region Leakage");
+  return res;
+}
+
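+// Slow path of allocation: humongous requests are handled by
+// humongousObjAllocate(); otherwise the current allocation region is
+// retired and replaced (possibly after a collection pause) and the
+// allocation is retried.  On success outside a safepoint the Heap_lock
+// is released before returning.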
+HeapWord*
+G1CollectedHeap::attempt_allocation_slow(size_t word_size,
+                                         bool permit_collection_pause) {
+  HeapWord* res = NULL;
+  HeapRegion* allocated_young_region = NULL;
+
+  assert( SafepointSynchronize::is_at_safepoint() ||
+          Heap_lock->owned_by_self(), "pre condition of the call" );
+
+  if (isHumongous(word_size)) {
+    // Allocation of a humongous object can, in a sense, complete a
+    // partial region, if the previous alloc was also humongous, and
+    // caused the test below to succeed.
+    if (permit_collection_pause)
+      do_collection_pause_if_appropriate(word_size);
+    res = humongousObjAllocate(word_size);
+    assert(_cur_alloc_region == NULL
+           || !_cur_alloc_region->isHumongous(),
+           "Prevent a regression of this bug.");
+
+  } else {
+    // We may have concurrent cleanup working at the time. Wait for it
+    // to complete. In the future we would probably want to make the
+    // concurrent cleanup truly concurrent by decoupling it from the
+    // allocation.
+    if (!SafepointSynchronize::is_at_safepoint())
+      wait_for_cleanup_complete();
+    // If we do a collection pause, this will be reset to a non-NULL
+    // value.  If we don't, nulling here ensures that we allocate a new
+    // region below.
+    if (_cur_alloc_region != NULL) {
+      // We're finished with the _cur_alloc_region.
+      _summary_bytes_used += _cur_alloc_region->used();
+      _cur_alloc_region = NULL;
+    }
+    assert(_cur_alloc_region == NULL, "Invariant.");
+    // Completion of a heap region is perhaps a good point at which to do
+    // a collection pause.
+    if (permit_collection_pause)
+      do_collection_pause_if_appropriate(word_size);
+    // Make sure we have an allocation region available.
+    if (_cur_alloc_region == NULL) {
+      if (!SafepointSynchronize::is_at_safepoint())
+        wait_for_cleanup_complete();
+      bool next_is_young = should_set_young_locked();
+      // If the next region is not young, make sure it's zero-filled.
+      _cur_alloc_region = newAllocRegion(word_size, !next_is_young);
+      if (_cur_alloc_region != NULL) {
+        _summary_bytes_used -= _cur_alloc_region->used();
+        if (next_is_young) {
+          set_region_short_lived_locked(_cur_alloc_region);
+          allocated_young_region = _cur_alloc_region;
+        }
+      }
+    }
+    assert(_cur_alloc_region == NULL || !_cur_alloc_region->isHumongous(),
+           "Prevent a regression of this bug.");
+
+    // Now retry the allocation.
+    if (_cur_alloc_region != NULL) {
+      res = _cur_alloc_region->allocate(word_size);
+    }
+  }
+
+  // NOTE: fails frequently in PRT
+  assert(regions_accounted_for(), "Region leakage!");
+
+  if (res != NULL) {
+    if (!SafepointSynchronize::is_at_safepoint()) {
+      assert( permit_collection_pause, "invariant" );
+      assert( Heap_lock->owned_by_self(), "invariant" );
+      Heap_lock->unlock();
+    }
+
+    if (allocated_young_region != NULL) {
+      HeapRegion* hr = allocated_young_region;
+      HeapWord* bottom = hr->bottom();
+      HeapWord* end = hr->end();
+      MemRegion mr(bottom, end);
+      ((CardTableModRefBS*)_g1h->barrier_set())->dirty(mr);
+    }
+  }
+
+  assert( SafepointSynchronize::is_at_safepoint() ||
+          (res == NULL && Heap_lock->owned_by_self()) ||
+          (res != NULL && !Heap_lock->owned_by_self()),
+          "post condition of the call" );
+
+  return res;
+}
+
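+// Top-level allocation entry point.  Loops between a fast attempt under
+// the Heap_lock and a VM_G1CollectForAllocation operation executed by the
+// VM thread, until a result (possibly NULL) is obtained.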
+HeapWord*
+G1CollectedHeap::mem_allocate(size_t word_size,
+                              bool   is_noref,
+                              bool   is_tlab,
+                              bool* gc_overhead_limit_was_exceeded) {
+  debug_only(check_for_valid_allocation_state());
+  assert(no_gc_in_progress(), "Allocation during gc not allowed");
+  HeapWord* result = NULL;
+
+  // Loop until the allocation is satisfied,
+  // or unsatisfied after GC.
+  for (int try_count = 1; /* return or throw */; try_count += 1) {
+    int gc_count_before;
+    {
+      Heap_lock->lock();
+      result = attempt_allocation(word_size);
+      if (result != NULL) {
+        // attempt_allocation should have unlocked the heap lock
+        assert(is_in(result), "result not in heap");
+        return result;
+      }
+      // Read the gc count while the heap lock is held.
+      gc_count_before = SharedHeap::heap()->total_collections();
+      Heap_lock->unlock();
+    }
+
+    // Create the garbage collection operation...
+    VM_G1CollectForAllocation op(word_size,
+                                 gc_count_before);
+
+    // ...and get the VM thread to execute it.
+    VMThread::execute(&op);
+    if (op.prologue_succeeded()) {
+      result = op.result();
+      assert(result == NULL || is_in(result), "result not in heap");
+      return result;
+    }
+
+    // Give a warning if we seem to be looping forever.
+    if ((QueuedAllocationWarningCount > 0) &&
+        (try_count % QueuedAllocationWarningCount == 0)) {
+      warning("G1CollectedHeap::mem_allocate_work retries %d times",
+              try_count);
+    }
+  }
+}
+
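+// Retires the current allocation region: an empty region is freed,
+// otherwise its used bytes are folded into _summary_bytes_used.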
+void G1CollectedHeap::abandon_cur_alloc_region() {
+  if (_cur_alloc_region != NULL) {
+    // We're finished with the _cur_alloc_region.
+    if (_cur_alloc_region->is_empty()) {
+      _free_regions++;
+      free_region(_cur_alloc_region);
+    } else {
+      _summary_bytes_used += _cur_alloc_region->used();
+    }
+    _cur_alloc_region = NULL;
+  }
+}
+
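+// Applied to every region after a full GC: resets the GC time stamp and,
+// for regions that do not continue a humongous object, clears the
+// remembered set and the card-table entries covering the region.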
+class PostMCRemSetClearClosure: public HeapRegionClosure {
+  ModRefBarrierSet* _mr_bs;
+public:
+  PostMCRemSetClearClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {}
+  bool doHeapRegion(HeapRegion* r) {
+    r->reset_gc_time_stamp();
+    if (r->continuesHumongous())
+      return false;
+    HeapRegionRemSet* hrrs = r->rem_set();
+    if (hrrs != NULL) hrrs->clear();
+    // You might think here that we could clear just the cards
+    // corresponding to the used region.  But no: if we leave a dirty card
+    // in a region we might allocate into, then it would prevent that card
+    // from being enqueued, and cause it to be missed.
+    // Re: the performance cost: we shouldn't be doing full GC anyway!
+    _mr_bs->clear(MemRegion(r->bottom(), r->end()));
+    return false;
+  }
+};
+
+
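+// Applied after the post-GC resize: dirties the card-table entries covering
+// the used portion of each region that does not continue a humongous object.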
+class PostMCRemSetInvalidateClosure: public HeapRegionClosure {
+  ModRefBarrierSet* _mr_bs;
+public:
+  PostMCRemSetInvalidateClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    if (r->used_region().word_size() != 0) {
+      _mr_bs->invalidate(r->used_region(), true /*whole heap*/);
+    }
+    return false;
+  }
+};
+
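+// Performs a full, stop-the-world collection: concurrent marking is aborted,
+// G1MarkSweep runs at a safepoint, the region lists and remembered sets are
+// rebuilt, and the heap is resized if necessary.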
+void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
+                                    size_t word_size) {
+  ResourceMark rm;
+
+  if (full && DisableExplicitGC) {
+    gclog_or_tty->print("\n\n\nDisabling Explicit GC\n\n\n");
+    return;
+  }
+
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
+  assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
+
+  if (GC_locker::is_active()) {
+    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
+  }
+
+  {
+    IsGCActiveMark x;
+
+    // Timing
+    gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
+    TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
+    TraceTime t(full ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty);
+
+    double start = os::elapsedTime();
+    GCOverheadReporter::recordSTWStart(start);
+    g1_policy()->record_full_collection_start();
+
+    gc_prologue(true);
+    increment_total_collections();
+
+    size_t g1h_prev_used = used();
+    assert(used() == recalculate_used(), "Should be equal");
+
+    if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      prepare_for_verify();
+      gclog_or_tty->print(" VerifyBeforeGC:");
+      Universe::verify(true);
+    }
+    assert(regions_accounted_for(), "Region leakage!");
+
+    COMPILER2_PRESENT(DerivedPointerTable::clear());
+
+    // We want to discover references, but not process them yet.
+    // This mode is disabled in
+    // instanceRefKlass::process_discovered_references if the
+    // generation does some collection work, or
+    // instanceRefKlass::enqueue_discovered_references if the
+    // generation returns without doing any work.
+    ref_processor()->disable_discovery();
+    ref_processor()->abandon_partial_discovery();
+    ref_processor()->verify_no_references_recorded();
+
+    // Abandon current iterations of concurrent marking and concurrent
+    // refinement, if any are in progress.
+    concurrent_mark()->abort();
+
+    // Make sure we'll choose a new allocation region afterwards.
+    abandon_cur_alloc_region();
+    assert(_cur_alloc_region == NULL, "Invariant.");
+    g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS();
+    tear_down_region_lists();
+    set_used_regions_to_need_zero_fill();
+    if (g1_policy()->in_young_gc_mode()) {
+      empty_young_list();
+      g1_policy()->set_full_young_gcs(true);
+    }
+
+    // Temporarily make reference _discovery_ single threaded (non-MT).
+    ReferenceProcessorMTMutator rp_disc_ser(ref_processor(), false);
+
+    // Temporarily make refs discovery atomic
+    ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true);
+
+    // Temporarily clear _is_alive_non_header
+    ReferenceProcessorIsAliveMutator rp_is_alive_null(ref_processor(), NULL);
+
+    ref_processor()->enable_discovery();
+
+    // Do collection work
+    {
+      HandleMark hm;  // Discard invalid handles created during gc
+      G1MarkSweep::invoke_at_safepoint(ref_processor(), clear_all_soft_refs);
+    }
+    // Because freeing humongous regions may have added some unclean
+    // regions, it is necessary to tear down again before rebuilding.
+    tear_down_region_lists();
+    rebuild_region_lists();
+
+    _summary_bytes_used = recalculate_used();
+
+    ref_processor()->enqueue_discovered_references();
+
+    COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+
+    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      gclog_or_tty->print(" VerifyAfterGC:");
+      Universe::verify(false);
+    }
+    NOT_PRODUCT(ref_processor()->verify_no_references_recorded());
+
+    reset_gc_time_stamp();
+    // Since everything potentially moved, we will clear all remembered
+    // sets, and clear all cards.  Later we will also dirty the cards in
+    // the used portion of the heap after the resizing (which could be a
+    // shrinking).  We will also reset the GC time stamps of the regions.
+    PostMCRemSetClearClosure rs_clear(mr_bs());
+    heap_region_iterate(&rs_clear);
+
+    // Resize the heap if necessary.
+    resize_if_necessary_after_full_collection(full ? 0 : word_size);
+
+    // Since everything potentially moved, we will clear all remembered
+    // sets, but also dirty all cards corresponding to used regions.
+    PostMCRemSetInvalidateClosure rs_invalidate(mr_bs());
+    heap_region_iterate(&rs_invalidate);
+    if (_cg1r->use_cache()) {
+      _cg1r->clear_and_record_card_counts();
+      _cg1r->clear_hot_cache();
+    }
+
+    if (PrintGC) {
+      print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity());
+    }
+
+    if (true) { // FIXME
+      // Ask the permanent generation to adjust size for full collections
+      perm()->compute_new_size();
+    }
+
+    double end = os::elapsedTime();
+    GCOverheadReporter::recordSTWEnd(end);
+    g1_policy()->record_full_collection_end();
+
+    gc_epilogue(true);
+
+    // Abandon concurrent refinement.  This must happen last: in the
+    // dirty-card logging system, some cards may be dirtied by weak-ref
+    // processing, and may be enqueued.  But the whole card table is
+    // dirtied, so this should abandon those logs, and set "do_traversal"
+    // to true.
+    concurrent_g1_refine()->set_pya_restart();
+
+    assert(regions_accounted_for(), "Region leakage!");
+  }
+
+  if (g1_policy()->in_young_gc_mode()) {
+    _young_list->reset_sampled_info();
+    assert( check_young_list_empty(false, false),
+            "young list should be empty at this point");
+  }
+}
+
+void G1CollectedHeap::do_full_collection(bool clear_all_soft_refs) {
+  do_collection(true, clear_all_soft_refs, 0);
+}
+
+// This code is mostly copied from TenuredGeneration.
+void
+G1CollectedHeap::
+resize_if_necessary_after_full_collection(size_t word_size) {
+  assert(MinHeapFreeRatio <= MaxHeapFreeRatio, "sanity check");
+
+  // Include the current allocation, if any, and bytes that will be
+  // pre-allocated to support collections, as "used".
+  const size_t used_after_gc = used();
+  const size_t capacity_after_gc = capacity();
+  const size_t free_after_gc = capacity_after_gc - used_after_gc;
+
+  // We don't have floating point command-line arguments
+  const double minimum_free_percentage = (double) MinHeapFreeRatio / 100;
+  const double maximum_used_percentage = 1.0 - minimum_free_percentage;
+  const double maximum_free_percentage = (double) MaxHeapFreeRatio / 100;
+  const double minimum_used_percentage = 1.0 - maximum_free_percentage;
+
+  size_t minimum_desired_capacity = (size_t) (used_after_gc / maximum_used_percentage);
+  size_t maximum_desired_capacity = (size_t) (used_after_gc / minimum_used_percentage);
+
+  // Don't shrink less than the initial size.
+  minimum_desired_capacity =
+    MAX2(minimum_desired_capacity,
+         collector_policy()->initial_heap_byte_size());
+  maximum_desired_capacity =
+    MAX2(maximum_desired_capacity,
+         collector_policy()->initial_heap_byte_size());
+
+  // We are failing here because minimum_desired_capacity is
+  assert(used_after_gc <= minimum_desired_capacity, "sanity check");
+  assert(minimum_desired_capacity <= maximum_desired_capacity, "sanity check");
+
+  if (PrintGC && Verbose) {
+    const double free_percentage = ((double)free_after_gc) / capacity();
+    gclog_or_tty->print_cr("Computing new size after full GC ");
+    gclog_or_tty->print_cr("  "
+                           "  minimum_free_percentage: %6.2f",
+                           minimum_free_percentage);
+    gclog_or_tty->print_cr("  "
+                           "  maximum_free_percentage: %6.2f",
+                           maximum_free_percentage);
+    gclog_or_tty->print_cr("  "
+                           "  capacity: %6.1fK"
+                           "  minimum_desired_capacity: %6.1fK"
+                           "  maximum_desired_capacity: %6.1fK",
+                           capacity() / (double) K,
+                           minimum_desired_capacity / (double) K,
+                           maximum_desired_capacity / (double) K);
+    gclog_or_tty->print_cr("  "
+                           "   free_after_gc   : %6.1fK"
+                           "   used_after_gc   : %6.1fK",
+                           free_after_gc / (double) K,
+                           used_after_gc / (double) K);
+    gclog_or_tty->print_cr("  "
+                           "   free_percentage: %6.2f",
+                           free_percentage);
+  }
+  if (capacity() < minimum_desired_capacity) {
+    // Don't expand unless it's significant
+    size_t expand_bytes = minimum_desired_capacity - capacity_after_gc;
+    expand(expand_bytes);
+    if (PrintGC && Verbose) {
+      gclog_or_tty->print_cr("    expanding:"
+                             "  minimum_desired_capacity: %6.1fK"
+                             "  expand_bytes: %6.1fK",
+                             minimum_desired_capacity / (double) K,
+                             expand_bytes / (double) K);
+    }
+
+    // No expansion, now see if we want to shrink
+  } else if (capacity() > maximum_desired_capacity) {
+    // Capacity too large, compute shrinking size
+    size_t shrink_bytes = capacity_after_gc - maximum_desired_capacity;
+    shrink(shrink_bytes);
+    if (PrintGC && Verbose) {
+      gclog_or_tty->print_cr("  "
+                             "  shrinking:"
+                             "  initSize: %.1fK"
+                             "  maximum_desired_capacity: %.1fK",
+                             collector_policy()->initial_heap_byte_size() / (double) K,
+                             maximum_desired_capacity / (double) K);
+      gclog_or_tty->print_cr("  "
+                             "  shrink_bytes: %.1fK",
+                             shrink_bytes / (double) K);
+    }
+  }
+}
+
+
+HeapWord*
+G1CollectedHeap::satisfy_failed_allocation(size_t word_size) {
+  HeapWord* result = NULL;
+
+  // In a G1 heap, we're supposed to keep allocation from failing by
+  // incremental pauses.  Therefore, at least for now, we'll favor
+  // expansion over collection.  (This might change in the future if we can
+  // do something smarter than full collection to satisfy a failed alloc.)
+
+  result = expand_and_allocate(word_size);
+  if (result != NULL) {
+    assert(is_in(result), "result not in heap");
+    return result;
+  }
+
+  // OK, I guess we have to try collection.
+
+  do_collection(false, false, word_size);
+
+  result = attempt_allocation(word_size, /*permit_collection_pause*/false);
+
+  if (result != NULL) {
+    assert(is_in(result), "result not in heap");
+    return result;
+  }
+
+  // Try collecting soft references.
+  do_collection(false, true, word_size);
+  result = attempt_allocation(word_size, /*permit_collection_pause*/false);
+  if (result != NULL) {
+    assert(is_in(result), "result not in heap");
+    return result;
+  }
+
+  // What else?  We might try synchronous finalization later.  If the total
+  // space available is large enough for the allocation, then a more
+  // complete compaction phase than we've tried so far might be
+  // appropriate.
+  return NULL;
+}
+
+// Attempts to expand the heap sufficiently to support an allocation of
+// the given "word_size".  If successful, performs the allocation and
+// returns the address of the allocated block; otherwise returns "NULL".
+
+HeapWord* G1CollectedHeap::expand_and_allocate(size_t word_size) {
+  size_t expand_bytes = word_size * HeapWordSize;
+  if (expand_bytes < MinHeapDeltaBytes) {
+    expand_bytes = MinHeapDeltaBytes;
+  }
+  expand(expand_bytes);
+  assert(regions_accounted_for(), "Region leakage!");
+  HeapWord* result = attempt_allocation(word_size, false /* permit_collection_pause */);
+  return result;
+}
+
+size_t G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr) {
+  size_t pre_used = 0;
+  size_t cleared_h_regions = 0;
+  size_t freed_regions = 0;
+  UncleanRegionList local_list;
+  free_region_if_totally_empty_work(hr, pre_used, cleared_h_regions,
+                                    freed_regions, &local_list);
+
+  finish_free_region_work(pre_used, cleared_h_regions, freed_regions,
+                          &local_list);
+  return pre_used;
+}
+
+void
+G1CollectedHeap::free_region_if_totally_empty_work(HeapRegion* hr,
+                                                   size_t& pre_used,
+                                                   size_t& cleared_h,
+                                                   size_t& freed_regions,
+                                                   UncleanRegionList* list,
+                                                   bool par) {
+  assert(!hr->continuesHumongous(), "should have filtered these out");
+  size_t res = 0;
+  if (!hr->popular() && hr->used() > 0 && hr->garbage_bytes() == hr->used()) {
+    if (!hr->is_young()) {
+      if (G1PolicyVerbose > 0)
+        gclog_or_tty->print_cr("Freeing empty region "PTR_FORMAT "(" SIZE_FORMAT " bytes)"
+                               " during cleanup", hr, hr->used());
+      free_region_work(hr, pre_used, cleared_h, freed_regions, list, par);
+    }
+  }
+}
+
+// FIXME: both this and shrink could probably be more efficient by
+// doing one "VirtualSpace::expand_by" call rather than several.
+void G1CollectedHeap::expand(size_t expand_bytes) {
+  size_t old_mem_size = _g1_storage.committed_size();
+  // We expand by a minimum of 1K.
+  expand_bytes = MAX2(expand_bytes, (size_t)K);
+  size_t aligned_expand_bytes =
+    ReservedSpace::page_align_size_up(expand_bytes);
+  aligned_expand_bytes = align_size_up(aligned_expand_bytes,
+                                       HeapRegion::GrainBytes);
+  expand_bytes = aligned_expand_bytes;
+  while (expand_bytes > 0) {
+    HeapWord* base = (HeapWord*)_g1_storage.high();
+    // Commit more storage.
+    bool successful = _g1_storage.expand_by(HeapRegion::GrainBytes);
+    if (!successful) {
+        expand_bytes = 0;
+    } else {
+      expand_bytes -= HeapRegion::GrainBytes;
+      // Expand the committed region.
+      HeapWord* high = (HeapWord*) _g1_storage.high();
+      _g1_committed.set_end(high);
+      // Create a new HeapRegion.
+      MemRegion mr(base, high);
+      bool is_zeroed = !_g1_max_committed.contains(base);
+      HeapRegion* hr = new HeapRegion(_bot_shared, mr, is_zeroed);
+
+      // Now update max_committed if necessary.
+      _g1_max_committed.set_end(MAX2(_g1_max_committed.end(), high));
+
+      // Add it to the HeapRegionSeq.
+      _hrs->insert(hr);
+      // Set the zero-fill state, according to whether it's already
+      // zeroed.
+      {
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        if (is_zeroed) {
+          hr->set_zero_fill_complete();
+          put_free_region_on_list_locked(hr);
+        } else {
+          hr->set_zero_fill_needed();
+          put_region_on_unclean_list_locked(hr);
+        }
+      }
+      _free_regions++;
+      // And we used up an expansion region to create it.
+      _expansion_regions--;
+      // Tell the cardtable about it.
+      Universe::heap()->barrier_set()->resize_covered_region(_g1_committed);
+      // And the offset table as well.
+      _bot_shared->resize(_g1_committed.word_size());
+    }
+  }
+  if (Verbose && PrintGC) {
+    size_t new_mem_size = _g1_storage.committed_size();
+    gclog_or_tty->print_cr("Expanding garbage-first heap from %ldK by %ldK to %ldK",
+                           old_mem_size/K, aligned_expand_bytes/K,
+                           new_mem_size/K);
+  }
+}
+
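+// Uncommits up to shrink_bytes from the unused tail of the heap and updates
+// the committed region, the free/expansion region counts, the card table
+// and the block offset table accordingly.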
+void G1CollectedHeap::shrink_helper(size_t shrink_bytes)
+{
+  size_t old_mem_size = _g1_storage.committed_size();
+  size_t aligned_shrink_bytes =
+    ReservedSpace::page_align_size_down(shrink_bytes);
+  aligned_shrink_bytes = align_size_down(aligned_shrink_bytes,
+                                         HeapRegion::GrainBytes);
+  size_t num_regions_deleted = 0;
+  MemRegion mr = _hrs->shrink_by(aligned_shrink_bytes, num_regions_deleted);
+
+  assert(mr.end() == (HeapWord*)_g1_storage.high(), "Bad shrink!");
+  if (mr.byte_size() > 0)
+    _g1_storage.shrink_by(mr.byte_size());
+  assert(mr.start() == (HeapWord*)_g1_storage.high(), "Bad shrink!");
+
+  _g1_committed.set_end(mr.start());
+  _free_regions -= num_regions_deleted;
+  _expansion_regions += num_regions_deleted;
+
+  // Tell the cardtable about it.
+  Universe::heap()->barrier_set()->resize_covered_region(_g1_committed);
+
+  // And the offset table as well.
+  _bot_shared->resize(_g1_committed.word_size());
+
+  HeapRegionRemSet::shrink_heap(n_regions());
+
+  if (Verbose && PrintGC) {
+    size_t new_mem_size = _g1_storage.committed_size();
+    gclog_or_tty->print_cr("Shrinking garbage-first heap from %ldK by %ldK to %ldK",
+                           old_mem_size/K, aligned_shrink_bytes/K,
+                           new_mem_size/K);
+  }
+}
+
+void G1CollectedHeap::shrink(size_t shrink_bytes) {
+  release_gc_alloc_regions();
+  tear_down_region_lists();  // We will rebuild them in a moment.
+  shrink_helper(shrink_bytes);
+  rebuild_region_lists();
+}
+
+// Public methods.
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
+  SharedHeap(policy_),
+  _g1_policy(policy_),
+  _ref_processor(NULL),
+  _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
+  _bot_shared(NULL),
+  _par_alloc_during_gc_lock(Mutex::leaf, "par alloc during GC lock"),
+  _objs_with_preserved_marks(NULL), _preserved_marks_of_objs(NULL),
+  _evac_failure_scan_stack(NULL) ,
+  _mark_in_progress(false),
+  _cg1r(NULL), _czft(NULL), _summary_bytes_used(0),
+  _cur_alloc_region(NULL),
+  _refine_cte_cl(NULL),
+  _free_region_list(NULL), _free_region_list_size(0),
+  _free_regions(0),
+  _popular_object_boundary(NULL),
+  _cur_pop_hr_index(0),
+  _popular_regions_to_be_evacuated(NULL),
+  _pop_obj_rc_at_copy(),
+  _full_collection(false),
+  _unclean_region_list(),
+  _unclean_regions_coming(false),
+  _young_list(new YoungList(this)),
+  _gc_time_stamp(0),
+  _surviving_young_words(NULL)
+{
+  _g1h = this; // To catch bugs.
+  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
+    vm_exit_during_initialization("Failed necessary allocation.");
+  }
+  int n_queues = MAX2((int)ParallelGCThreads, 1);
+  _task_queues = new RefToScanQueueSet(n_queues);
+
+  int n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
+  assert(n_rem_sets > 0, "Invariant.");
+
+  HeapRegionRemSetIterator** iter_arr =
+    NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues);
+  for (int i = 0; i < n_queues; i++) {
+    iter_arr[i] = new HeapRegionRemSetIterator();
+  }
+  _rem_set_iterator = iter_arr;
+
+  for (int i = 0; i < n_queues; i++) {
+    RefToScanQueue* q = new RefToScanQueue();
+    q->initialize();
+    _task_queues->register_queue(i, q);
+  }
+
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    _gc_alloc_regions[ap]       = NULL;
+    _gc_alloc_region_counts[ap] = 0;
+  }
+  guarantee(_task_queues != NULL, "task_queues allocation failure.");
+}
+
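+// Reserves and commits the initial heap, sets up the remembered set,
+// barrier set and block offset table, creates the concurrent marking and
+// (if enabled) zero-fill threads, and allocates the popular regions.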
+jint G1CollectedHeap::initialize() {
+  os::enable_vtime();
+
+  // Necessary to satisfy locking discipline assertions.
+
+  MutexLocker x(Heap_lock);
+
+  // While there are no constraints in the GC code that HeapWordSize
+  // be any particular value, there are multiple other areas in the
+  // system which believe this to be true (e.g. oop->object_size in some
+  // cases incorrectly returns the size in wordSize units rather than
+  // HeapWordSize).
+  guarantee(HeapWordSize == wordSize, "HeapWordSize must equal wordSize");
+
+  size_t init_byte_size = collector_policy()->initial_heap_byte_size();
+  size_t max_byte_size = collector_policy()->max_heap_byte_size();
+
+  // Ensure that the sizes are properly aligned.
+  Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
+  Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
+
+  // We allocate this in any case, but only do no work if the command line
+  // param is off.
+  _cg1r = new ConcurrentG1Refine();
+
+  // Reserve the maximum.
+  PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
+  // Includes the perm-gen.
+  ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
+                        HeapRegion::GrainBytes,
+                        false /*ism*/);
+
+  if (!heap_rs.is_reserved()) {
+    vm_exit_during_initialization("Could not reserve enough space for object heap");
+    return JNI_ENOMEM;
+  }
+
+  // It is important to do this in a way such that concurrent readers can't
+  // temporarily think something is in the heap.  (I've actually seen this
+  // happen in asserts: DLD.)
+  _reserved.set_word_size(0);
+  _reserved.set_start((HeapWord*)heap_rs.base());
+  _reserved.set_end((HeapWord*)(heap_rs.base() + heap_rs.size()));
+
+  _expansion_regions = max_byte_size/HeapRegion::GrainBytes;
+
+  _num_humongous_regions = 0;
+
+  // Create the gen rem set (and barrier set) for the entire reserved region.
+  _rem_set = collector_policy()->create_rem_set(_reserved, 2);
+  set_barrier_set(rem_set()->bs());
+  if (barrier_set()->is_a(BarrierSet::ModRef)) {
+    _mr_bs = (ModRefBarrierSet*)_barrier_set;
+  } else {
+    vm_exit_during_initialization("G1 requires a mod ref bs.");
+    return JNI_ENOMEM;
+  }
+
+  // Also create a G1 rem set.
+  if (G1UseHRIntoRS) {
+    if (mr_bs()->is_a(BarrierSet::CardTableModRef)) {
+      _g1_rem_set = new HRInto_G1RemSet(this, (CardTableModRefBS*)mr_bs());
+    } else {
+      vm_exit_during_initialization("G1 requires a cardtable mod ref bs.");
+      return JNI_ENOMEM;
+    }
+  } else {
+    _g1_rem_set = new StupidG1RemSet(this);
+  }
+
+  // Carve out the G1 part of the heap.
+
+  ReservedSpace g1_rs   = heap_rs.first_part(max_byte_size);
+  _g1_reserved = MemRegion((HeapWord*)g1_rs.base(),
+                           g1_rs.size()/HeapWordSize);
+  ReservedSpace perm_gen_rs = heap_rs.last_part(max_byte_size);
+
+  _perm_gen = pgs->init(perm_gen_rs, pgs->init_size(), rem_set());
+
+  _g1_storage.initialize(g1_rs, 0);
+  _g1_committed = MemRegion((HeapWord*)_g1_storage.low(), (size_t) 0);
+  _g1_max_committed = _g1_committed;
+  _hrs = new HeapRegionSeq(_expansion_regions);
+  guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq");
+  guarantee(_cur_alloc_region == NULL, "from constructor");
+
+  _bot_shared = new G1BlockOffsetSharedArray(_reserved,
+                                             heap_word_size(init_byte_size));
+
+  _g1h = this;
+
+  // Create the ConcurrentMark data structure and thread.
+  // (Must do this late, so that "max_regions" is defined.)
+  _cm       = new ConcurrentMark(heap_rs, (int) max_regions());
+  _cmThread = _cm->cmThread();
+
+  // ...and the concurrent zero-fill thread, if necessary.
+  if (G1ConcZeroFill) {
+    _czft = new ConcurrentZFThread();
+  }
+
+  // Allocate the popular regions; take them off free lists.
+  size_t pop_byte_size = G1NumPopularRegions * HeapRegion::GrainBytes;
+  expand(pop_byte_size);
+  _popular_object_boundary =
+    _g1_reserved.start() + (G1NumPopularRegions * HeapRegion::GrainWords);
+  for (int i = 0; i < G1NumPopularRegions; i++) {
+    HeapRegion* hr = newAllocRegion(HeapRegion::GrainWords);
+    //    assert(hr != NULL && hr->bottom() < _popular_object_boundary,
+    //     "Should be enough, and all should be below boundary.");
+    hr->set_popular(true);
+  }
+  assert(_cur_pop_hr_index == 0, "Start allocating at the first region.");
+
+  // Initialize the from_card cache structure of HeapRegionRemSet.
+  HeapRegionRemSet::init_heap(max_regions());
+
+  // Now expand into the rest of the initial heap size.
+  expand(init_byte_size - pop_byte_size);
+
+  // Perform any initialization actions delegated to the policy.
+  g1_policy()->init();
+
+  g1_policy()->note_start_of_mark_thread();
+
+  _refine_cte_cl =
+    new RefineCardTableEntryClosure(ConcurrentG1RefineThread::sts(),
+                                    g1_rem_set(),
+                                    concurrent_g1_refine());
+  JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl);
+
+  JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
+                                               SATB_Q_FL_lock,
+                                               0,
+                                               Shared_SATB_Q_lock);
+  if (G1RSBarrierUseQueue) {
+    JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                                  DirtyCardQ_FL_lock,
+                                                  G1DirtyCardQueueMax,
+                                                  Shared_DirtyCardQ_lock);
+  }
+  // In case we're keeping closure specialization stats, initialize those
+  // counts and that mechanism.
+  SpecializationStats::clear();
+
+  _gc_alloc_region_list = NULL;
+
+  // Do later initialization work for concurrent refinement.
+  _cg1r->init();
+
+  const char* group_names[] = { "CR", "ZF", "CM", "CL" };
+  GCOverheadReporter::initGCOverheadReporter(4, group_names);
+
+  return JNI_OK;
+}
+
+void G1CollectedHeap::ref_processing_init() {
+  SharedHeap::ref_processing_init();
+  MemRegion mr = reserved_region();
+  _ref_processor = ReferenceProcessor::create_ref_processor(
+                                         mr,    // span
+                                         false, // Reference discovery is not atomic
+                                                // (though it shouldn't matter here.)
+                                         true,  // mt_discovery
+                                         NULL,  // is alive closure: need to fill this in for efficiency
+                                         ParallelGCThreads,
+                                         ParallelRefProcEnabled,
+                                         true); // Setting next fields of discovered
+                                                // lists requires a barrier.
+}
+
+size_t G1CollectedHeap::capacity() const {
+  return _g1_committed.byte_size();
+}
+
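+// Drains the completed dirty-card buffers for this worker, records how many
+// buffers were processed and, on worker 0, cleans up the concurrent
+// refinement card cache.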
+void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
+                                                 int worker_i) {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  int n_completed_buffers = 0;
+  while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) {
+    n_completed_buffers++;
+  }
+  g1_policy()->record_update_rs_processed_buffers(worker_i,
+                                                  (double) n_completed_buffers);
+  dcqs.clear_n_completed_buffers();
+  // Finish up the queue...
+  if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i,
+                                                            g1_rem_set());
+  assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
+}
+
+
+// Computes the sum of the storage used by the various regions.
+
+size_t G1CollectedHeap::used() const {
+  assert(Heap_lock->owner() != NULL,
+         "Should be owned on this thread's behalf.");
+  size_t result = _summary_bytes_used;
+  if (_cur_alloc_region != NULL)
+    result += _cur_alloc_region->used();
+  return result;
+}
+
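+// Sums used() over all regions, skipping regions that continue a humongous
+// object.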
+class SumUsedClosure: public HeapRegionClosure {
+  size_t _used;
+public:
+  SumUsedClosure() : _used(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      _used += r->used();
+    }
+    return false;
+  }
+  size_t result() { return _used; }
+};
+
+size_t G1CollectedHeap::recalculate_used() const {
+  SumUsedClosure blk;
+  _hrs->iterate(&blk);
+  return blk.result();
+}
+
+#ifndef PRODUCT
+class SumUsedRegionsClosure: public HeapRegionClosure {
+  size_t _num;
+public:
+  // _num starts at G1NumPopularRegions to account for the popular regions
+  SumUsedRegionsClosure() : _num(G1NumPopularRegions) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous() || r->used() > 0 || r->is_gc_alloc_region()) {
+      _num += 1;
+    }
+    return false;
+  }
+  size_t result() { return _num; }
+};
+
+size_t G1CollectedHeap::recalculate_used_regions() const {
+  SumUsedRegionsClosure blk;
+  _hrs->iterate(&blk);
+  return blk.result();
+}
+#endif // PRODUCT
+
+size_t G1CollectedHeap::unsafe_max_alloc() {
+  if (_free_regions > 0) return HeapRegion::GrainBytes;
+  // otherwise, is there space in the current allocation region?
+
+  // We need to store the current allocation region in a local variable
+  // here. The problem is that this method doesn't take any locks and
+  // there may be other threads which overwrite the current allocation
+  // region field. attempt_allocation(), for example, sets it to NULL
+  // and this can happen *after* the NULL check here but before the call
+  // to free(), resulting in a SIGSEGV. Note that this doesn't appear
+  // to be a problem in the optimized build, since the two loads of the
+  // current allocation region field are optimized away.
+  HeapRegion* car = _cur_alloc_region;
+
+  // FIXME: should iterate over all regions?
+  if (car == NULL) {
+    return 0;
+  }
+  return car->free();
+}
+
+void G1CollectedHeap::collect(GCCause::Cause cause) {
+  // The caller doesn't have the Heap_lock
+  assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock");
+  MutexLocker ml(Heap_lock);
+  collect_locked(cause);
+}
+
+void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) {
+  assert(Thread::current()->is_VM_thread(), "Precondition#1");
+  assert(Heap_lock->is_locked(), "Precondition#2");
+  GCCauseSetter gcs(this, cause);
+  switch (cause) {
+    case GCCause::_heap_inspection:
+    case GCCause::_heap_dump: {
+      HandleMark hm;
+      do_full_collection(false);         // don't clear all soft refs
+      break;
+    }
+    default: // XXX FIX ME
+      ShouldNotReachHere(); // Unexpected use of this function
+  }
+}
+
+
+void G1CollectedHeap::collect_locked(GCCause::Cause cause) {
+  // Don't want to do a GC until cleanup is completed.
+  wait_for_cleanup_complete();
+
+  // Read the GC count while holding the Heap_lock
+  int gc_count_before = SharedHeap::heap()->total_collections();
+  {
+    MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+    VM_G1CollectFull op(gc_count_before, cause);
+    VMThread::execute(&op);
+  }
+}
+
+bool G1CollectedHeap::is_in(const void* p) const {
+  if (_g1_committed.contains(p)) {
+    HeapRegion* hr = _hrs->addr_to_region(p);
+    return hr->is_in(p);
+  } else {
+    return _perm_gen->as_gen()->is_in(p);
+  }
+}
+
+// Iteration functions.
+
+// Iterates an OopClosure over all ref-containing fields of objects
+// within a HeapRegion.
+
+class IterateOopClosureRegionClosure: public HeapRegionClosure {
+  MemRegion _mr;
+  OopClosure* _cl;
+public:
+  IterateOopClosureRegionClosure(MemRegion mr, OopClosure* cl)
+    : _mr(mr), _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (! r->continuesHumongous()) {
+      r->oop_iterate(_cl);
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::oop_iterate(OopClosure* cl) {
+  IterateOopClosureRegionClosure blk(_g1_committed, cl);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::oop_iterate(MemRegion mr, OopClosure* cl) {
+  IterateOopClosureRegionClosure blk(mr, cl);
+  _hrs->iterate(&blk);
+}
+
+// Iterates an ObjectClosure over all objects within a HeapRegion.
+
+class IterateObjectClosureRegionClosure: public HeapRegionClosure {
+  ObjectClosure* _cl;
+public:
+  IterateObjectClosureRegionClosure(ObjectClosure* cl) : _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (! r->continuesHumongous()) {
+      r->object_iterate(_cl);
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::object_iterate(ObjectClosure* cl) {
+  IterateObjectClosureRegionClosure blk(cl);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) {
+  // FIXME: is this right?
+  guarantee(false, "object_iterate_since_last_GC not supported by G1 heap");
+}
+
+// Calls a SpaceClosure on a HeapRegion.
+
+class SpaceClosureRegionClosure: public HeapRegionClosure {
+  SpaceClosure* _cl;
+public:
+  SpaceClosureRegionClosure(SpaceClosure* cl) : _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _cl->do_space(r);
+    return false;
+  }
+};
+
+void G1CollectedHeap::space_iterate(SpaceClosure* cl) {
+  SpaceClosureRegionClosure blk(cl);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::heap_region_iterate(HeapRegionClosure* cl) {
+  _hrs->iterate(cl);
+}
+
+void G1CollectedHeap::heap_region_iterate_from(HeapRegion* r,
+                                               HeapRegionClosure* cl) {
+  _hrs->iterate_from(r, cl);
+}
+
+void
+G1CollectedHeap::heap_region_iterate_from(int idx, HeapRegionClosure* cl) {
+  _hrs->iterate_from(idx, cl);
+}
+
+HeapRegion* G1CollectedHeap::region_at(size_t idx) { return _hrs->at(idx); }
+
+void
+G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
+                                                 int worker,
+                                                 jint claim_value) {
+  const size_t regions = n_regions();
+  const size_t worker_num = (ParallelGCThreads > 0 ? ParallelGCThreads : 1);
+  // try to spread out the starting points of the workers
+  const size_t start_index = regions / worker_num * (size_t) worker;
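+  // (Illustrative example: with 10 regions and 4 workers, workers 0-3
+  // start at indices 0, 2, 4 and 6 respectively; each then walks all
+  // 10 regions, wrapping around modulo the region count.)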
+
+  // each worker will actually look at all regions
+  for (size_t count = 0; count < regions; ++count) {
+    const size_t index = (start_index + count) % regions;
+    assert(0 <= index && index < regions, "sanity");
+    HeapRegion* r = region_at(index);
+    // we'll ignore "continues humongous" regions (we'll process them
+    // when we come across their corresponding "starts humongous"
+    // region) and regions already claimed
+    if (r->claim_value() == claim_value || r->continuesHumongous()) {
+      continue;
+    }
+    // OK, try to claim it
+    if (r->claimHeapRegion(claim_value)) {
+      // success!
+      assert(!r->continuesHumongous(), "sanity");
+      if (r->startsHumongous()) {
+        // If the region is "starts humongous", we'll iterate over its
+        // "continues humongous" regions before the region itself. The
+        // order is important: in one case, calling the closure on the
+        // "starts humongous" region might de-allocate and clear all its
+        // "continues humongous" regions and, as a result, we might end
+        // up processing them twice. So, we'll do them first (note that
+        // most closures will ignore them anyway) and then we'll do the
+        // "starts humongous" region.
+        for (size_t ch_index = index + 1; ch_index < regions; ++ch_index) {
+          HeapRegion* chr = region_at(ch_index);
+
+          // if the region has already been claimed or it's not
+          // "continues humongous" we're done
+          if (chr->claim_value() == claim_value ||
+              !chr->continuesHumongous()) {
+            break;
+          }
+
+          // No one should have claimed it directly. We can claim it,
+          // given that we claimed its "starts humongous" region.
+          assert(chr->claim_value() != claim_value, "sanity");
+          assert(chr->humongous_start_region() == r, "sanity");
+
+          if (chr->claimHeapRegion(claim_value)) {
+            // we should always be able to claim it; no one else should
+            // be trying to claim this region
+
+            bool res2 = cl->doHeapRegion(chr);
+            assert(!res2, "Should not abort");
+
+            // Right now, this holds (i.e., no closure that actually
+            // does something with "continues humongous" regions
+            // clears them). We might have to weaken it in the future,
+            // but let's leave these two asserts here for extra safety.
+            assert(chr->continuesHumongous(), "should still be the case");
+            assert(chr->humongous_start_region() == r, "sanity");
+          } else {
+            guarantee(false, "we should not reach here");
+          }
+        }
+      }
+
+      assert(!r->continuesHumongous(), "sanity");
+      bool res = cl->doHeapRegion(r);
+      assert(!res, "Should not abort");
+    }
+  }
+}
+
+class ResetClaimValuesClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    r->set_claim_value(HeapRegion::InitialClaimValue);
+    return false;
+  }
+};
+
+void
+G1CollectedHeap::reset_heap_region_claim_values() {
+  ResetClaimValuesClosure blk;
+  heap_region_iterate(&blk);
+}
+
+#ifdef ASSERT
+// This checks whether all regions in the heap have the correct claim
+// value. It also piggy-backs a check that the humongous_start_region()
+// information on "continues humongous" regions is correct.
+
+class CheckClaimValuesClosure : public HeapRegionClosure {
+private:
+  jint _claim_value;
+  size_t _failures;
+  HeapRegion* _sh_region;
+public:
+  CheckClaimValuesClosure(jint claim_value) :
+    _claim_value(claim_value), _failures(0), _sh_region(NULL) { }
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->claim_value() != _claim_value) {
+      gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
+                             "claim value = %d, should be %d",
+                             r->bottom(), r->end(), r->claim_value(),
+                             _claim_value);
+      ++_failures;
+    }
+    if (!r->isHumongous()) {
+      _sh_region = NULL;
+    } else if (r->startsHumongous()) {
+      _sh_region = r;
+    } else if (r->continuesHumongous()) {
+      if (r->humongous_start_region() != _sh_region) {
+        gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
+                               "HS = "PTR_FORMAT", should be "PTR_FORMAT,
+                               r->bottom(), r->end(),
+                               r->humongous_start_region(),
+                               _sh_region);
+        ++_failures;
+      }
+    }
+    return false;
+  }
+  size_t failures() {
+    return _failures;
+  }
+};
+
+bool G1CollectedHeap::check_heap_region_claim_values(jint claim_value) {
+  CheckClaimValuesClosure cl(claim_value);
+  heap_region_iterate(&cl);
+  return cl.failures() == 0;
+}
+#endif // ASSERT
+
+void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
+  HeapRegion* r = g1_policy()->collection_set();
+  while (r != NULL) {
+    HeapRegion* next = r->next_in_collection_set();
+    if (cl->doHeapRegion(r)) {
+      cl->incomplete();
+      return;
+    }
+    r = next;
+  }
+}
+
+void G1CollectedHeap::collection_set_iterate_from(HeapRegion* r,
+                                                  HeapRegionClosure *cl) {
+  assert(r->in_collection_set(),
+         "Start region must be a member of the collection set.");
+  HeapRegion* cur = r;
+  while (cur != NULL) {
+    HeapRegion* next = cur->next_in_collection_set();
+    if (cl->doHeapRegion(cur) && false) {
+      cl->incomplete();
+      return;
+    }
+    cur = next;
+  }
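+  // We have walked from r to the end of the collection set; now wrap
+  // around and walk from the head of the collection set up to (but
+  // not including) the start region.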
+  cur = g1_policy()->collection_set();
+  while (cur != r) {
+    HeapRegion* next = cur->next_in_collection_set();
+    if (cl->doHeapRegion(cur) && false) {
+      cl->incomplete();
+      return;
+    }
+    cur = next;
+  }
+}
+
+CompactibleSpace* G1CollectedHeap::first_compactible_space() {
+  return _hrs->length() > 0 ? _hrs->at(0) : NULL;
+}
+
+
+Space* G1CollectedHeap::space_containing(const void* addr) const {
+  Space* res = heap_region_containing(addr);
+  if (res == NULL)
+    res = perm_gen()->space_containing(addr);
+  return res;
+}
+
+HeapWord* G1CollectedHeap::block_start(const void* addr) const {
+  Space* sp = space_containing(addr);
+  if (sp != NULL) {
+    return sp->block_start(addr);
+  }
+  return NULL;
+}
+
+size_t G1CollectedHeap::block_size(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  assert(sp != NULL, "block_size of address outside of heap");
+  return sp->block_size(addr);
+}
+
+bool G1CollectedHeap::block_is_obj(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  return sp->block_is_obj(addr);
+}
+
+bool G1CollectedHeap::supports_tlab_allocation() const {
+  return true;
+}
+
+size_t G1CollectedHeap::tlab_capacity(Thread* ignored) const {
+  return HeapRegion::GrainBytes;
+}
+
+size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
+  // Return the remaining space in the cur alloc region, but not less than
+  // the min TLAB size.
+  // Also, no more than half the region size, since we can't allow tlabs to
+  // grow big enough to accommodate humongous objects.
+
+  // We need to store it locally, since it might change between when we
+  // test for NULL and when we use it later.
+  ContiguousSpace* cur_alloc_space = _cur_alloc_region;
+  if (cur_alloc_space == NULL) {
+    return HeapRegion::GrainBytes/2;
+  } else {
+    return MAX2(MIN2(cur_alloc_space->free(),
+                     (size_t)(HeapRegion::GrainBytes/2)),
+                (size_t)MinTLABSize);
+  }
+}
+
+HeapWord* G1CollectedHeap::allocate_new_tlab(size_t size) {
+  bool dummy;
+  return G1CollectedHeap::mem_allocate(size, false, true, &dummy);
+}
+
+bool G1CollectedHeap::allocs_are_zero_filled() {
+  return false;
+}
+
+size_t G1CollectedHeap::large_typearray_limit() {
+  // FIXME
+  return HeapRegion::GrainBytes/HeapWordSize;
+}
+
+size_t G1CollectedHeap::max_capacity() const {
+  return _g1_committed.byte_size();
+}
+
+jlong G1CollectedHeap::millis_since_last_gc() {
+  // assert(false, "NYI");
+  return 0;
+}
+
+
+void G1CollectedHeap::prepare_for_verify() {
+  if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
+    ensure_parsability(false);
+  }
+  g1_rem_set()->prepare_for_verify();
+}
+
+class VerifyLivenessOopClosure: public OopClosure {
+  G1CollectedHeap* g1h;
+public:
+  VerifyLivenessOopClosure(G1CollectedHeap* _g1h) {
+    g1h = _g1h;
+  }
+  void do_oop(narrowOop *p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop *p) {
+    oop obj = *p;
+    assert(obj == NULL || !g1h->is_obj_dead(obj),
+           "Dead object referenced by a not dead object");
+  }
+};
+
+class VerifyObjsInRegionClosure: public ObjectClosure {
+  G1CollectedHeap* _g1h;
+  size_t _live_bytes;
+  HeapRegion *_hr;
+public:
+  VerifyObjsInRegionClosure(HeapRegion *hr) : _live_bytes(0), _hr(hr) {
+    _g1h = G1CollectedHeap::heap();
+  }
+  void do_object(oop o) {
+    VerifyLivenessOopClosure isLive(_g1h);
+    assert(o != NULL, "Huh?");
+    if (!_g1h->is_obj_dead(o)) {
+      o->oop_iterate(&isLive);
+      if (!_hr->obj_allocated_since_prev_marking(o))
+        _live_bytes += (o->size() * HeapWordSize);
+    }
+  }
+  size_t live_bytes() { return _live_bytes; }
+};
+
+class PrintObjsInRegionClosure : public ObjectClosure {
+  HeapRegion *_hr;
+  G1CollectedHeap *_g1;
+public:
+  PrintObjsInRegionClosure(HeapRegion *hr) : _hr(hr) {
+    _g1 = G1CollectedHeap::heap();
+  };
+
+  void do_object(oop o) {
+    if (o != NULL) {
+      HeapWord *start = (HeapWord *) o;
+      size_t word_sz = o->size();
+      gclog_or_tty->print("\nPrinting obj "PTR_FORMAT" of size " SIZE_FORMAT
+                          " isMarkedPrev %d isMarkedNext %d isAllocSince %d\n",
+                          (void*) o, word_sz,
+                          _g1->isMarkedPrev(o),
+                          _g1->isMarkedNext(o),
+                          _hr->obj_allocated_since_prev_marking(o));
+      HeapWord *end = start + word_sz;
+      HeapWord *cur;
+      int *val;
+      for (cur = start; cur < end; cur++) {
+        val = (int *) cur;
+        gclog_or_tty->print("\t "PTR_FORMAT":"PTR_FORMAT"\n", val, *val);
+      }
+    }
+  }
+};
+
+class VerifyRegionClosure: public HeapRegionClosure {
+public:
+  bool _allow_dirty;
+  bool _par;
+  VerifyRegionClosure(bool allow_dirty, bool par = false)
+    : _allow_dirty(allow_dirty), _par(par) {}
+  bool doHeapRegion(HeapRegion* r) {
+    guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
+              "Should be unclaimed at verify points.");
+    if (r->isHumongous()) {
+      if (r->startsHumongous()) {
+        // Verify the single H object.
+        oop(r->bottom())->verify();
+        size_t word_sz = oop(r->bottom())->size();
+        guarantee(r->top() == r->bottom() + word_sz,
+                  "Only one object in a humongous region");
+      }
+    } else {
+      VerifyObjsInRegionClosure not_dead_yet_cl(r);
+      r->verify(_allow_dirty);
+      r->object_iterate(&not_dead_yet_cl);
+      guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(),
+                "More live objects than counted in last complete marking.");
+    }
+    return false;
+  }
+};
+
+class VerifyRootsClosure: public OopsInGenClosure {
+private:
+  G1CollectedHeap* _g1h;
+  bool             _failures;
+
+public:
+  VerifyRootsClosure() :
+    _g1h(G1CollectedHeap::heap()), _failures(false) { }
+
+  bool failures() { return _failures; }
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop obj = *p;
+    if (obj != NULL) {
+      if (_g1h->is_obj_dead(obj)) {
+        gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
+                               "points to dead obj "PTR_FORMAT, p, (void*) obj);
+        obj->print_on(gclog_or_tty);
+        _failures = true;
+      }
+    }
+  }
+};
+
+// This is the task used for parallel heap verification.
+
+class G1ParVerifyTask: public AbstractGangTask {
+private:
+  G1CollectedHeap* _g1h;
+  bool _allow_dirty;
+
+public:
+  G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty) :
+    AbstractGangTask("Parallel verify task"),
+    _g1h(g1h), _allow_dirty(allow_dirty) { }
+
+  void work(int worker_i) {
+    VerifyRegionClosure blk(_allow_dirty, true);
+    _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+                                          HeapRegion::ParVerifyClaimValue);
+  }
+};
+
+void G1CollectedHeap::verify(bool allow_dirty, bool silent) {
+  if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
+    if (!silent) { gclog_or_tty->print("roots "); }
+    VerifyRootsClosure rootsCl;
+    process_strong_roots(false,
+                         SharedHeap::SO_AllClasses,
+                         &rootsCl,
+                         &rootsCl);
+    rem_set()->invalidate(perm_gen()->used_region(), false);
+    if (!silent) { gclog_or_tty->print("heapRegions "); }
+    if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
+      assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+             "sanity check");
+
+      G1ParVerifyTask task(this, allow_dirty);
+      int n_workers = workers()->total_workers();
+      set_par_threads(n_workers);
+      workers()->run_task(&task);
+      set_par_threads(0);
+
+      assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
+             "sanity check");
+
+      reset_heap_region_claim_values();
+
+      assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+             "sanity check");
+    } else {
+      VerifyRegionClosure blk(allow_dirty);
+      _hrs->iterate(&blk);
+    }
+    if (!silent) gclog_or_tty->print("remset ");
+    rem_set()->verify();
+    guarantee(!rootsCl.failures(), "should not have had failures");
+  } else {
+    if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) ");
+  }
+}
+
+class PrintRegionClosure: public HeapRegionClosure {
+  outputStream* _st;
+public:
+  PrintRegionClosure(outputStream* st) : _st(st) {}
+  bool doHeapRegion(HeapRegion* r) {
+    r->print_on(_st);
+    return false;
+  }
+};
+
+void G1CollectedHeap::print() const { print_on(gclog_or_tty); }
+
+void G1CollectedHeap::print_on(outputStream* st) const {
+  PrintRegionClosure blk(st);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
+  if (ParallelGCThreads > 0) {
+    workers()->print_worker_threads();
+  }
+  st->print("\"G1 concurrent mark GC Thread\" ");
+  _cmThread->print();
+  st->cr();
+  st->print("\"G1 concurrent refinement GC Thread\" ");
+  _cg1r->cg1rThread()->print_on(st);
+  st->cr();
+  st->print("\"G1 zero-fill GC Thread\" ");
+  _czft->print_on(st);
+  st->cr();
+}
+
+void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
+  if (ParallelGCThreads > 0) {
+    workers()->threads_do(tc);
+  }
+  tc->do_thread(_cmThread);
+  tc->do_thread(_cg1r->cg1rThread());
+  tc->do_thread(_czft);
+}
+
+void G1CollectedHeap::print_tracing_info() const {
+  concurrent_g1_refine()->print_final_card_counts();
+
+  // We'll overload this to mean "trace GC pause statistics."
+  if (TraceGen0Time || TraceGen1Time) {
+    // The "G1CollectorPolicy" is keeping track of these stats, so delegate
+    // to that.
+    g1_policy()->print_tracing_info();
+  }
+  if (SummarizeG1RSStats) {
+    g1_rem_set()->print_summary_info();
+  }
+  if (SummarizeG1ConcMark) {
+    concurrent_mark()->print_summary_info();
+  }
+  if (SummarizeG1ZFStats) {
+    ConcurrentZFThread::print_summary_info();
+  }
+  if (G1SummarizePopularity) {
+    print_popularity_summary_info();
+  }
+  g1_policy()->print_yg_surv_rate_info();
+
+  GCOverheadReporter::printGCOverhead();
+
+  SpecializationStats::print();
+}
+
+
+int G1CollectedHeap::addr_to_arena_id(void* addr) const {
+  HeapRegion* hr = heap_region_containing(addr);
+  if (hr == NULL) {
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+G1CollectedHeap* G1CollectedHeap::heap() {
+  assert(_sh->kind() == CollectedHeap::G1CollectedHeap,
+         "not a garbage-first heap");
+  return _g1h;
+}
+
+void G1CollectedHeap::gc_prologue(bool full /* Ignored */) {
+  if (PrintHeapAtGC){
+    gclog_or_tty->print_cr(" {Heap before GC collections=%d:", total_collections());
+    Universe::print();
+  }
+  assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer");
+  // Call allocation profiler
+  AllocationProfiler::iterate_since_last_gc();
+  // Fill TLAB's and such
+  ensure_parsability(true);
+}
+
+void G1CollectedHeap::gc_epilogue(bool full /* Ignored */) {
+  // FIXME: what is this about?
+  // I'm ignoring the "fill_newgen()" call if "alloc_event_enabled"
+  // is set.
+  COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(),
+                        "derived pointer present"));
+
+  if (PrintHeapAtGC){
+    gclog_or_tty->print_cr(" Heap after GC collections=%d:", total_collections());
+    Universe::print();
+    gclog_or_tty->print("} ");
+  }
+}
+
+void G1CollectedHeap::do_collection_pause() {
+  // Read the GC count while holding the Heap_lock.
+  // We need to do this _before_ wait_for_cleanup_complete(), to
+  // ensure that we do not give up the heap lock and potentially
+  // pick up the wrong count
+  int gc_count_before = SharedHeap::heap()->total_collections();
+
+  // Don't want to do a GC pause while cleanup is being completed!
+  wait_for_cleanup_complete();
+
+  g1_policy()->record_stop_world_start();
+  {
+    MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+    VM_G1IncCollectionPause op(gc_count_before);
+    VMThread::execute(&op);
+  }
+}
+
+void
+G1CollectedHeap::doConcurrentMark() {
+  if (G1ConcMark) {
+    MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+    if (!_cmThread->in_progress()) {
+      _cmThread->set_started();
+      CGC_lock->notify();
+    }
+  }
+}
+
+class VerifyMarkedObjsClosure: public ObjectClosure {
+  G1CollectedHeap* _g1h;
+public:
+  VerifyMarkedObjsClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
+  void do_object(oop obj) {
+    assert(obj->mark()->is_marked() ? !_g1h->is_obj_dead(obj) : true,
+           "markandsweep mark should agree with concurrent deadness");
+  }
+};
+
+void
+G1CollectedHeap::checkConcurrentMark() {
+  VerifyMarkedObjsClosure verifycl(this);
+  doConcurrentMark();
+  // MutexLockerEx x(getMarkBitMapLock(),
+  //                 Mutex::_no_safepoint_check_flag);
+  object_iterate(&verifycl);
+}
+
+void G1CollectedHeap::do_sync_mark() {
+  _cm->checkpointRootsInitial();
+  _cm->markFromRoots();
+  _cm->checkpointRootsFinal(false);
+}
+
+// <NEW PREDICTION>
+
+double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr,
+                                                       bool young) {
+  return _g1_policy->predict_region_elapsed_time_ms(hr, young);
+}
+
+void
+G1CollectedHeap::check_if_region_is_too_expensive(double predicted_time_ms) {
+  _g1_policy->check_if_region_is_too_expensive(predicted_time_ms);
+}
+
+size_t G1CollectedHeap::pending_card_num() {
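+  // Dirty cards that are pending processing live in two places: each
+  // Java thread's active dirty card queue, and the global set of
+  // completed buffers (each of which is assumed to hold buffer_size
+  // cards).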
+  size_t extra_cards = 0;
+  JavaThread *curr = Threads::first();
+  while (curr != NULL) {
+    DirtyCardQueue& dcq = curr->dirty_card_queue();
+    extra_cards += dcq.size();
+    curr = curr->next();
+  }
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  size_t buffer_size = dcqs.buffer_size();
+  size_t buffer_num = dcqs.completed_buffers_num();
+  return buffer_size * buffer_num + extra_cards;
+}
+
+size_t G1CollectedHeap::max_pending_card_num() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  size_t buffer_size = dcqs.buffer_size();
+  size_t buffer_num  = dcqs.completed_buffers_num();
+  int thread_num  = Threads::number_of_threads();
+  return (buffer_num + thread_num) * buffer_size;
+}
+
+size_t G1CollectedHeap::cards_scanned() {
+  HRInto_G1RemSet* g1_rset = (HRInto_G1RemSet*) g1_rem_set();
+  return g1_rset->cardsScanned();
+}
+
+void
+G1CollectedHeap::setup_surviving_young_words() {
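+  // One counter per region in the young collection set; during the
+  // pause each worker accumulates, per young region, the number of
+  // words that survive (see update_surviving_young_words()).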
+  guarantee( _surviving_young_words == NULL, "pre-condition" );
+  size_t array_length = g1_policy()->young_cset_length();
+  _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length);
+  if (_surviving_young_words == NULL) {
+    vm_exit_out_of_memory(sizeof(size_t) * array_length,
+                          "Not enough space for young surv words summary.");
+  }
+  memset(_surviving_young_words, 0, array_length * sizeof(size_t));
+  for (size_t i = 0;  i < array_length; ++i) {
+    guarantee( _surviving_young_words[i] == 0, "invariant" );
+  }
+}
+
+void
+G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  size_t array_length = g1_policy()->young_cset_length();
+  for (size_t i = 0; i < array_length; ++i)
+    _surviving_young_words[i] += surv_young_words[i];
+}
+
+void
+G1CollectedHeap::cleanup_surviving_young_words() {
+  guarantee( _surviving_young_words != NULL, "pre-condition" );
+  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words);
+  _surviving_young_words = NULL;
+}
+
+// </NEW PREDICTION>
+
+void
+G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) {
+  char verbose_str[128];
+  sprintf(verbose_str, "GC pause ");
+  if (popular_region != NULL)
+    strcat(verbose_str, "(popular)");
+  else if (g1_policy()->in_young_gc_mode()) {
+    if (g1_policy()->full_young_gcs())
+      strcat(verbose_str, "(young)");
+    else
+      strcat(verbose_str, "(partial)");
+  }
+  bool reset_should_initiate_conc_mark = false;
+  if (popular_region != NULL && g1_policy()->should_initiate_conc_mark()) {
+    // we currently do not allow an initial mark phase to be piggy-backed
+    // on a popular pause
+    reset_should_initiate_conc_mark = true;
+    g1_policy()->unset_should_initiate_conc_mark();
+  }
+  if (g1_policy()->should_initiate_conc_mark())
+    strcat(verbose_str, " (initial-mark)");
+
+  GCCauseSetter x(this, (popular_region == NULL ?
+                         GCCause::_g1_inc_collection_pause :
+                         GCCause::_g1_pop_region_collection_pause));
+
+  // if PrintGCDetails is on, we'll print long statistics information
+  // in the collector policy code, so let's not print this as the output
+  // is messy if we do.
+  gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
+  TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
+  TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
+
+  ResourceMark rm;
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
+  assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
+  guarantee(!is_gc_active(), "collection is not reentrant");
+  assert(regions_accounted_for(), "Region leakage!");
+
+  increment_gc_time_stamp();
+
+  if (g1_policy()->in_young_gc_mode()) {
+    assert(check_young_list_well_formed(),
+                "young list should be well formed");
+  }
+
+  if (GC_locker::is_active()) {
+    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
+  }
+
+  bool abandoned = false;
+  { // Call to jvmpi::post_class_unload_events must occur outside of active GC
+    IsGCActiveMark x;
+
+    gc_prologue(false);
+    increment_total_collections();
+
+#if G1_REM_SET_LOGGING
+    gclog_or_tty->print_cr("\nJust chose CS, heap:");
+    print();
+#endif
+
+    if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      prepare_for_verify();
+      gclog_or_tty->print(" VerifyBeforeGC:");
+      Universe::verify(false);
+    }
+
+    COMPILER2_PRESENT(DerivedPointerTable::clear());
+
+    // We want to turn off ref discovery, if necessary, and turn it back
+    // on again later if we do.
+    bool was_enabled = ref_processor()->discovery_enabled();
+    if (was_enabled) ref_processor()->disable_discovery();
+
+    // Forget the current alloc region (we might even choose it to be part
+    // of the collection set!).
+    abandon_cur_alloc_region();
+
+    // The elapsed time induced by the start time below deliberately elides
+    // the possible verification above.
+    double start_time_sec = os::elapsedTime();
+    GCOverheadReporter::recordSTWStart(start_time_sec);
+    size_t start_used_bytes = used();
+    if (!G1ConcMark) {
+      do_sync_mark();
+    }
+
+    g1_policy()->record_collection_pause_start(start_time_sec,
+                                               start_used_bytes);
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    if (g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPre();
+    }
+    save_marks();
+
+    // We must do this before any possible evacuation that should propagate
+    // marks, including evacuation of popular objects in a popular pause.
+    if (mark_in_progress()) {
+      double start_time_sec = os::elapsedTime();
+
+      _cm->drainAllSATBBuffers();
+      double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
+      g1_policy()->record_satb_drain_time(finish_mark_ms);
+
+    }
+    // Record the number of elements currently on the mark stack, so we
+    // only iterate over these.  (Since evacuation may add to the mark
+    // stack, doing more exposes race conditions.)  If no mark is in
+    // progress, this will be zero.
+    _cm->set_oops_do_bound();
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    if (mark_in_progress())
+      concurrent_mark()->newCSet();
+
+    // Now choose the CS.
+    if (popular_region == NULL) {
+      g1_policy()->choose_collection_set();
+    } else {
+      // We may be evacuating a single region (for popularity).
+      g1_policy()->record_popular_pause_preamble_start();
+      popularity_pause_preamble(popular_region);
+      g1_policy()->record_popular_pause_preamble_end();
+      abandoned = (g1_policy()->collection_set() == NULL);
+      // Now we allow more regions to be added (we have to collect
+      // all popular regions).
+      if (!abandoned) {
+        g1_policy()->choose_collection_set(popular_region);
+      }
+    }
+    // We may abandon a pause if we find no region that will fit in the MMU
+    // pause.
+    abandoned = (g1_policy()->collection_set() == NULL);
+
+    // Nothing to do if we were unable to choose a collection set.
+    if (!abandoned) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("\nAfter pause, heap:");
+      print();
+#endif
+
+      setup_surviving_young_words();
+
+      // Set up the gc allocation regions.
+      get_gc_alloc_regions();
+
+      // Actually do the work...
+      evacuate_collection_set();
+      free_collection_set(g1_policy()->collection_set());
+      g1_policy()->clear_collection_set();
+
+      if (popular_region != NULL) {
+        // We have to wait until now, because we don't want the region to
+        // be rescheduled for pop-evac during RS update.
+        popular_region->set_popular_pending(false);
+      }
+
+      release_gc_alloc_regions();
+
+      cleanup_surviving_young_words();
+
+      if (g1_policy()->in_young_gc_mode()) {
+        _young_list->reset_sampled_info();
+        assert(check_young_list_empty(true),
+               "young list should be empty");
+
+#if SCAN_ONLY_VERBOSE
+        _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+        _young_list->reset_auxilary_lists();
+      }
+    } else {
+      COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+    }
+
+    if (evacuation_failed()) {
+      _summary_bytes_used = recalculate_used();
+    } else {
+      // The "used" of the the collection set have already been subtracted
+      // when they were freed.  Add in the bytes evacuated.
+      _summary_bytes_used += g1_policy()->bytes_in_to_space();
+    }
+
+    if (g1_policy()->in_young_gc_mode() &&
+        g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPost();
+      set_marking_started();
+      doConcurrentMark();
+    }
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    double end_time_sec = os::elapsedTime();
+    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    GCOverheadReporter::recordSTWEnd(end_time_sec);
+    g1_policy()->record_collection_pause_end(popular_region != NULL,
+                                             abandoned);
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      gclog_or_tty->print(" VerifyAfterGC:");
+      Universe::verify(false);
+    }
+
+    if (was_enabled) ref_processor()->enable_discovery();
+
+    {
+      size_t expand_bytes = g1_policy()->expansion_amount();
+      if (expand_bytes > 0) {
+        size_t bytes_before = capacity();
+        expand(expand_bytes);
+      }
+    }
+
+    if (mark_in_progress())
+      concurrent_mark()->update_g1_committed();
+
+    gc_epilogue(false);
+  }
+
+  assert(verify_region_lists(), "Bad region lists.");
+
+  if (reset_should_initiate_conc_mark)
+    g1_policy()->set_should_initiate_conc_mark();
+
+  if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
+    gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
+    print_tracing_info();
+    vm_exit(-1);
+  }
+}
+
+void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
+  assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
+  HeapWord* original_top = NULL;
+  if (r != NULL)
+    original_top = r->top();
+
+  // We will want to record the used space in r as being there before gc.
+  // Once we install it as a GC alloc region, it's eligible for allocation.
+  // So record it now and use it later.
+  size_t r_used = 0;
+  if (r != NULL) {
+    r_used = r->used();
+
+    if (ParallelGCThreads > 0) {
+      // need to take the lock to guard against two threads calling
+      // get_gc_alloc_region concurrently (very unlikely but...)
+      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      r->save_marks();
+    }
+  }
+  HeapRegion* old_alloc_region = _gc_alloc_regions[purpose];
+  _gc_alloc_regions[purpose] = r;
+  if (old_alloc_region != NULL) {
+    // Replace aliases too.
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      if (_gc_alloc_regions[ap] == old_alloc_region) {
+        _gc_alloc_regions[ap] = r;
+      }
+    }
+  }
+  if (r != NULL) {
+    push_gc_alloc_region(r);
+    if (mark_in_progress() && original_top != r->next_top_at_mark_start()) {
+      // We are using a region as a GC alloc region after it has been used
+      // as a mutator allocation region during the current marking cycle.
+      // The mutator-allocated objects are currently implicitly marked, but
+      // when we move hr->next_top_at_mark_start() forward at the end
+      // of the GC pause, they won't be.  We therefore mark all objects in
+      // the "gap".  We do this object-by-object, since marking densely
+      // does not currently work right with marking bitmap iteration.  This
+      // means we rely on TLAB filling at the start of pauses, and no
+      // "resuscitation" of filled TLAB's.  If we want to do this, we need
+      // to fix the marking bitmap iteration.
+      HeapWord* curhw = r->next_top_at_mark_start();
+      HeapWord* t = original_top;
+
+      while (curhw < t) {
+        oop cur = (oop)curhw;
+        // We'll assume parallel for generality.  This is rare code.
+        concurrent_mark()->markAndGrayObjectIfNecessary(cur); // can't we just mark them?
+        curhw = curhw + cur->size();
+      }
+      assert(curhw == t, "Should have parsed correctly.");
+    }
+    if (G1PolicyVerbose > 1) {
+      gclog_or_tty->print("New alloc region ["PTR_FORMAT", "PTR_FORMAT", " PTR_FORMAT") "
+                          "for survivors:", r->bottom(), original_top, r->end());
+      r->print();
+    }
+    g1_policy()->record_before_bytes(r_used);
+  }
+}
+
+void G1CollectedHeap::push_gc_alloc_region(HeapRegion* hr) {
+  assert(Thread::current()->is_VM_thread() ||
+         par_alloc_during_gc_lock()->owned_by_self(), "Precondition");
+  assert(!hr->is_gc_alloc_region() && !hr->in_collection_set(),
+         "Precondition.");
+  hr->set_is_gc_alloc_region(true);
+  hr->set_next_gc_alloc_region(_gc_alloc_region_list);
+  _gc_alloc_region_list = hr;
+}
+
+#ifdef G1_DEBUG
+class FindGCAllocRegion: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->is_gc_alloc_region()) {
+      gclog_or_tty->print_cr("Region %d ["PTR_FORMAT"...] is still a gc_alloc_region.",
+                             r->hrs_index(), r->bottom());
+    }
+    return false;
+  }
+};
+#endif // G1_DEBUG
+
+void G1CollectedHeap::forget_alloc_region_list() {
+  assert(Thread::current()->is_VM_thread(), "Precondition");
+  while (_gc_alloc_region_list != NULL) {
+    HeapRegion* r = _gc_alloc_region_list;
+    assert(r->is_gc_alloc_region(), "Invariant.");
+    _gc_alloc_region_list = r->next_gc_alloc_region();
+    r->set_next_gc_alloc_region(NULL);
+    r->set_is_gc_alloc_region(false);
+    if (r->is_empty()) {
+      ++_free_regions;
+    }
+  }
+#ifdef G1_DEBUG
+  FindGCAllocRegion fa;
+  heap_region_iterate(&fa);
+#endif // G1_DEBUG
+}
+
+
+bool G1CollectedHeap::check_gc_alloc_regions() {
+  // TODO: allocation regions check
+  return true;
+}
+
+void G1CollectedHeap::get_gc_alloc_regions() {
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    // Create new GC alloc regions.
+    HeapRegion* alloc_region = _gc_alloc_regions[ap];
+    // Clear this alloc region, so that in case it turns out to be
+    // unacceptable, we end up with no allocation region, rather than a bad
+    // one.
+    _gc_alloc_regions[ap] = NULL;
+    if (alloc_region == NULL || alloc_region->in_collection_set()) {
+      // Can't re-use old one.  Allocate a new one.
+      alloc_region = newAllocRegionWithExpansion(ap, 0);
+    }
+    if (alloc_region != NULL) {
+      set_gc_alloc_region(ap, alloc_region);
+    }
+  }
+  // Set alternative regions for allocation purposes that have reached
+  // their limit.
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap);
+    if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) {
+      _gc_alloc_regions[ap] = _gc_alloc_regions[alt_purpose];
+    }
+  }
+  assert(check_gc_alloc_regions(), "alloc regions messed up");
+}
+
+void G1CollectedHeap::release_gc_alloc_regions() {
+  // We keep a separate list of all regions that have been alloc regions in
+  // the current collection pause.  Forget that now.
+  forget_alloc_region_list();
+
+  // The current alloc regions contain objs that have survived
+  // collection. Make them no longer GC alloc regions.
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    if (r != NULL && r->is_empty()) {
+      {
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        r->set_zero_fill_complete();
+        put_free_region_on_list_locked(r);
+      }
+    }
+    // set_gc_alloc_region will also NULLify all aliases to the region
+    set_gc_alloc_region(ap, NULL);
+    _gc_alloc_region_counts[ap] = 0;
+  }
+}
+
+void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
+  _drain_in_progress = false;
+  set_evac_failure_closure(cl);
+  _evac_failure_scan_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
+}
+
+void G1CollectedHeap::finalize_for_evac_failure() {
+  assert(_evac_failure_scan_stack != NULL &&
+         _evac_failure_scan_stack->length() == 0,
+         "Postcondition");
+  assert(!_drain_in_progress, "Postcondition");
+  // Don't have to delete, since the scan stack is a resource object.
+  _evac_failure_scan_stack = NULL;
+}
+
+
+
+// *** Sequential G1 Evacuation
+
+HeapWord* G1CollectedHeap::allocate_during_gc(GCAllocPurpose purpose, size_t word_size) {
+  HeapRegion* alloc_region = _gc_alloc_regions[purpose];
+  // let the caller handle alloc failure
+  if (alloc_region == NULL) return NULL;
+  assert(isHumongous(word_size) || !alloc_region->isHumongous(),
+         "Either the object is humongous or the region isn't");
+  HeapWord* block = alloc_region->allocate(word_size);
+  if (block == NULL) {
+    block = allocate_during_gc_slow(purpose, alloc_region, false, word_size);
+  }
+  return block;
+}
+
+class G1IsAliveClosure: public BoolObjectClosure {
+  G1CollectedHeap* _g1;
+public:
+  G1IsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
+  void do_object(oop p) { assert(false, "Do not call."); }
+  bool do_object_b(oop p) {
+    // It is reachable if it is outside the collection set, or is inside
+    // and forwarded.
+
+#ifdef G1_DEBUG
+    gclog_or_tty->print_cr("is alive "PTR_FORMAT" in CS %d forwarded %d overall %d",
+                           (void*) p, _g1->obj_in_cs(p), p->is_forwarded(),
+                           !_g1->obj_in_cs(p) || p->is_forwarded());
+#endif // G1_DEBUG
+
+    return !_g1->obj_in_cs(p) || p->is_forwarded();
+  }
+};
+
+class G1KeepAliveClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+public:
+  G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
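+  // If the referent is in the collection set it must have been kept
+  // alive and therefore forwarded; update the reference to point to
+  // its new location.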
+  void do_oop(oop* p) {
+    oop obj = *p;
+#ifdef G1_DEBUG
+    if (PrintGC && Verbose) {
+      gclog_or_tty->print_cr("keep alive *"PTR_FORMAT" = "PTR_FORMAT" "PTR_FORMAT,
+                             p, (void*) obj, (void*) *p);
+    }
+#endif // G1_DEBUG
+
+    if (_g1->obj_in_cs(obj)) {
+      assert( obj->is_forwarded(), "invariant" );
+      *p = obj->forwardee();
+
+#ifdef G1_DEBUG
+      gclog_or_tty->print_cr("     in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT,
+                             (void*) obj, (void*) *p);
+#endif // G1_DEBUG
+    }
+  }
+};
+
+class RecreateRSetEntriesClosure: public OopClosure {
+private:
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem_set;
+  HeapRegion* _from;
+public:
+  RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) :
+    _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from)
+  {}
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (*p != NULL) {
+      _g1_rem_set->write_ref(_from, p);
+    }
+  }
+};
+
+class RemoveSelfPointerClosure: public ObjectClosure {
+private:
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  HeapRegion* _hr;
+  size_t _prev_marked_bytes;
+  size_t _next_marked_bytes;
+public:
+  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) :
+    _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr),
+    _prev_marked_bytes(0), _next_marked_bytes(0)
+  {}
+
+  size_t prev_marked_bytes() { return _prev_marked_bytes; }
+  size_t next_marked_bytes() { return _next_marked_bytes; }
+
+  // The original idea here was to coalesce evacuated and dead objects.
+  // However that caused complications with the block offset table (BOT).
+  // In particular if there were two TLABs, one of them partially refined.
+  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
+  // The BOT entries of the unrefined part of TLAB_2 point to the start
+  // of TLAB_2. If the last object of the TLAB_1 and the first object
+  // of TLAB_2 are coalesced, then the cards of the unrefined part
+  // would point into middle of the filler object.
+  //
+  // The current approach is to not coalesce and leave the BOT contents intact.
+  void do_object(oop obj) {
+    if (obj->is_forwarded() && obj->forwardee() == obj) {
+      // The object failed to move.
+      assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
+      _cm->markPrev(obj);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+      _prev_marked_bytes += (obj->size() * HeapWordSize);
+      if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
+        _cm->markAndGrayObjectIfNecessary(obj);
+      }
+      obj->set_mark(markOopDesc::prototype());
+      // While we were processing RSet buffers during the
+      // collection, we actually didn't scan any cards on the
+      // collection set, since we didn't want to update remembered
+      // sets with entries that point into the collection set, given
+      // that live objects from the collection set are about to move
+      // and such entries will be stale very soon. This change also
+      // dealt with a reliability issue which involved scanning a
+      // card in the collection set and coming across an array that
+      // was being chunked and looking malformed. The problem is
+      // that, if evacuation fails, we might have remembered set
+      // entries missing given that we skipped cards on the
+      // collection set. So, we'll recreate such entries now.
+      RecreateRSetEntriesClosure cl(_g1, _hr);
+      obj->oop_iterate(&cl);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+    } else {
+      // The object has been either evacuated or is dead. Fill it with a
+      // dummy object.
+      MemRegion mr((HeapWord*)obj, obj->size());
+      SharedHeap::fill_region_with_object(mr);
+      _cm->clearRangeBothMaps(mr);
+    }
+  }
+};
+
+void G1CollectedHeap::remove_self_forwarding_pointers() {
+  HeapRegion* cur = g1_policy()->collection_set();
+
+  while (cur != NULL) {
+    assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+    if (cur->evacuation_failed()) {
+      RemoveSelfPointerClosure rspc(_g1h, cur);
+      assert(cur->in_collection_set(), "bad CS");
+      cur->object_iterate(&rspc);
+
+      // A number of manipulations to make the TAMS be the current top,
+      // and the marked bytes be the ones observed in the iteration.
+      if (_g1h->concurrent_mark()->at_least_one_mark_complete()) {
+        // The comments below are the postconditions achieved by the
+        // calls.  Note especially the last such condition, which says that
+        // the count of marked bytes has been properly restored.
+        cur->note_start_of_marking(false);
+        // _next_top_at_mark_start == top, _next_marked_bytes == 0
+        cur->add_to_marked_bytes(rspc.prev_marked_bytes());
+        // _next_marked_bytes == prev_marked_bytes.
+        cur->note_end_of_marking();
+        // _prev_top_at_mark_start == top(),
+        // _prev_marked_bytes == prev_marked_bytes
+      }
+      // If there is no mark in progress, we modified the _next variables
+      // above needlessly, but harmlessly.
+      if (_g1h->mark_in_progress()) {
+        cur->note_start_of_marking(false);
+        // _next_top_at_mark_start == top, _next_marked_bytes == 0
+        // _next_marked_bytes == next_marked_bytes.
+      }
+
+      // Now make sure the region has the right index in the sorted array.
+      g1_policy()->note_change_in_marked_bytes(cur);
+    }
+    cur = cur->next_in_collection_set();
+  }
+  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+  // Now restore saved marks, if any.
+  if (_objs_with_preserved_marks != NULL) {
+    assert(_preserved_marks_of_objs != NULL, "Both or none.");
+    assert(_objs_with_preserved_marks->length() ==
+           _preserved_marks_of_objs->length(), "Both or none.");
+    guarantee(_objs_with_preserved_marks->length() ==
+              _preserved_marks_of_objs->length(), "Both or none.");
+    for (int i = 0; i < _objs_with_preserved_marks->length(); i++) {
+      oop obj   = _objs_with_preserved_marks->at(i);
+      markOop m = _preserved_marks_of_objs->at(i);
+      obj->set_mark(m);
+    }
+    // Delete the preserved marks growable arrays (allocated on the C heap).
+    delete _objs_with_preserved_marks;
+    delete _preserved_marks_of_objs;
+    _objs_with_preserved_marks = NULL;
+    _preserved_marks_of_objs = NULL;
+  }
+}
+
+void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) {
+  _evac_failure_scan_stack->push(obj);
+}
+
+void G1CollectedHeap::drain_evac_failure_scan_stack() {
+  assert(_evac_failure_scan_stack != NULL, "precondition");
+
+  while (_evac_failure_scan_stack->length() > 0) {
+    oop obj = _evac_failure_scan_stack->pop();
+    _evac_failure_closure->set_region(heap_region_containing(obj));
+    obj->oop_iterate_backwards(_evac_failure_closure);
+  }
+}
+
+void G1CollectedHeap::handle_evacuation_failure(oop old) {
+  markOop m = old->mark();
+  // forward to self
+  assert(!old->is_forwarded(), "precondition");
+
+  old->forward_to(old);
+  handle_evacuation_failure_common(old, m);
+}
+
+oop
+G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
+                                               oop old) {
+  markOop m = old->mark();
+  oop forward_ptr = old->forward_to_atomic(old);
+  if (forward_ptr == NULL) {
+    // Forward-to-self succeeded.
+    if (_evac_failure_closure != cl) {
+      MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
+      assert(!_drain_in_progress,
+             "Should only be true while someone holds the lock.");
+      // Set the global evac-failure closure to the current thread's.
+      assert(_evac_failure_closure == NULL, "Or locking has failed.");
+      set_evac_failure_closure(cl);
+      // Now do the common part.
+      handle_evacuation_failure_common(old, m);
+      // Reset to NULL.
+      set_evac_failure_closure(NULL);
+    } else {
+      // The lock is already held, and this is recursive.
+      assert(_drain_in_progress, "This should only be the recursive case.");
+      handle_evacuation_failure_common(old, m);
+    }
+    return old;
+  } else {
+    // Someone else had a place to copy it.
+    return forward_ptr;
+  }
+}
+
+void G1CollectedHeap::handle_evacuation_failure_common(oop old, markOop m) {
+  set_evacuation_failed(true);
+
+  preserve_mark_if_necessary(old, m);
+
+  HeapRegion* r = heap_region_containing(old);
+  if (!r->evacuation_failed()) {
+    r->set_evacuation_failed(true);
+    if (G1TraceRegions) {
+      gclog_or_tty->print("evacuation failed in heap region "PTR_FORMAT" "
+                          "["PTR_FORMAT","PTR_FORMAT")\n",
+                          r, r->bottom(), r->end());
+    }
+  }
+
+  push_on_evac_failure_scan_stack(old);
+
+  if (!_drain_in_progress) {
+    // prevent recursion in copy_to_survivor_space()
+    _drain_in_progress = true;
+    drain_evac_failure_scan_stack();
+    _drain_in_progress = false;
+  }
+}
+
+void G1CollectedHeap::preserve_mark_if_necessary(oop obj, markOop m) {
+  if (m != markOopDesc::prototype()) {
+    if (_objs_with_preserved_marks == NULL) {
+      assert(_preserved_marks_of_objs == NULL, "Both or none.");
+      _objs_with_preserved_marks =
+        new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
+      _preserved_marks_of_objs =
+        new (ResourceObj::C_HEAP) GrowableArray<markOop>(40, true);
+    }
+    _objs_with_preserved_marks->push(obj);
+    _preserved_marks_of_objs->push(m);
+  }
+}
+
+// *** Parallel G1 Evacuation
+
+HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose,
+                                                  size_t word_size) {
+  HeapRegion* alloc_region = _gc_alloc_regions[purpose];
+  // let the caller handle alloc failure
+  if (alloc_region == NULL) return NULL;
+
+  HeapWord* block = alloc_region->par_allocate(word_size);
+  if (block == NULL) {
+    MutexLockerEx x(par_alloc_during_gc_lock(),
+                    Mutex::_no_safepoint_check_flag);
+    block = allocate_during_gc_slow(purpose, alloc_region, true, word_size);
+  }
+  return block;
+}
+
+HeapWord*
+G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose,
+                                         HeapRegion*    alloc_region,
+                                         bool           par,
+                                         size_t         word_size) {
+  HeapWord* block = NULL;
+  // In the parallel case, another thread that obtained the lock before us
+  // may have already assigned a new gc_alloc_region.
+  if (alloc_region != _gc_alloc_regions[purpose]) {
+    assert(par, "But should only happen in parallel case.");
+    alloc_region = _gc_alloc_regions[purpose];
+    if (alloc_region == NULL) return NULL;
+    block = alloc_region->par_allocate(word_size);
+    if (block != NULL) return block;
+    // Otherwise, continue; the new region couldn't satisfy it either.
+  }
+  assert(alloc_region != NULL, "We better have an allocation region");
+  // Another thread might have obtained alloc_region for the given
+  // purpose, and might be attempting to allocate in it, and might
+  // succeed.  Therefore, we can't do the "finalization" stuff on the
+  // region below until we're sure the last allocation has happened.
+  // We ensure this by allocating the remaining space with a garbage
+  // object.
+  if (par) par_allocate_remaining_space(alloc_region);
+  // Now we can do the post-GC stuff on the region.
+  alloc_region->note_end_of_copying();
+  g1_policy()->record_after_bytes(alloc_region->used());
+
+  if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) {
+    // Cannot allocate more regions for the given purpose.
+    GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(purpose);
+    // Is there an alternative?
+    if (purpose != alt_purpose) {
+      HeapRegion* alt_region = _gc_alloc_regions[alt_purpose];
+      // Has the alternative region not already been aliased to this one?
+      if (alloc_region != alt_region) {
+        // Try to allocate in the alternative region.
+        if (par) {
+          block = alt_region->par_allocate(word_size);
+        } else {
+          block = alt_region->allocate(word_size);
+        }
+        // Make an alias.
+        _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose];
+      }
+      if (block != NULL) {
+        return block;
+      }
+      // Both the allocation region and the alternative one are full
+      // and aliased; replace them with a new allocation region.
+      purpose = alt_purpose;
+    } else {
+      set_gc_alloc_region(purpose, NULL);
+      return NULL;
+    }
+  }
+
+  // Now allocate a new region for allocation.
+  alloc_region = newAllocRegionWithExpansion(purpose, word_size, false /*zero_filled*/);
+
+  // let the caller handle alloc failure
+  if (alloc_region != NULL) {
+
+    assert(check_gc_alloc_regions(), "alloc regions messed up");
+    assert(alloc_region->saved_mark_at_top(),
+           "Mark should have been saved already.");
+    // We used to assert that the region was zero-filled here, but no
+    // longer.
+
+    // This must be done last: once it's installed, other threads may
+    // allocate in it (without holding the lock).
+    set_gc_alloc_region(purpose, alloc_region);
+
+    if (par) {
+      block = alloc_region->par_allocate(word_size);
+    } else {
+      block = alloc_region->allocate(word_size);
+    }
+    // Caller handles alloc failure.
+  } else {
+    // This also NULLifies any other aliases of the same old alloc region.
+    set_gc_alloc_region(purpose, NULL);
+  }
+  return block;  // May be NULL.
+}
+
+void G1CollectedHeap::par_allocate_remaining_space(HeapRegion* r) {
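+  // Claim whatever free space is left in the region (retrying if
+  // another thread grabs some of it first) and fill it with a dummy
+  // object, so that no further allocations can happen in the region
+  // and it remains parsable.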
+  HeapWord* block = NULL;
+  size_t free_words;
+  do {
+    free_words = r->free()/HeapWordSize;
+    // If there's too little space, no one can allocate, so we're done.
+    if (free_words < (size_t)oopDesc::header_size()) return;
+    // Otherwise, try to claim it.
+    block = r->par_allocate(free_words);
+  } while (block == NULL);
+  SharedHeap::fill_region_with_object(MemRegion(block, free_words));
+}
+
+#define use_local_bitmaps         1
+#define verify_local_bitmaps      0
+
+#ifndef PRODUCT
+
+class GCLabBitMap;
+class GCLabBitMapClosure: public BitMapClosure {
+private:
+  ConcurrentMark* _cm;
+  GCLabBitMap*    _bitmap;
+
+public:
+  GCLabBitMapClosure(ConcurrentMark* cm,
+                     GCLabBitMap* bitmap) {
+    _cm     = cm;
+    _bitmap = bitmap;
+  }
+
+  virtual bool do_bit(size_t offset);
+};
+
+#endif // PRODUCT
+
+#define oop_buffer_length 256
+
+class GCLabBitMap: public BitMap {
+private:
+  ConcurrentMark* _cm;
+
+  int       _shifter;
+  size_t    _bitmap_word_covers_words;
+
+  // beginning of the heap
+  HeapWord* _heap_start;
+
+  // this is the actual start of the GCLab
+  HeapWord* _real_start_word;
+
+  // this is the actual end of the GCLab
+  HeapWord* _real_end_word;
+
+  // this is the first word, possibly located before the actual start
+  // of the GCLab, that corresponds to the first bit of the bitmap
+  HeapWord* _start_word;
+
+  // size of a GCLab in words
+  size_t _gclab_word_size;
+
+  static int shifter() {
+    return MinObjAlignment - 1;
+  }
+
+  // how many heap words does a single bitmap word correspond to?
+  static size_t bitmap_word_covers_words() {
+    return BitsPerWord << shifter();
+  }
+
+  static size_t gclab_word_size() {
+    return ParallelGCG1AllocBufferSize / HeapWordSize;
+  }
+
+  static size_t bitmap_size_in_bits() {
+    size_t bits_in_bitmap = gclab_word_size() >> shifter();
+    // We are going to ensure that the beginning of a word in this
+    // bitmap also corresponds to the beginning of a word in the
+    // global marking bitmap. To handle the case where a GCLab
+    // starts from the middle of the bitmap, we need to add enough
+    // space (i.e. up to a bitmap word) to ensure that we have
+    // enough bits in the bitmap.
+    return bits_in_bitmap + BitsPerWord - 1;
+  }
+public:
+  GCLabBitMap(HeapWord* heap_start)
+    : BitMap(bitmap_size_in_bits()),
+      _cm(G1CollectedHeap::heap()->concurrent_mark()),
+      _shifter(shifter()),
+      _bitmap_word_covers_words(bitmap_word_covers_words()),
+      _heap_start(heap_start),
+      _gclab_word_size(gclab_word_size()),
+      _real_start_word(NULL),
+      _real_end_word(NULL),
+      _start_word(NULL)
+  {
+    guarantee( size_in_words() >= bitmap_size_in_words(),
+               "just making sure");
+  }
+
+  inline unsigned heapWordToOffset(HeapWord* addr) {
+    unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter;
+    assert(offset < size(), "offset should be within bounds");
+    return offset;
+  }
+
+  inline HeapWord* offsetToHeapWord(size_t offset) {
+    HeapWord* addr =  _start_word + (offset << _shifter);
+    assert(_real_start_word <= addr && addr < _real_end_word, "invariant");
+    return addr;
+  }
+
+  bool fields_well_formed() {
+    bool ret1 = (_real_start_word == NULL) &&
+                (_real_end_word == NULL) &&
+                (_start_word == NULL);
+    if (ret1)
+      return true;
+
+    bool ret2 = _real_start_word >= _start_word &&
+      _start_word < _real_end_word &&
+      (_real_start_word + _gclab_word_size) == _real_end_word &&
+      (_start_word + _gclab_word_size + _bitmap_word_covers_words)
+                                                              > _real_end_word;
+    return ret2;
+  }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (addr >= _real_start_word && addr < _real_end_word) {
+      assert(!isMarked(addr), "should not have already been marked");
+
+      // first mark it on the bitmap
+      at_put(heapWordToOffset(addr), true);
+
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  inline bool isMarked(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    return at(heapWordToOffset(addr));
+  }
+
+  void set_buffer(HeapWord* start) {
+    guarantee(use_local_bitmaps, "invariant");
+    clear();
+
+    assert(start != NULL, "invariant");
+    _real_start_word = start;
+    _real_end_word   = start + _gclab_word_size;
+
+    size_t diff =
+      pointer_delta(start, _heap_start) % _bitmap_word_covers_words;
+    _start_word = start - diff;
+
+    assert(fields_well_formed(), "invariant");
+  }
+
+#ifndef PRODUCT
+  void verify() {
+    // verify that the marks have been propagated
+    GCLabBitMapClosure cl(_cm, this);
+    iterate(&cl);
+  }
+#endif // PRODUCT
+
+  void retire() {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (_start_word != NULL) {
+      CMBitMap*       mark_bitmap = _cm->nextMarkBitMap();
+
+      // this means that the bitmap was set up for the GCLab
+      assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
+
+      mark_bitmap->mostly_disjoint_range_union(this,
+                                0, // always start from the start of the bitmap
+                                _start_word,
+                                size_in_words());
+      _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word));
+
+#ifndef PRODUCT
+      if (use_local_bitmaps && verify_local_bitmaps)
+        verify();
+#endif // PRODUCT
+    } else {
+      assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
+    }
+  }
+
+  static size_t bitmap_size_in_words() {
+    return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord;
+  }
+};
+
+#ifndef PRODUCT
+
+bool GCLabBitMapClosure::do_bit(size_t offset) {
+  HeapWord* addr = _bitmap->offsetToHeapWord(offset);
+  guarantee(_cm->isMarked(oop(addr)), "it should be!");
+  return true;
+}
+
+#endif // PRODUCT
+
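+// G1's per-purpose parallel allocation buffer. It extends the shared
+// ParGCAllocBuffer with the local marking bitmap above: when a pause
+// happens while marking is in progress, objects copied into this buffer
+// are marked locally and the marks are flushed when the buffer is
+// retired. The _retired flag makes retire() idempotent.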
+class G1ParGCAllocBuffer: public ParGCAllocBuffer {
+private:
+  bool        _retired;
+  bool        _during_marking;
+  GCLabBitMap _bitmap;
+
+public:
+  G1ParGCAllocBuffer() :
+    ParGCAllocBuffer(ParallelGCG1AllocBufferSize / HeapWordSize),
+    _during_marking(G1CollectedHeap::heap()->mark_in_progress()),
+    _bitmap(G1CollectedHeap::heap()->reserved_region().start()),
+    _retired(false)
+  { }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(_during_marking, "invariant");
+    return _bitmap.mark(addr);
+  }
+
+  inline void set_buf(HeapWord* buf) {
+    if (use_local_bitmaps && _during_marking)
+      _bitmap.set_buffer(buf);
+    ParGCAllocBuffer::set_buf(buf);
+    _retired = false;
+  }
+
+  inline void retire(bool end_of_gc, bool retain) {
+    if (_retired)
+      return;
+    if (use_local_bitmaps && _during_marking) {
+      _bitmap.retire();
+    }
+    ParGCAllocBuffer::retire(end_of_gc, retain);
+    _retired = true;
+  }
+};
+
+
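+// Per-worker state for an evacuation pause: the worker's task queue and
+// overflow queue of oop* entries, one allocation buffer per
+// GCAllocPurpose, waste and timing statistics, and the surviving-young-
+// words histogram that is folded back into the policy at the end of the
+// pause.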
+class G1ParScanThreadState : public StackObj {
+protected:
+  G1CollectedHeap* _g1h;
+  RefToScanQueue*  _refs;
+
+  typedef GrowableArray<oop*> OverflowQueue;
+  OverflowQueue* _overflowed_refs;
+
+  G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+
+  size_t           _alloc_buffer_waste;
+  size_t           _undo_waste;
+
+  OopsInHeapRegionClosure*      _evac_failure_cl;
+  G1ParScanHeapEvacClosure*     _evac_cl;
+  G1ParScanPartialArrayClosure* _partial_scan_cl;
+
+  int _hash_seed;
+  int _queue_num;
+
+  int _term_attempts;
+#if G1_DETAILED_STATS
+  int _pushes, _pops, _steals, _steal_attempts;
+  int _overflow_pushes;
+#endif
+
+  double _start;
+  double _start_strong_roots;
+  double _strong_roots_time;
+  double _start_term;
+  double _term_time;
+
+  // Map from young-age-index (0 == not young, 1 is youngest) to
+  // surviving words. The base pointer is what we get back from the malloc call.
+  size_t* _surviving_young_words_base;
+  // This pointer points into the array above; the first few entries are
+  // used for padding, to reduce cache contention.
+  size_t* _surviving_young_words;
+
+#define PADDING_ELEM_NUM (64 / sizeof(size_t))
+
+  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
+
+  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
+
+public:
+  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
+    : _g1h(g1h),
+      _refs(g1h->task_queue(queue_num)),
+      _hash_seed(17), _queue_num(queue_num),
+      _term_attempts(0),
+#if G1_DETAILED_STATS
+      _pushes(0), _pops(0), _steals(0),
+      _steal_attempts(0),  _overflow_pushes(0),
+#endif
+      _strong_roots_time(0), _term_time(0),
+      _alloc_buffer_waste(0), _undo_waste(0)
+  {
+    // We allocate one entry per region in the young collection set plus
+    // one, since we "sacrifice" entry 0 to keep track of surviving bytes
+    // for non-young regions (where the age is -1).
+    // We also add a few padding elements at the beginning and at the end
+    // in an attempt to eliminate cache contention.
+    size_t real_length = 1 + _g1h->g1_policy()->young_cset_length();
+    size_t array_length = PADDING_ELEM_NUM +
+                          real_length +
+                          PADDING_ELEM_NUM;
+    _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length);
+    if (_surviving_young_words_base == NULL)
+      vm_exit_out_of_memory(array_length * sizeof(size_t),
+                            "Not enough space for young surv histo.");
+    _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
+    memset(_surviving_young_words, 0, real_length * sizeof(size_t));
+
+    _overflowed_refs = new OverflowQueue(10);
+
+    _start = os::elapsedTime();
+  }
+
+  ~G1ParScanThreadState() {
+    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
+  }
+
+  RefToScanQueue*   refs()            { return _refs;             }
+  OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
+
+  inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+    return &_alloc_buffers[purpose];
+  }
+
+  size_t alloc_buffer_waste()                    { return _alloc_buffer_waste; }
+  size_t undo_waste()                            { return _undo_waste; }
+
+  void push_on_queue(oop* ref) {
+    if (!refs()->push(ref)) {
+      overflowed_refs()->push(ref);
+      IF_G1_DETAILED_STATS(note_overflow_push());
+    } else {
+      IF_G1_DETAILED_STATS(note_push());
+    }
+  }
+
+  void pop_from_queue(oop*& ref) {
+    if (!refs()->pop_local(ref)) {
+      ref = NULL;
+    } else {
+      IF_G1_DETAILED_STATS(note_pop());
+    }
+  }
+
+  void pop_from_overflow_queue(oop*& ref) {
+    ref = overflowed_refs()->pop();
+  }
+
+  int refs_to_scan()                             { return refs()->size();                 }
+  int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
+
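+  // Slow path for allocation during evacuation: if the request is small
+  // relative to the buffer size (bounded by ParallelGCBufferWastePct),
+  // retire the current buffer, grab a fresh GCLab from the heap and
+  // allocate from that; otherwise allocate the object directly from the
+  // heap region for this purpose.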
+  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
+
+    HeapWord* obj = NULL;
+    if (word_sz * 100 <
+        (size_t)(ParallelGCG1AllocBufferSize / HeapWordSize) *
+                                                  ParallelGCBufferWastePct) {
+      G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
+      add_to_alloc_buffer_waste(alloc_buf->words_remaining());
+      alloc_buf->retire(false, false);
+
+      HeapWord* buf =
+        _g1h->par_allocate_during_gc(purpose, ParallelGCG1AllocBufferSize / HeapWordSize);
+      if (buf == NULL) return NULL; // Let caller handle allocation failure.
+      // Otherwise.
+      alloc_buf->set_buf(buf);
+
+      obj = alloc_buf->allocate(word_sz);
+      assert(obj != NULL, "buffer was definitely big enough...");
+    }
+    else {
+      obj = _g1h->par_allocate_during_gc(purpose, word_sz);
+    }
+    return obj;
+  }
+
+  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) {
+    HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
+    if (obj != NULL) return obj;
+    return allocate_slow(purpose, word_sz);
+  }
+
+  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) {
+    if (alloc_buffer(purpose)->contains(obj)) {
+      guarantee(alloc_buffer(purpose)->contains(obj + word_sz - 1),
+                "should contain whole object");
+      alloc_buffer(purpose)->undo_allocation(obj, word_sz);
+    }
+    else {
+      SharedHeap::fill_region_with_object(MemRegion(obj, word_sz));
+      add_to_undo_waste(word_sz);
+    }
+  }
+
+  void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
+    _evac_failure_cl = evac_failure_cl;
+  }
+  OopsInHeapRegionClosure* evac_failure_closure() {
+    return _evac_failure_cl;
+  }
+
+  void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) {
+    _evac_cl = evac_cl;
+  }
+
+  void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) {
+    _partial_scan_cl = partial_scan_cl;
+  }
+
+  int* hash_seed() { return &_hash_seed; }
+  int  queue_num() { return _queue_num; }
+
+  int term_attempts()   { return _term_attempts; }
+  void note_term_attempt()  { _term_attempts++; }
+
+#if G1_DETAILED_STATS
+  int pushes()          { return _pushes; }
+  int pops()            { return _pops; }
+  int steals()          { return _steals; }
+  int steal_attempts()  { return _steal_attempts; }
+  int overflow_pushes() { return _overflow_pushes; }
+
+  void note_push()          { _pushes++; }
+  void note_pop()           { _pops++; }
+  void note_steal()         { _steals++; }
+  void note_steal_attempt() { _steal_attempts++; }
+  void note_overflow_push() { _overflow_pushes++; }
+#endif
+
+  void start_strong_roots() {
+    _start_strong_roots = os::elapsedTime();
+  }
+  void end_strong_roots() {
+    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
+  }
+  double strong_roots_time() { return _strong_roots_time; }
+
+  void start_term_time() {
+    note_term_attempt();
+    _start_term = os::elapsedTime();
+  }
+  void end_term_time() {
+    _term_time += (os::elapsedTime() - _start_term);
+  }
+  double term_time() { return _term_time; }
+
+  double elapsed() {
+    return os::elapsedTime() - _start;
+  }
+
+  size_t* surviving_young_words() {
+    // Callers add one to the returned pointer to hide entry 0, which
+    // accumulates surviving words for age -1 (i.e. non-young) regions.
+    return _surviving_young_words;
+  }
+
+  void retire_alloc_buffers() {
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      size_t waste = _alloc_buffers[ap].words_remaining();
+      add_to_alloc_buffer_waste(waste);
+      _alloc_buffers[ap].retire(true, false);
+    }
+  }
+
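+  // Drain this worker's queues: pop references (preferring the overflow
+  // queue when it is non-empty, otherwise the local task queue) and apply
+  // either the partial-array closure or the evacuation closure, depending
+  // on whether the entry is tagged with G1_PARTIAL_ARRAY_MASK.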
+  void trim_queue() {
+    while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
+      oop *ref_to_scan = NULL;
+      if (overflowed_refs_to_scan() == 0) {
+        pop_from_queue(ref_to_scan);
+      } else {
+        pop_from_overflow_queue(ref_to_scan);
+      }
+      if (ref_to_scan != NULL) {
+        if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) {
+          _partial_scan_cl->do_oop_nv(ref_to_scan);
+        } else {
+          // Note: we can use "raw" versions of "region_containing" because
+          // "obj_to_scan" is definitely in the heap, and is not in a
+          // humongous region.
+          HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+          _evac_cl->set_region(r);
+          _evac_cl->do_oop_nv(ref_to_scan);
+        }
+      }
+    }
+  }
+};
+
+
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+  _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
+  _par_scan_state(par_scan_state) { }
+
+// This closure is applied to the fields of objects that have just been copied.
+// Should probably be made inline and moved to g1OopClosures.inline.hpp.
+void G1ParScanClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL) {
+    if (_g1->obj_in_cs(obj)) {
+      if (obj->is_forwarded()) {
+        *p = obj->forwardee();
+      } else {
+        _par_scan_state->push_on_queue(p);
+        return;
+      }
+    }
+    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+  }
+}
+
+void G1ParCopyHelper::mark_forwardee(oop* p) {
+  // This is called _after_ do_oop_work has been called, hence after
+  // the object has been relocated to its new location and *p points
+  // to its new location.
+
+  oop thisOop = *p;
+  if (thisOop != NULL) {
+    assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(thisOop)),
+           "shouldn't still be in the CSet if evacuation didn't fail.");
+    HeapWord* addr = (HeapWord*)thisOop;
+    if (_g1->is_in_g1_reserved(addr))
+      _cm->grayRoot(oop(addr));
+  }
+}
+
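+// Copy "old" out of the collection set: the policy picks a destination
+// (survivor or old) based on the object's age, space is allocated there,
+// and the forwarding pointer is installed atomically. Only the winning
+// thread copies the object, fixes up its mark/age and scans or queues
+// its fields; a losing thread undoes its allocation and returns the
+// winner's copy.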
+oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
+  size_t    word_sz = old->size();
+  HeapRegion* from_region = _g1->heap_region_containing_raw(old);
+  // +1 to make the -1 indexes valid...
+  int       young_index = from_region->young_index_in_cset()+1;
+  assert( (from_region->is_young() && young_index > 0) ||
+          (!from_region->is_young() && young_index == 0), "invariant" );
+  G1CollectorPolicy* g1p = _g1->g1_policy();
+  markOop m = old->mark();
+  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(),
+                                                             word_sz);
+  HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
+  oop       obj     = oop(obj_ptr);
+
+  if (obj_ptr == NULL) {
+    // This will either forward-to-self, or detect that someone else has
+    // installed a forwarding pointer.
+    OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
+    return _g1->handle_evacuation_failure_par(cl, old);
+  }
+
+  oop forward_ptr = old->forward_to_atomic(obj);
+  if (forward_ptr == NULL) {
+    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
+    obj->set_mark(m);
+    if (g1p->track_object_age(alloc_purpose)) {
+      obj->incr_age();
+    }
+    // preserve "next" mark bit
+    if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
+      if (!use_local_bitmaps ||
+          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
+        // if we couldn't mark it on the local bitmap (this happens when
+        // the object was not allocated in the GCLab), we have to bite
+        // the bullet and do the standard parallel mark
+        _cm->markAndGrayObjectIfNecessary(obj);
+      }
+#if 1
+      if (_g1->isMarkedNext(old)) {
+        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
+      }
+#endif
+    }
+
+    size_t* surv_young_words = _par_scan_state->surviving_young_words();
+    surv_young_words[young_index] += word_sz;
+
+    if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
+      arrayOop(old)->set_length(0);
+      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+    } else {
+      _scanner->set_region(_g1->heap_region_containing(obj));
+      obj->oop_iterate_backwards(_scanner);
+    }
+  } else {
+    _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
+    obj = forward_ptr;
+  }
+  return obj;
+}
+
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+  oop obj = *p;
+  assert(barrier != G1BarrierRS || obj != NULL,
+         "Precondition: G1BarrierRS implies obj is nonNull");
+
+  if (obj != NULL) {
+    if (_g1->obj_in_cs(obj)) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
+                             p, (void*) obj);
+#endif
+      if (obj->is_forwarded()) {
+        *p = obj->forwardee();
+      } else {
+        *p = copy_to_survivor_space(obj);
+      }
+      // When scanning the RS, we only care about objs in CS.
+      if (barrier == G1BarrierRS) {
+        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+      }
+    }
+    // When scanning moved objs, must look at all oops.
+    if (barrier == G1BarrierEvac) {
+      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+    }
+
+    if (do_gen_barrier) {
+      par_do_barrier(p);
+    }
+  }
+}
+
+template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
+
+template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+  oop obj, int start, int end) {
+  // process our set of indices (include header in first chunk)
+  assert(start < end, "invariant");
+  T* const base      = (T*)objArrayOop(obj)->base();
+  T* const start_addr = (start == 0) ? (T*) obj : base + start;
+  T* const end_addr   = base + end;
+  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
+  _scanner.set_region(_g1->heap_region_containing(obj));
+  obj->oop_iterate(&_scanner, mr);
+}
+
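+// Large object arrays are scanned in chunks of ParGCArrayScanChunk
+// elements. The length field of the from-space copy records how far the
+// scan has progressed; while more than two chunks' worth of elements
+// remain, one chunk is processed and the entry is pushed back on the
+// queue (tagged with G1_PARTIAL_ARRAY_MASK), where other workers may
+// steal it.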
+void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
+  assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
+  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(old->is_objArray(), "must be obj array");
+  assert(old->is_forwarded(), "must be forwarded");
+  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
+
+  objArrayOop obj = objArrayOop(old->forwardee());
+  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
+  // Process ParGCArrayScanChunk elements now
+  // and push the remainder back onto queue
+  int start     = arrayOop(old)->length();
+  int end       = obj->length();
+  int remainder = end - start;
+  assert(start <= end, "just checking");
+  if (remainder > 2 * ParGCArrayScanChunk) {
+    // Test above combines last partial chunk with a full chunk
+    end = start + ParGCArrayScanChunk;
+    arrayOop(old)->set_length(end);
+    // Push remainder.
+    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+  } else {
+    // Restore length so that the heap remains parsable in
+    // case of evacuation failure.
+    arrayOop(old)->set_length(end);
+  }
+
+  // Process our set of indices (including the header in the first chunk).
+  process_array_chunk<oop>(obj, start, end);
+}
+
+int G1ScanAndBalanceClosure::_nq = 0;
+
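+// Each worker repeatedly drains its own queues and then tries to steal
+// from other workers' queues; when neither yields work it offers
+// termination, and all workers finish together once every queue is
+// empty.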
+class G1ParEvacuateFollowersClosure : public VoidClosure {
+protected:
+  G1CollectedHeap*              _g1h;
+  G1ParScanThreadState*         _par_scan_state;
+  RefToScanQueueSet*            _queues;
+  ParallelTaskTerminator*       _terminator;
+
+  G1ParScanThreadState*   par_scan_state() { return _par_scan_state; }
+  RefToScanQueueSet*      queues()         { return _queues; }
+  ParallelTaskTerminator* terminator()     { return _terminator; }
+
+public:
+  G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h,
+                                G1ParScanThreadState* par_scan_state,
+                                RefToScanQueueSet* queues,
+                                ParallelTaskTerminator* terminator)
+    : _g1h(g1h), _par_scan_state(par_scan_state),
+      _queues(queues), _terminator(terminator) {}
+
+  void do_void() {
+    G1ParScanThreadState* pss = par_scan_state();
+    while (true) {
+      oop* ref_to_scan;
+      pss->trim_queue();
+      IF_G1_DETAILED_STATS(pss->note_steal_attempt());
+      if (queues()->steal(pss->queue_num(),
+                          pss->hash_seed(),
+                          ref_to_scan)) {
+        IF_G1_DETAILED_STATS(pss->note_steal());
+        pss->push_on_queue(ref_to_scan);
+        continue;
+      }
+      pss->start_term_time();
+      if (terminator()->offer_termination()) break;
+      pss->end_term_time();
+    }
+    pss->end_term_time();
+    pss->retire_alloc_buffers();
+  }
+};
+
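+// The gang task run by each GC worker during an evacuation pause: set up
+// the per-thread scan state and closures, scan the strong roots
+// (external, perm-gen and remembered-set), then evacuate followers until
+// global termination, recording per-phase times in the policy.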
+class G1ParTask : public AbstractGangTask {
+protected:
+  G1CollectedHeap*       _g1h;
+  RefToScanQueueSet      *_queues;
+  ParallelTaskTerminator _terminator;
+
+  Mutex _stats_lock;
+  Mutex* stats_lock() { return &_stats_lock; }
+
+  size_t getNCards() {
+    return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
+      / G1BlockOffsetSharedArray::N_bytes;
+  }
+
+public:
+  G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues)
+    : AbstractGangTask("G1 collection"),
+      _g1h(g1h),
+      _queues(task_queues),
+      _terminator(workers, _queues),
+      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
+  {}
+
+  RefToScanQueueSet* queues() { return _queues; }
+
+  RefToScanQueue *work_queue(int i) {
+    return queues()->queue(i);
+  }
+
+  void work(int i) {
+    ResourceMark rm;
+    HandleMark   hm;
+
+    G1ParScanThreadState pss(_g1h, i);
+    G1ParScanHeapEvacClosure     scan_evac_cl(_g1h, &pss);
+    G1ParScanHeapEvacClosure     evac_failure_cl(_g1h, &pss);
+    G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss);
+
+    pss.set_evac_closure(&scan_evac_cl);
+    pss.set_evac_failure_closure(&evac_failure_cl);
+    pss.set_partial_scan_closure(&partial_scan_cl);
+
+    G1ParScanExtRootClosure         only_scan_root_cl(_g1h, &pss);
+    G1ParScanPermClosure            only_scan_perm_cl(_g1h, &pss);
+    G1ParScanHeapRSClosure          only_scan_heap_rs_cl(_g1h, &pss);
+    G1ParScanAndMarkExtRootClosure  scan_mark_root_cl(_g1h, &pss);
+    G1ParScanAndMarkPermClosure     scan_mark_perm_cl(_g1h, &pss);
+    G1ParScanAndMarkHeapRSClosure   scan_mark_heap_rs_cl(_g1h, &pss);
+
+    OopsInHeapRegionClosure        *scan_root_cl;
+    OopsInHeapRegionClosure        *scan_perm_cl;
+    OopsInHeapRegionClosure        *scan_so_cl;
+
+    if (_g1h->g1_policy()->should_initiate_conc_mark()) {
+      scan_root_cl = &scan_mark_root_cl;
+      scan_perm_cl = &scan_mark_perm_cl;
+      scan_so_cl   = &scan_mark_heap_rs_cl;
+    } else {
+      scan_root_cl = &only_scan_root_cl;
+      scan_perm_cl = &only_scan_perm_cl;
+      scan_so_cl   = &only_scan_heap_rs_cl;
+    }
+
+    pss.start_strong_roots();
+    _g1h->g1_process_strong_roots(/* not collecting perm */ false,
+                                  SharedHeap::SO_AllClasses,
+                                  scan_root_cl,
+                                  &only_scan_heap_rs_cl,
+                                  scan_so_cl,
+                                  scan_perm_cl,
+                                  i);
+    pss.end_strong_roots();
+    {
+      double start = os::elapsedTime();
+      G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
+      evac.do_void();
+      double elapsed_ms = (os::elapsedTime()-start)*1000.0;
+      double term_ms = pss.term_time()*1000.0;
+      _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
+      _g1h->g1_policy()->record_termination_time(i, term_ms);
+    }
+    _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
+
+    // Clean up any par-expanded rem sets.
+    HeapRegionRemSet::par_cleanup();
+
+    MutexLocker x(stats_lock());
+    if (ParallelGCVerbose) {
+      gclog_or_tty->print("Thread %d complete:\n", i);
+#if G1_DETAILED_STATS
+      gclog_or_tty->print("  Pushes: %7d    Pops: %7d   Overflows: %7d   Steals %7d (in %d attempts)\n",
+                          pss.pushes(),
+                          pss.pops(),
+                          pss.overflow_pushes(),
+                          pss.steals(),
+                          pss.steal_attempts());
+#endif
+      double elapsed      = pss.elapsed();
+      double strong_roots = pss.strong_roots_time();
+      double term         = pss.term_time();
+      gclog_or_tty->print("  Elapsed: %7.2f ms.\n"
+                          "    Strong roots: %7.2f ms (%6.2f%%)\n"
+                          "    Termination:  %7.2f ms (%6.2f%%) (in %d entries)\n",
+                          elapsed * 1000.0,
+                          strong_roots * 1000.0, (strong_roots*100.0/elapsed),
+                          term * 1000.0, (term*100.0/elapsed),
+                          pss.term_attempts());
+      size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste();
+      gclog_or_tty->print("  Waste: %8dK\n"
+                 "    Alloc Buffer: %8dK\n"
+                 "    Undo: %8dK\n",
+                 (total_waste * HeapWordSize) / K,
+                 (pss.alloc_buffer_waste() * HeapWordSize) / K,
+                 (pss.undo_waste() * HeapWordSize) / K);
+    }
+
+    assert(pss.refs_to_scan() == 0, "Task queue should be empty");
+    assert(pss.overflowed_refs_to_scan() == 0, "Overflow queue should be empty");
+  }
+};
+
+// *** Common G1 Evacuation Stuff
+
+class G1CountClosure: public OopsInHeapRegionClosure {
+public:
+  int n;
+  G1CountClosure() : n(0) {}
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+    assert(obj != NULL && G1CollectedHeap::heap()->obj_in_cs(obj),
+           "Rem set closure called on non-rem-set pointer.");
+    n++;
+  }
+};
+
+static G1CountClosure count_closure;
+
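+// Scan the strong roots on behalf of one worker. External and perm-gen
+// roots go through buffering closures so that the time spent applying
+// the copying closure can be measured separately; the mark stack, the
+// scan-only young regions and the remembered sets of the collection set
+// are then scanned, with each phase's time reported to the policy.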
+void
+G1CollectedHeap::
+g1_process_strong_roots(bool collecting_perm_gen,
+                        SharedHeap::ScanningOption so,
+                        OopClosure* scan_non_heap_roots,
+                        OopsInHeapRegionClosure* scan_rs,
+                        OopsInHeapRegionClosure* scan_so,
+                        OopsInGenClosure* scan_perm,
+                        int worker_i) {
+  // First scan the strong roots, including the perm gen.
+  double ext_roots_start = os::elapsedTime();
+  double closure_app_time_sec = 0.0;
+
+  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
+  BufferingOopsInGenClosure buf_scan_perm(scan_perm);
+  buf_scan_perm.set_generation(perm_gen());
+
+  process_strong_roots(collecting_perm_gen, so,
+                       &buf_scan_non_heap_roots,
+                       &buf_scan_perm);
+  // Finish up any enqueued closure apps.
+  buf_scan_non_heap_roots.done();
+  buf_scan_perm.done();
+  double ext_roots_end = os::elapsedTime();
+  g1_policy()->reset_obj_copy_time(worker_i);
+  double obj_copy_time_sec =
+    buf_scan_non_heap_roots.closure_app_seconds() +
+    buf_scan_perm.closure_app_seconds();
+  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
+  double ext_root_time_ms =
+    ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
+  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
+
+  // Scan strong roots in mark stack.
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
+    concurrent_mark()->oops_do(scan_non_heap_roots);
+  }
+  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+
+  // XXX What should this be doing in the parallel case?
+  g1_policy()->record_collection_pause_end_CH_strong_roots();
+  if (G1VerifyRemSet) {
+    // :::: FIXME ::::
+    // The stupid remembered set doesn't know how to filter out dead
+    // objects, while the smart one does; when the two are built and
+    // compared, the entry counts differ and the verification fails.
+    guarantee(false, "verification code is broken, see note");
+
+    // Let's make sure that the current rem set agrees with the stupidest
+    // one possible!
+    bool refs_enabled = ref_processor()->discovery_enabled();
+    if (refs_enabled) ref_processor()->disable_discovery();
+    StupidG1RemSet stupid(this);
+    count_closure.n = 0;
+    stupid.oops_into_collection_set_do(&count_closure, worker_i);
+    int stupid_n = count_closure.n;
+    count_closure.n = 0;
+    g1_rem_set()->oops_into_collection_set_do(&count_closure, worker_i);
+    guarantee(count_closure.n == stupid_n, "Old and new rem sets differ.");
+    gclog_or_tty->print_cr("\nFound %d pointers in heap RS.", count_closure.n);
+    if (refs_enabled) ref_processor()->enable_discovery();
+  }
+  if (scan_so != NULL) {
+    scan_scan_only_set(scan_so, worker_i);
+  }
+  // Now scan the complement of the collection set.
+  if (scan_rs != NULL) {
+    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
+  }
+  // Finish with the ref_processor roots.
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
+    ref_processor()->oops_do(scan_non_heap_roots);
+  }
+  g1_policy()->record_collection_pause_end_G1_strong_roots();
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void
+G1CollectedHeap::scan_scan_only_region(HeapRegion* r,
+                                       OopsInHeapRegionClosure* oc,
+                                       int worker_i) {
+  HeapWord* startAddr = r->bottom();
+  HeapWord* endAddr = r->used_region().end();
+
+  oc->set_region(r);
+
+  HeapWord* p = r->bottom();
+  HeapWord* t = r->top();
+  guarantee( p == r->next_top_at_mark_start(), "invariant" );
+  while (p < t) {
+    oop obj = oop(p);
+    p += obj->oop_iterate(oc);
+  }
+}
+
+void
+G1CollectedHeap::scan_scan_only_set(OopsInHeapRegionClosure* oc,
+                                    int worker_i) {
+  double start = os::elapsedTime();
+
+  BufferingOopsInHeapRegionClosure boc(oc);
+
+  FilterInHeapRegionAndIntoCSClosure scan_only(this, &boc);
+  FilterAndMarkInHeapRegionAndIntoCSClosure scan_and_mark(this, &boc, concurrent_mark());
+
+  OopsInHeapRegionClosure *foc;
+  if (g1_policy()->should_initiate_conc_mark())
+    foc = &scan_and_mark;
+  else
+    foc = &scan_only;
+
+  HeapRegion* hr;
+  int n = 0;
+  while ((hr = _young_list->par_get_next_scan_only_region()) != NULL) {
+    scan_scan_only_region(hr, foc, worker_i);
+    ++n;
+  }
+  boc.done();
+
+  double closure_app_s = boc.closure_app_seconds();
+  g1_policy()->record_obj_copy_time(worker_i, closure_app_s * 1000.0);
+  double ms = (os::elapsedTime() - start - closure_app_s)*1000.0;
+  g1_policy()->record_scan_only_time(worker_i, ms, n);
+}
+
+void
+G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure,
+                                       OopClosure* non_root_closure) {
+  SharedHeap::process_weak_roots(root_closure, non_root_closure);
+}
+
+
+class SaveMarksClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    r->save_marks();
+    return false;
+  }
+};
+
+void G1CollectedHeap::save_marks() {
+  if (ParallelGCThreads == 0) {
+    SaveMarksClosure sm;
+    heap_region_iterate(&sm);
+  }
+  // We do this even in the parallel case
+  perm_gen()->save_marks();
+}
+
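+// Drive the parallel evacuation of the collection set: prepare the
+// remembered set and the refinement cache, run G1ParTask on the worker
+// gang (or inline when ParallelGCThreads == 0), process JNI weak
+// references, complete in-collection-set marking, and finally clean up
+// after any evacuation failure by removing self-forwarding pointers.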
+void G1CollectedHeap::evacuate_collection_set() {
+  set_evacuation_failed(false);
+
+  g1_rem_set()->prepare_for_oops_into_collection_set_do();
+  concurrent_g1_refine()->set_use_cache(false);
+  int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
+
+  set_par_threads(n_workers);
+  G1ParTask g1_par_task(this, n_workers, _task_queues);
+
+  init_for_evac_failure(NULL);
+
+  change_strong_roots_parity();  // In preparation for parallel strong roots.
+  rem_set()->prepare_for_younger_refs_iterate(true);
+  double start_par = os::elapsedTime();
+
+  if (ParallelGCThreads > 0) {
+    // The individual threads will set their evac-failure closures.
+    workers()->run_task(&g1_par_task);
+  } else {
+    g1_par_task.work(0);
+  }
+
+  double par_time = (os::elapsedTime() - start_par) * 1000.0;
+  g1_policy()->record_par_time(par_time);
+  set_par_threads(0);
+  // Is this the right thing to do here?  We don't save marks
+  // on individual heap regions when we allocate from
+  // them in parallel, so this seems like the correct place for this.
+  all_alloc_regions_note_end_of_copying();
+  {
+    G1IsAliveClosure is_alive(this);
+    G1KeepAliveClosure keep_alive(this);
+    JNIHandles::weak_oops_do(&is_alive, &keep_alive);
+  }
+
+  g1_rem_set()->cleanup_after_oops_into_collection_set_do();
+  concurrent_g1_refine()->set_use_cache(true);
+
+  finalize_for_evac_failure();
+
+  // Must do this before removing self-forwarding pointers, which clears
+  // the per-region evac-failure flags.
+  concurrent_mark()->complete_marking_in_collection_set();
+
+  if (evacuation_failed()) {
+    remove_self_forwarding_pointers();
+
+    if (PrintGCDetails) {
+      gclog_or_tty->print(" (evacuation failed)");
+    } else if (PrintGC) {
+      gclog_or_tty->print("--");
+    }
+  }
+
+  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+}
+
+void G1CollectedHeap::free_region(HeapRegion* hr) {
+  size_t pre_used = 0;
+  size_t cleared_h_regions = 0;
+  size_t freed_regions = 0;
+  UncleanRegionList local_list;
+
+  HeapWord* start = hr->bottom();
+  HeapWord* end   = hr->prev_top_at_mark_start();
+  size_t used_bytes = hr->used();
+  size_t live_bytes = hr->max_live_bytes();
+  if (used_bytes > 0) {
+    guarantee( live_bytes <= used_bytes, "invariant" );
+  } else {
+    guarantee( live_bytes == 0, "invariant" );
+  }
+
+  size_t garbage_bytes = used_bytes - live_bytes;
+  if (garbage_bytes > 0)
+    g1_policy()->decrease_known_garbage_bytes(garbage_bytes);
+
+  free_region_work(hr, pre_used, cleared_h_regions, freed_regions,
+                   &local_list);
+  finish_free_region_work(pre_used, cleared_h_regions, freed_regions,
+                          &local_list);
+}
+
+void
+G1CollectedHeap::free_region_work(HeapRegion* hr,
+                                  size_t& pre_used,
+                                  size_t& cleared_h_regions,
+                                  size_t& freed_regions,
+                                  UncleanRegionList* list,
+                                  bool par) {
+  assert(!hr->popular(), "should not free popular regions");
+  pre_used += hr->used();
+  if (hr->isHumongous()) {
+    assert(hr->startsHumongous(),
+           "Only the start of a humongous region should be freed.");
+    int ind = _hrs->find(hr);
+    assert(ind != -1, "Should have an index.");
+    // Clear the start region.
+    hr->hr_clear(par, true /*clear_space*/);
+    list->insert_before_head(hr);
+    cleared_h_regions++;
+    freed_regions++;
+    // Clear any continued regions.
+    ind++;
+    while ((size_t)ind < n_regions()) {
+      HeapRegion* hrc = _hrs->at(ind);
+      if (!hrc->continuesHumongous()) break;
+      // Otherwise, does continue the H region.
+      assert(hrc->humongous_start_region() == hr, "Huh?");
+      hrc->hr_clear(par, true /*clear_space*/);
+      cleared_h_regions++;
+      freed_regions++;
+      list->insert_before_head(hrc);
+      ind++;
+    }
+  } else {
+    hr->hr_clear(par, true /*clear_space*/);
+    list->insert_before_head(hr);
+    freed_regions++;
+    // If we're using clear2, this should not be enabled.
+    // assert(!hr->in_cohort(), "Can't be both free and in a cohort.");
+  }
+}
+
+void G1CollectedHeap::finish_free_region_work(size_t pre_used,
+                                              size_t cleared_h_regions,
+                                              size_t freed_regions,
+                                              UncleanRegionList* list) {
+  if (list != NULL && list->sz() > 0) {
+    prepend_region_list_on_unclean_list(list);
+  }
+  // Acquire a lock, if we're parallel, to update possibly-shared
+  // variables.
+  Mutex* lock = (n_par_threads() > 0) ? ParGCRareEvent_lock : NULL;
+  {
+    MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
+    _summary_bytes_used -= pre_used;
+    _num_humongous_regions -= (int) cleared_h_regions;
+    _free_regions += freed_regions;
+  }
+}
+
+
+void G1CollectedHeap::dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list) {
+  while (list != NULL) {
+    guarantee( list->is_young(), "invariant" );
+
+    HeapWord* bottom = list->bottom();
+    HeapWord* end = list->end();
+    MemRegion mr(bottom, end);
+    ct_bs->dirty(mr);
+
+    list = list->get_next_young_region();
+  }
+}
+
+void G1CollectedHeap::cleanUpCardTable() {
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
+  double start = os::elapsedTime();
+
+  ct_bs->clear(_g1_committed);
+
+  // now, redirty the cards of the scan-only and survivor regions
+  // (it seemed faster to do it this way, instead of iterating over
+  // all regions and then clearing / dirtying as appropriate)
+  dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region());
+  dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region());
+
+  double elapsed = os::elapsedTime() - start;
+  g1_policy()->record_clear_ct_time( elapsed * 1000.0);
+}
+
+
+void G1CollectedHeap::do_collection_pause_if_appropriate(size_t word_size) {
+  // First do any popular regions.
+  HeapRegion* hr;
+  while ((hr = popular_region_to_evac()) != NULL) {
+    evac_popular_region(hr);
+  }
+  // Now do heuristic pauses.
+  if (g1_policy()->should_do_collection_pause(word_size)) {
+    do_collection_pause();
+  }
+}
+
+void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) {
+  double young_time_ms     = 0.0;
+  double non_young_time_ms = 0.0;
+
+  G1CollectorPolicy* policy = g1_policy();
+
+  double start_sec = os::elapsedTime();
+  bool non_young = true;
+
+  HeapRegion* cur = cs_head;
+  int age_bound = -1;
+  size_t rs_lengths = 0;
+
+  while (cur != NULL) {
+    if (non_young) {
+      if (cur->is_young()) {
+        double end_sec = os::elapsedTime();
+        double elapsed_ms = (end_sec - start_sec) * 1000.0;
+        non_young_time_ms += elapsed_ms;
+
+        start_sec = os::elapsedTime();
+        non_young = false;
+      }
+    } else {
+      if (!cur->is_on_free_list()) {
+        double end_sec = os::elapsedTime();
+        double elapsed_ms = (end_sec - start_sec) * 1000.0;
+        young_time_ms += elapsed_ms;
+
+        start_sec = os::elapsedTime();
+        non_young = true;
+      }
+    }
+
+    rs_lengths += cur->rem_set()->occupied();
+
+    HeapRegion* next = cur->next_in_collection_set();
+    assert(cur->in_collection_set(), "bad CS");
+    cur->set_next_in_collection_set(NULL);
+    cur->set_in_collection_set(false);
+
+    if (cur->is_young()) {
+      int index = cur->young_index_in_cset();
+      guarantee( index != -1, "invariant" );
+      guarantee( (size_t)index < policy->young_cset_length(), "invariant" );
+      size_t words_survived = _surviving_young_words[index];
+      cur->record_surv_words_in_group(words_survived);
+    } else {
+      int index = cur->young_index_in_cset();
+      guarantee( index == -1, "invariant" );
+    }
+
+    assert( (cur->is_young() && cur->young_index_in_cset() > -1) ||
+            (!cur->is_young() && cur->young_index_in_cset() == -1),
+            "invariant" );
+
+    if (!cur->evacuation_failed()) {
+      // A region in the collection set should never be empty.
+      assert(!cur->is_empty(),
+             "Should not have empty regions in a CS.");
+      free_region(cur);
+    } else {
+      guarantee( !cur->is_scan_only(), "should not be scan only" );
+      cur->uninstall_surv_rate_group();
+      if (cur->is_young())
+        cur->set_young_index_in_cset(-1);
+      cur->set_not_young();
+      cur->set_evacuation_failed(false);
+    }
+    cur = next;
+  }
+
+  policy->record_max_rs_lengths(rs_lengths);
+  policy->cset_regions_freed();
+
+  double end_sec = os::elapsedTime();
+  double elapsed_ms = (end_sec - start_sec) * 1000.0;
+  if (non_young)
+    non_young_time_ms += elapsed_ms;
+  else
+    young_time_ms += elapsed_ms;
+
+  policy->record_young_free_cset_time_ms(young_time_ms);
+  policy->record_non_young_free_cset_time_ms(non_young_time_ms);
+}
+
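+// Region-list management under ZF_mon: regions are taken either from the
+// "unclean" list (and zero filled on demand) or from the free list of
+// already zero-filled regions. Regions move from the unclean list to the
+// free list once they have been zero filled; the ZF_mon notifications
+// below wake the concurrent zero-filling thread when there is more work.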
+HeapRegion*
+G1CollectedHeap::alloc_region_from_unclean_list_locked(bool zero_filled) {
+  assert(ZF_mon->owned_by_self(), "Precondition");
+  HeapRegion* res = pop_unclean_region_list_locked();
+  if (res != NULL) {
+    assert(!res->continuesHumongous() &&
+           res->zero_fill_state() != HeapRegion::Allocated,
+           "Only free regions on unclean list.");
+    if (zero_filled) {
+      res->ensure_zero_filled_locked();
+      res->set_zero_fill_allocated();
+    }
+  }
+  return res;
+}
+
+HeapRegion* G1CollectedHeap::alloc_region_from_unclean_list(bool zero_filled) {
+  MutexLockerEx zx(ZF_mon, Mutex::_no_safepoint_check_flag);
+  return alloc_region_from_unclean_list_locked(zero_filled);
+}
+
+void G1CollectedHeap::put_region_on_unclean_list(HeapRegion* r) {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  put_region_on_unclean_list_locked(r);
+  if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread.
+}
+
+void G1CollectedHeap::set_unclean_regions_coming(bool b) {
+  MutexLockerEx x(Cleanup_mon);
+  set_unclean_regions_coming_locked(b);
+}
+
+void G1CollectedHeap::set_unclean_regions_coming_locked(bool b) {
+  assert(Cleanup_mon->owned_by_self(), "Precondition");
+  _unclean_regions_coming = b;
+  // Wake up mutator threads that might be waiting for completeCleanup to
+  // finish.
+  if (!b) Cleanup_mon->notify_all();
+}
+
+void G1CollectedHeap::wait_for_cleanup_complete() {
+  MutexLockerEx x(Cleanup_mon);
+  wait_for_cleanup_complete_locked();
+}
+
+void G1CollectedHeap::wait_for_cleanup_complete_locked() {
+  assert(Cleanup_mon->owned_by_self(), "precondition");
+  while (_unclean_regions_coming) {
+    Cleanup_mon->wait();
+  }
+}
+
+void
+G1CollectedHeap::put_region_on_unclean_list_locked(HeapRegion* r) {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  _unclean_region_list.insert_before_head(r);
+}
+
+void
+G1CollectedHeap::prepend_region_list_on_unclean_list(UncleanRegionList* list) {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  prepend_region_list_on_unclean_list_locked(list);
+  if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread.
+}
+
+void
+G1CollectedHeap::
+prepend_region_list_on_unclean_list_locked(UncleanRegionList* list) {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  _unclean_region_list.prepend_list(list);
+}
+
+HeapRegion* G1CollectedHeap::pop_unclean_region_list_locked() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  HeapRegion* res = _unclean_region_list.pop();
+  if (res != NULL) {
+    // Inform ZF thread that there's a new unclean head.
+    if (_unclean_region_list.hd() != NULL && should_zf())
+      ZF_mon->notify_all();
+  }
+  return res;
+}
+
+HeapRegion* G1CollectedHeap::peek_unclean_region_list_locked() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  return _unclean_region_list.hd();
+}
+
+
+bool G1CollectedHeap::move_cleaned_region_to_free_list_locked() {
+  assert(ZF_mon->owned_by_self(), "Precondition");
+  HeapRegion* r = peek_unclean_region_list_locked();
+  if (r != NULL && r->zero_fill_state() == HeapRegion::ZeroFilled) {
+    // Result of below must be equal to "r", since we hold the lock.
+    (void)pop_unclean_region_list_locked();
+    put_free_region_on_list_locked(r);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool G1CollectedHeap::move_cleaned_region_to_free_list() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  return move_cleaned_region_to_free_list_locked();
+}
+
+
+void G1CollectedHeap::put_free_region_on_list_locked(HeapRegion* r) {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  assert(_free_region_list_size == free_region_list_length(), "Inv");
+  assert(r->zero_fill_state() == HeapRegion::ZeroFilled,
+        "Regions on free list must be zero filled");
+  assert(!r->isHumongous(), "Must not be humongous.");
+  assert(r->is_empty(), "Better be empty");
+  assert(!r->is_on_free_list(),
+         "Better not already be on free list");
+  assert(!r->is_on_unclean_list(),
+         "Better not already be on unclean list");
+  r->set_on_free_list(true);
+  r->set_next_on_free_list(_free_region_list);
+  _free_region_list = r;
+  _free_region_list_size++;
+  assert(_free_region_list_size == free_region_list_length(), "Inv");
+}
+
+void G1CollectedHeap::put_free_region_on_list(HeapRegion* r) {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  put_free_region_on_list_locked(r);
+}
+
+HeapRegion* G1CollectedHeap::pop_free_region_list_locked() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  assert(_free_region_list_size == free_region_list_length(), "Inv");
+  HeapRegion* res = _free_region_list;
+  if (res != NULL) {
+    _free_region_list = res->next_from_free_list();
+    _free_region_list_size--;
+    res->set_on_free_list(false);
+    res->set_next_on_free_list(NULL);
+    assert(_free_region_list_size == free_region_list_length(), "Inv");
+  }
+  return res;
+}
+
+
+HeapRegion* G1CollectedHeap::alloc_free_region_from_lists(bool zero_filled) {
+  // By self, or on behalf of self.
+  assert(Heap_lock->is_locked(), "Precondition");
+  HeapRegion* res = NULL;
+  bool first = true;
+  while (res == NULL) {
+    if (zero_filled || !first) {
+      MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+      res = pop_free_region_list_locked();
+      if (res != NULL) {
+        assert(!res->zero_fill_is_allocated(),
+               "No allocated regions on free list.");
+        res->set_zero_fill_allocated();
+      } else if (!first) {
+        break;  // We tried both, time to return NULL.
+      }
+    }
+
+    if (res == NULL) {
+      res = alloc_region_from_unclean_list(zero_filled);
+    }
+    assert(res == NULL ||
+           !zero_filled ||
+           res->zero_fill_is_allocated(),
+           "We must have allocated the region we're returning");
+    first = false;
+  }
+  return res;
+}
+
+void G1CollectedHeap::remove_allocated_regions_from_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  {
+    HeapRegion* prev = NULL;
+    HeapRegion* cur = _unclean_region_list.hd();
+    while (cur != NULL) {
+      HeapRegion* next = cur->next_from_unclean_list();
+      if (cur->zero_fill_is_allocated()) {
+        // Remove from the list.
+        if (prev == NULL) {
+          (void)_unclean_region_list.pop();
+        } else {
+          _unclean_region_list.delete_after(prev);
+        }
+        cur->set_on_unclean_list(false);
+        cur->set_next_on_unclean_list(NULL);
+      } else {
+        prev = cur;
+      }
+      cur = next;
+    }
+    assert(_unclean_region_list.sz() == unclean_region_list_length(),
+           "Inv");
+  }
+
+  {
+    HeapRegion* prev = NULL;
+    HeapRegion* cur = _free_region_list;
+    while (cur != NULL) {
+      HeapRegion* next = cur->next_from_free_list();
+      if (cur->zero_fill_is_allocated()) {
+        // Remove from the list.
+        if (prev == NULL) {
+          _free_region_list = cur->next_from_free_list();
+        } else {
+          prev->set_next_on_free_list(cur->next_from_free_list());
+        }
+        cur->set_on_free_list(false);
+        cur->set_next_on_free_list(NULL);
+        _free_region_list_size--;
+      } else {
+        prev = cur;
+      }
+      cur = next;
+    }
+    assert(_free_region_list_size == free_region_list_length(), "Inv");
+  }
+}
+
+bool G1CollectedHeap::verify_region_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  return verify_region_lists_locked();
+}
+
+bool G1CollectedHeap::verify_region_lists_locked() {
+  HeapRegion* unclean = _unclean_region_list.hd();
+  while (unclean != NULL) {
+    guarantee(unclean->is_on_unclean_list(), "Well, it is!");
+    guarantee(!unclean->is_on_free_list(), "Well, it shouldn't be!");
+    guarantee(unclean->zero_fill_state() != HeapRegion::Allocated,
+              "Everything else is possible.");
+    unclean = unclean->next_from_unclean_list();
+  }
+  guarantee(_unclean_region_list.sz() == unclean_region_list_length(), "Inv");
+
+  HeapRegion* free_r = _free_region_list;
+  while (free_r != NULL) {
+    assert(free_r->is_on_free_list(), "Well, it is!");
+    assert(!free_r->is_on_unclean_list(), "Well, it shouldn't be!");
+    switch (free_r->zero_fill_state()) {
+    case HeapRegion::NotZeroFilled:
+    case HeapRegion::ZeroFilling:
+      guarantee(false, "Should not be on free list.");
+      break;
+    default:
+      // Everything else is possible.
+      break;
+    }
+    free_r = free_r->next_from_free_list();
+  }
+  guarantee(_free_region_list_size == free_region_list_length(), "Inv");
+  // If we didn't do an assertion...
+  return true;
+}
+
+size_t G1CollectedHeap::free_region_list_length() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  size_t len = 0;
+  HeapRegion* cur = _free_region_list;
+  while (cur != NULL) {
+    len++;
+    cur = cur->next_from_free_list();
+  }
+  return len;
+}
+
+size_t G1CollectedHeap::unclean_region_list_length() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  return _unclean_region_list.length();
+}
+
+size_t G1CollectedHeap::n_regions() {
+  return _hrs->length();
+}
+
+size_t G1CollectedHeap::max_regions() {
+  return
+    (size_t)align_size_up(g1_reserved_obj_bytes(), HeapRegion::GrainBytes) /
+    HeapRegion::GrainBytes;
+}
+
+size_t G1CollectedHeap::free_regions() {
+  /* Possibly-expensive assert.
+  assert(_free_regions == count_free_regions(),
+         "_free_regions is off.");
+  */
+  return _free_regions;
+}
+
+bool G1CollectedHeap::should_zf() {
+  return _free_region_list_size < (size_t) G1ConcZFMaxRegions;
+}
+
+class RegionCounter: public HeapRegionClosure {
+  size_t _n;
+public:
+  RegionCounter() : _n(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->is_empty() && !r->popular()) {
+      assert(!r->isHumongous(), "H regions should not be empty.");
+      _n++;
+    }
+    return false;
+  }
+  int res() { return (int) _n; }
+};
+
+size_t G1CollectedHeap::count_free_regions() {
+  RegionCounter rc;
+  heap_region_iterate(&rc);
+  size_t n = rc.res();
+  if (_cur_alloc_region != NULL && _cur_alloc_region->is_empty())
+    n--;
+  return n;
+}
+
+size_t G1CollectedHeap::count_free_regions_list() {
+  size_t n = 0;
+  ZF_mon->lock_without_safepoint_check();
+  HeapRegion* cur = _free_region_list;
+  while (cur != NULL) {
+    cur = cur->next_from_free_list();
+    n++;
+  }
+  size_t m = unclean_region_list_length();
+  ZF_mon->unlock();
+  return n + m;
+}
+
+bool G1CollectedHeap::should_set_young_locked() {
+  assert(heap_lock_held_for_gc(),
+              "the heap lock should already be held by or for this thread");
+  return  (g1_policy()->in_young_gc_mode() &&
+           g1_policy()->should_add_next_region_to_young_list());
+}
+
+void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) {
+  assert(heap_lock_held_for_gc(),
+              "the heap lock should already be held by or for this thread");
+  _young_list->push_region(hr);
+  g1_policy()->set_region_short_lived(hr);
+}
+
+class NoYoungRegionsClosure: public HeapRegionClosure {
+private:
+  bool _success;
+public:
+  NoYoungRegionsClosure() : _success(true) { }
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->is_young()) {
+      gclog_or_tty->print_cr("Region ["PTR_FORMAT", "PTR_FORMAT") tagged as young",
+                             r->bottom(), r->end());
+      _success = false;
+    }
+    return false;
+  }
+  bool success() { return _success; }
+};
+
+bool G1CollectedHeap::check_young_list_empty(bool ignore_scan_only_list,
+                                             bool check_sample) {
+  bool ret = true;
+
+  ret = _young_list->check_list_empty(ignore_scan_only_list, check_sample);
+  if (!ignore_scan_only_list) {
+    NoYoungRegionsClosure closure;
+    heap_region_iterate(&closure);
+    ret = ret && closure.success();
+  }
+
+  return ret;
+}
+
+void G1CollectedHeap::empty_young_list() {
+  assert(heap_lock_held_for_gc(),
+              "the heap lock should already be held by or for this thread");
+  assert(g1_policy()->in_young_gc_mode(), "should be in young GC mode");
+
+  _young_list->empty_list();
+}
+
+bool G1CollectedHeap::all_alloc_regions_no_allocs_since_save_marks() {
+  bool no_allocs = true;
+  for (int ap = 0; ap < GCAllocPurposeCount && no_allocs; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    no_allocs = r == NULL || r->saved_mark_at_top();
+  }
+  return no_allocs;
+}
+
+void G1CollectedHeap::all_alloc_regions_note_end_of_copying() {
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    if (r != NULL) {
+      // Check for aliases.
+      bool has_processed_alias = false;
+      for (int i = 0; i < ap; ++i) {
+        if (_gc_alloc_regions[i] == r) {
+          has_processed_alias = true;
+          break;
+        }
+      }
+      if (!has_processed_alias) {
+        r->note_end_of_copying();
+        g1_policy()->record_after_bytes(r->used());
+      }
+    }
+  }
+}
+
+
+// Done at the start of full GC.
+void G1CollectedHeap::tear_down_region_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  while (pop_unclean_region_list_locked() != NULL) ;
+  assert(_unclean_region_list.hd() == NULL && _unclean_region_list.sz() == 0,
+         "Postconditions of loop.")
+  while (pop_free_region_list_locked() != NULL) ;
+  assert(_free_region_list == NULL, "Postcondition of loop.");
+  if (_free_region_list_size != 0) {
+    gclog_or_tty->print_cr("Size is %d.", _free_region_list_size);
+    print();
+  }
+  assert(_free_region_list_size == 0, "Postconditions of loop.");
+}
+
+
+class RegionResetter: public HeapRegionClosure {
+  G1CollectedHeap* _g1;
+  int _n;
+public:
+  RegionResetter() : _g1(G1CollectedHeap::heap()), _n(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    if (r->top() > r->bottom()) {
+      if (r->top() < r->end()) {
+        Copy::fill_to_words(r->top(),
+                          pointer_delta(r->end(), r->top()));
+      }
+      r->set_zero_fill_allocated();
+    } else {
+      assert(r->is_empty(), "tautology");
+      if (r->popular()) {
+        if (r->zero_fill_state() != HeapRegion::Allocated) {
+          r->ensure_zero_filled_locked();
+          r->set_zero_fill_allocated();
+        }
+      } else {
+        _n++;
+        switch (r->zero_fill_state()) {
+        case HeapRegion::NotZeroFilled:
+        case HeapRegion::ZeroFilling:
+          _g1->put_region_on_unclean_list_locked(r);
+          break;
+        case HeapRegion::Allocated:
+          r->set_zero_fill_complete();
+          // no break; go on to put on free list.
+        case HeapRegion::ZeroFilled:
+          _g1->put_free_region_on_list_locked(r);
+          break;
+        }
+      }
+    }
+    return false;
+  }
+
+  int getFreeRegionCount() {return _n;}
+};
+
+// Done at the end of full GC.
+void G1CollectedHeap::rebuild_region_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  // This needs to go at the end of the full GC.
+  RegionResetter rs;
+  heap_region_iterate(&rs);
+  _free_regions = rs.getFreeRegionCount();
+  // Tell the ZF thread it may have work to do.
+  if (should_zf()) ZF_mon->notify_all();
+}
+
+class UsedRegionsNeedZeroFillSetter: public HeapRegionClosure {
+  G1CollectedHeap* _g1;
+  int _n;
+public:
+  UsedRegionsNeedZeroFillSetter() : _g1(G1CollectedHeap::heap()), _n(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    if (r->top() > r->bottom()) {
+      // There are assertions in "set_zero_fill_needed()" below that
+      // require top() == bottom(), so this is technically illegal.
+      // We'll skirt the law here, by making that true temporarily.
+      DEBUG_ONLY(HeapWord* save_top = r->top();
+                 r->set_top(r->bottom()));
+      r->set_zero_fill_needed();
+      DEBUG_ONLY(r->set_top(save_top));
+    }
+    return false;
+  }
+};
+
+// Done at the start of full GC.
+void G1CollectedHeap::set_used_regions_to_need_zero_fill() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  // This needs to go at the start of the full GC.
+  UsedRegionsNeedZeroFillSetter rs;
+  heap_region_iterate(&rs);
+}
+
+class CountObjClosure: public ObjectClosure {
+  size_t _n;
+public:
+  CountObjClosure() : _n(0) {}
+  void do_object(oop obj) { _n++; }
+  size_t n() { return _n; }
+};
+
+size_t G1CollectedHeap::pop_object_used_objs() {
+  size_t sum_objs = 0;
+  for (int i = 0; i < G1NumPopularRegions; i++) {
+    CountObjClosure cl;
+    _hrs->at(i)->object_iterate(&cl);
+    sum_objs += cl.n();
+  }
+  return sum_objs;
+}
+
+size_t G1CollectedHeap::pop_object_used_bytes() {
+  size_t sum_bytes = 0;
+  for (int i = 0; i < G1NumPopularRegions; i++) {
+    sum_bytes += _hrs->at(i)->used();
+  }
+  return sum_bytes;
+}
+
+
+static int nq = 0;
+
+HeapWord* G1CollectedHeap::allocate_popular_object(size_t word_size) {
+  while (_cur_pop_hr_index < G1NumPopularRegions) {
+    HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index);
+    HeapWord* res = cur_pop_region->allocate(word_size);
+    if (res != NULL) {
+      // We account for popular objs directly in the used summary:
+      _summary_bytes_used += (word_size * HeapWordSize);
+      return res;
+    }
+    // Otherwise, try the next region (first making sure that we remember
+    // the last "top" value as the "next_top_at_mark_start", so that
+    // objects made popular during markings aren't automatically considered
+    // live).
+    cur_pop_region->note_end_of_copying();
+    _cur_pop_hr_index++;
+  }
+  // XXX: For now !!!
+  vm_exit_out_of_memory(word_size,
+                        "Not enough pop obj space (To Be Fixed)");
+  return NULL;
+}
+
+class HeapRegionList: public CHeapObj {
+  public:
+  HeapRegion* hr;
+  HeapRegionList* next;
+};
+
+void G1CollectedHeap::schedule_popular_region_evac(HeapRegion* r) {
+  // This might happen during parallel GC, so protect by this lock.
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  // We don't schedule regions whose evacuations are already pending, or
+  // are already being evacuated.
+  if (!r->popular_pending() && !r->in_collection_set()) {
+    r->set_popular_pending(true);
+    if (G1TracePopularity) {
+      gclog_or_tty->print_cr("Scheduling region "PTR_FORMAT" "
+                             "["PTR_FORMAT", "PTR_FORMAT") for pop-object evacuation.",
+                             r, r->bottom(), r->end());
+    }
+    HeapRegionList* hrl = new HeapRegionList;
+    hrl->hr = r;
+    hrl->next = _popular_regions_to_be_evacuated;
+    _popular_regions_to_be_evacuated = hrl;
+  }
+}
+
+HeapRegion* G1CollectedHeap::popular_region_to_evac() {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  HeapRegion* res = NULL;
+  while (_popular_regions_to_be_evacuated != NULL && res == NULL) {
+    HeapRegionList* hrl = _popular_regions_to_be_evacuated;
+    _popular_regions_to_be_evacuated = hrl->next;
+    res = hrl->hr;
+    // The G1RSPopLimit may have increased, so recheck here...
+    if (res->rem_set()->occupied() < (size_t) G1RSPopLimit) {
+      // Hah: don't need to schedule.
+      if (G1TracePopularity) {
+        gclog_or_tty->print_cr("Unscheduling region "PTR_FORMAT" "
+                               "["PTR_FORMAT", "PTR_FORMAT") "
+                               "for pop-object evacuation (size %d < limit %d)",
+                               res, res->bottom(), res->end(),
+                               res->rem_set()->occupied(), G1RSPopLimit);
+      }
+      res->set_popular_pending(false);
+      res = NULL;
+    }
+    // We do not reset res->popular() here; if we did so, it would allow
+    // the region to be "rescheduled" for popularity evacuation.  Instead,
+    // this is done in the collection pause, with the world stopped.
+    // So the invariant is that the regions in the list have the popularity
+    // boolean set, but having the boolean set does not imply membership
+    // on the list (though there can be at most one such pop-pending region
+    // not on the list at any time).
+    delete hrl;
+  }
+  return res;
+}
+
+void G1CollectedHeap::evac_popular_region(HeapRegion* hr) {
+  while (true) {
+    // Don't want to do a GC pause while cleanup is being completed!
+    wait_for_cleanup_complete();
+
+    // Read the GC count while holding the Heap_lock
+    int gc_count_before = SharedHeap::heap()->total_collections();
+    g1_policy()->record_stop_world_start();
+
+    {
+      MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+      VM_G1PopRegionCollectionPause op(gc_count_before, hr);
+      VMThread::execute(&op);
+
+      // If the prolog succeeded, we didn't do a GC for this.
+      if (op.prologue_succeeded()) break;
+    }
+    // Otherwise we didn't.  We should recheck the size, though, since
+    // the limit may have increased...
+    if (hr->rem_set()->occupied() < (size_t) G1RSPopLimit) {
+      hr->set_popular_pending(false);
+      break;
+    }
+  }
+}
+
+void G1CollectedHeap::atomic_inc_obj_rc(oop obj) {
+  Atomic::inc(obj_rc_addr(obj));
+}
+
+class CountRCClosure: public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  bool _parallel;
+public:
+  CountRCClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h), _parallel(ParallelGCThreads > 0)
+  {}
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+    assert(obj != NULL, "Precondition.");
+    if (_parallel) {
+      // We go sticky at the limit to avoid excess contention.
+      // If we want to track the actual RC's further, we'll need to keep a
+      // per-thread hash table or something for the popular objects.
+      if (_g1h->obj_rc(obj) < G1ObjPopLimit) {
+        _g1h->atomic_inc_obj_rc(obj);
+      }
+    } else {
+      _g1h->inc_obj_rc(obj);
+    }
+  }
+};
+
+class EvacPopObjClosure: public ObjectClosure {
+  G1CollectedHeap* _g1h;
+  size_t _pop_objs;
+  size_t _max_rc;
+public:
+  EvacPopObjClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h), _pop_objs(0), _max_rc(0) {}
+
+  void do_object(oop obj) {
+    size_t rc = _g1h->obj_rc(obj);
+    _max_rc = MAX2(rc, _max_rc);
+    if (rc >= (size_t) G1ObjPopLimit) {
+      _g1h->_pop_obj_rc_at_copy.add((double)rc);
+      size_t word_sz = obj->size();
+      HeapWord* new_pop_loc = _g1h->allocate_popular_object(word_sz);
+      oop new_pop_obj = (oop)new_pop_loc;
+      Copy::aligned_disjoint_words((HeapWord*)obj, new_pop_loc, word_sz);
+      obj->forward_to(new_pop_obj);
+      G1ScanAndBalanceClosure scan_and_balance(_g1h);
+      new_pop_obj->oop_iterate_backwards(&scan_and_balance);
+      // preserve "next" mark bit if marking is in progress.
+      if (_g1h->mark_in_progress() && !_g1h->is_obj_ill(obj)) {
+        _g1h->concurrent_mark()->markAndGrayObjectIfNecessary(new_pop_obj);
+      }
+
+      if (G1TracePopularity) {
+        gclog_or_tty->print_cr("Found obj " PTR_FORMAT " of word size " SIZE_FORMAT
+                               " pop (%d), move to " PTR_FORMAT,
+                               (void*) obj, word_sz,
+                               _g1h->obj_rc(obj), (void*) new_pop_obj);
+      }
+      _pop_objs++;
+    }
+  }
+  size_t pop_objs() { return _pop_objs; }
+  size_t max_rc() { return _max_rc; }
+};
+
+class G1ParCountRCTask : public AbstractGangTask {
+  G1CollectedHeap* _g1h;
+  BitMap _bm;
+
+  size_t getNCards() {
+    return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
+      / G1BlockOffsetSharedArray::N_bytes;
+  }
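+  // Note: getNCards() above is a ceiling division: it yields the number of
+  // N_bytes-sized chunks needed to cover the whole heap capacity, counting
+  // any final partial chunk as a full one (for example, a capacity of
+  // 2*N_bytes + 1 yields 3).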
+  CountRCClosure _count_rc_closure;
+public:
+  G1ParCountRCTask(G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Par RC Count task"),
+    _g1h(g1h), _bm(getNCards()), _count_rc_closure(g1h)
+  {}
+
+  void work(int i) {
+    ResourceMark rm;
+    HandleMark   hm;
+    _g1h->g1_rem_set()->oops_into_collection_set_do(&_count_rc_closure, i);
+  }
+};
+
+void G1CollectedHeap::popularity_pause_preamble(HeapRegion* popular_region) {
+  // We're evacuating a single region (for popularity).
+  if (G1TracePopularity) {
+    gclog_or_tty->print_cr("Doing pop region pause for ["PTR_FORMAT", "PTR_FORMAT")",
+                           popular_region->bottom(), popular_region->end());
+  }
+  g1_policy()->set_single_region_collection_set(popular_region);
+  size_t max_rc;
+  if (!compute_reference_counts_and_evac_popular(popular_region,
+                                                 &max_rc)) {
+    // We didn't evacuate any popular objects.
+    // We increase the RS popularity limit, to prevent this from
+    // happening in the future.
+    if (G1RSPopLimit < (1 << 30)) {
+      G1RSPopLimit *= 2;
+    }
+    // For now, interesting enough for a message:
+#if 1
+    gclog_or_tty->print_cr("In pop region pause for ["PTR_FORMAT", "PTR_FORMAT"), "
+                           "failed to find a pop object (max = %d).",
+                           popular_region->bottom(), popular_region->end(),
+                           max_rc);
+    gclog_or_tty->print_cr("Increased G1RSPopLimit to %d.", G1RSPopLimit);
+#endif // 1
+    // Also, we reset the collection set to NULL, to make the rest of
+    // the collection do nothing.
+    assert(popular_region->next_in_collection_set() == NULL,
+           "should be single-region.");
+    popular_region->set_in_collection_set(false);
+    popular_region->set_popular_pending(false);
+    g1_policy()->clear_collection_set();
+  }
+}
+
+bool G1CollectedHeap::
+compute_reference_counts_and_evac_popular(HeapRegion* popular_region,
+                                          size_t* max_rc) {
+  HeapWord* rc_region_bot;
+  HeapWord* rc_region_end;
+
+  // Set up the reference count region.
+  HeapRegion* rc_region = newAllocRegion(HeapRegion::GrainWords);
+  if (rc_region != NULL) {
+    rc_region_bot = rc_region->bottom();
+    rc_region_end = rc_region->end();
+  } else {
+    rc_region_bot = NEW_C_HEAP_ARRAY(HeapWord, HeapRegion::GrainWords);
+    if (rc_region_bot == NULL) {
+      vm_exit_out_of_memory(HeapRegion::GrainWords,
+                            "No space for RC region.");
+    }
+    rc_region_end = rc_region_bot + HeapRegion::GrainWords;
+  }
+
+  if (G1TracePopularity)
+    gclog_or_tty->print_cr("RC region is ["PTR_FORMAT", "PTR_FORMAT")",
+                           rc_region_bot, rc_region_end);
+  if (rc_region_bot > popular_region->bottom()) {
+    _rc_region_above = true;
+    _rc_region_diff =
+      pointer_delta(rc_region_bot, popular_region->bottom(), 1);
+  } else {
+    assert(rc_region_bot < popular_region->bottom(), "Can't be equal.");
+    _rc_region_above = false;
+    _rc_region_diff =
+      pointer_delta(popular_region->bottom(), rc_region_bot, 1);
+  }
+  g1_policy()->record_pop_compute_rc_start();
+  // Count external references.
+  g1_rem_set()->prepare_for_oops_into_collection_set_do();
+  if (ParallelGCThreads > 0) {
+
+    set_par_threads(workers()->total_workers());
+    G1ParCountRCTask par_count_rc_task(this);
+    workers()->run_task(&par_count_rc_task);
+    set_par_threads(0);
+
+  } else {
+    CountRCClosure count_rc_closure(this);
+    g1_rem_set()->oops_into_collection_set_do(&count_rc_closure, 0);
+  }
+  g1_rem_set()->cleanup_after_oops_into_collection_set_do();
+  g1_policy()->record_pop_compute_rc_end();
+
+  // Now evacuate popular objects.
+  g1_policy()->record_pop_evac_start();
+  EvacPopObjClosure evac_pop_obj_cl(this);
+  popular_region->object_iterate(&evac_pop_obj_cl);
+  *max_rc = evac_pop_obj_cl.max_rc();
+
+  // Make sure the last "top" value of the current popular region is copied
+  // as the "next_top_at_mark_start", so that objects made popular during
+  // markings aren't automatically considered live.
+  HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index);
+  cur_pop_region->note_end_of_copying();
+
+  if (rc_region != NULL) {
+    free_region(rc_region);
+  } else {
+    FREE_C_HEAP_ARRAY(HeapWord, rc_region_bot);
+  }
+  g1_policy()->record_pop_evac_end();
+
+  return evac_pop_obj_cl.pop_objs() > 0;
+}
+
+class CountPopObjInfoClosure: public HeapRegionClosure {
+  size_t _objs;
+  size_t _bytes;
+
+  class CountObjClosure: public ObjectClosure {
+    int _n;
+  public:
+    CountObjClosure() : _n(0) {}
+    void do_object(oop obj) { _n++; }
+    size_t n() { return _n; }
+  };
+
+public:
+  CountPopObjInfoClosure() : _objs(0), _bytes(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _bytes += r->used();
+    CountObjClosure blk;
+    r->object_iterate(&blk);
+    _objs += blk.n();
+    return false;
+  }
+  size_t objs() { return _objs; }
+  size_t bytes() { return _bytes; }
+};
+
+
+void G1CollectedHeap::print_popularity_summary_info() const {
+  CountPopObjInfoClosure blk;
+  for (int i = 0; i <= _cur_pop_hr_index; i++) {
+    blk.doHeapRegion(_hrs->at(i));
+  }
+  gclog_or_tty->print_cr("\nPopular objects: %d objs, %d bytes.",
+                         blk.objs(), blk.bytes());
+  gclog_or_tty->print_cr("   RC at copy = [avg = %5.2f, max = %5.2f, sd = %5.2f].",
+                _pop_obj_rc_at_copy.avg(),
+                _pop_obj_rc_at_copy.maximum(),
+                _pop_obj_rc_at_copy.sd());
+}
+
+void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
+  _refine_cte_cl->set_concurrent(concurrent);
+}
+
+#ifndef PRODUCT
+
+class PrintHeapRegionClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion *r) {
+    gclog_or_tty->print("Region: "PTR_FORMAT":", r);
+    if (r != NULL) {
+      if (r->is_on_free_list())
+        gclog_or_tty->print("Free ");
+      if (r->is_young())
+        gclog_or_tty->print("Young ");
+      if (r->isHumongous())
+        gclog_or_tty->print("Is Humongous ");
+      r->print();
+    }
+    return false;
+  }
+};
+
+class SortHeapRegionClosure : public HeapRegionClosure {
+  size_t young_regions, free_regions, unclean_regions;
+  size_t hum_regions, count;
+  size_t unaccounted, cur_unclean, cur_alloc;
+  size_t total_free;
+  HeapRegion* cur;
+public:
+  SortHeapRegionClosure(HeapRegion *_cur) : cur(_cur), young_regions(0),
+    free_regions(0), unclean_regions(0),
+    hum_regions(0),
+    count(0), unaccounted(0),
+    cur_alloc(0), total_free(0)
+  {}
+  bool doHeapRegion(HeapRegion *r) {
+    count++;
+    if (r->is_on_free_list()) free_regions++;
+    else if (r->is_on_unclean_list()) unclean_regions++;
+    else if (r->isHumongous())  hum_regions++;
+    else if (r->is_young()) young_regions++;
+    else if (r == cur) cur_alloc++;
+    else unaccounted++;
+    return false;
+  }
+  void print() {
+    total_free = free_regions + unclean_regions;
+    gclog_or_tty->print("%d regions\n", count);
+    gclog_or_tty->print("%d free: free_list = %d unclean = %d\n",
+                        total_free, free_regions, unclean_regions);
+    gclog_or_tty->print("%d humongous %d young\n",
+                        hum_regions, young_regions);
+    gclog_or_tty->print("%d cur_alloc\n", cur_alloc);
+    gclog_or_tty->print("UHOH unaccounted = %d\n", unaccounted);
+  }
+};
+
+void G1CollectedHeap::print_region_counts() {
+  SortHeapRegionClosure sc(_cur_alloc_region);
+  PrintHeapRegionClosure cl;
+  heap_region_iterate(&cl);
+  heap_region_iterate(&sc);
+  sc.print();
+  print_region_accounting_info();
+};
+
+bool G1CollectedHeap::regions_accounted_for() {
+  // TODO: regions accounting for young/survivor/tenured
+  return true;
+}
+
+bool G1CollectedHeap::print_region_accounting_info() {
+  gclog_or_tty->print_cr("P regions: %d.", G1NumPopularRegions);
+  gclog_or_tty->print_cr("Free regions: %d (count: %d count list %d) (clean: %d unclean: %d).",
+                         free_regions(),
+                         count_free_regions(), count_free_regions_list(),
+                         _free_region_list_size, _unclean_region_list.sz());
+  gclog_or_tty->print_cr("cur_alloc: %d.",
+                         (_cur_alloc_region == NULL ? 0 : 1));
+  gclog_or_tty->print_cr("H regions: %d.", _num_humongous_regions);
+
+  // TODO: check regions accounting for young/survivor/tenured
+  return true;
+}
+
+bool G1CollectedHeap::is_in_closed_subset(const void* p) const {
+  HeapRegion* hr = heap_region_containing(p);
+  if (hr == NULL) {
+    return is_in_permanent(p);
+  } else {
+    return hr->is_in(p);
+  }
+}
+#endif // PRODUCT
+
+void G1CollectedHeap::g1_unimplemented() {
+  // Unimplemented();
+}
+
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,1203 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A "G1CollectedHeap" is an implementation of a java heap for HotSpot.
+// It uses the "Garbage First" heap organization and algorithm, which
+// may combine concurrent marking with parallel, incremental compaction of
+// heap subsets that will yield large amounts of garbage.
+
+class HeapRegion;
+class HeapRegionSeq;
+class HeapRegionList;
+class PermanentGenerationSpec;
+class GenerationSpec;
+class OopsInHeapRegionClosure;
+class G1ScanHeapEvacClosure;
+class ObjectClosure;
+class SpaceClosure;
+class CompactibleSpaceClosure;
+class Space;
+class G1CollectorPolicy;
+class GenRemSet;
+class G1RemSet;
+class HeapRegionRemSetIterator;
+class ConcurrentMark;
+class ConcurrentMarkThread;
+class ConcurrentG1Refine;
+class ConcurrentZFThread;
+
+// If you want to accumulate detailed statistics on work queues,
+// turn this on.
+#define G1_DETAILED_STATS 0
+
+#if G1_DETAILED_STATS
+#  define IF_G1_DETAILED_STATS(code) code
+#else
+#  define IF_G1_DETAILED_STATS(code)
+#endif
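+
+// A minimal usage sketch of the macro above (illustrative only, not part of
+// the original change; the counter and method names are hypothetical): the
+// wrapped statement compiles away entirely unless G1_DETAILED_STATS is 1.
+#if 0
+  void note_steal_attempt() {
+    IF_G1_DETAILED_STATS(_steal_attempts++;)
+  }
+#endif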
+
+typedef GenericTaskQueue<oop*>    RefToScanQueue;
+typedef GenericTaskQueueSet<oop*> RefToScanQueueSet;
+
+enum G1GCThreadGroups {
+  G1CRGroup = 0,
+  G1ZFGroup = 1,
+  G1CMGroup = 2,
+  G1CLGroup = 3
+};
+
+enum GCAllocPurpose {
+  GCAllocForTenured,
+  GCAllocForSurvived,
+  GCAllocPurposeCount
+};
+
+class YoungList : public CHeapObj {
+private:
+  G1CollectedHeap* _g1h;
+
+  HeapRegion* _head;
+
+  HeapRegion* _scan_only_head;
+  HeapRegion* _scan_only_tail;
+  size_t      _length;
+  size_t      _scan_only_length;
+
+  size_t      _last_sampled_rs_lengths;
+  size_t      _sampled_rs_lengths;
+  HeapRegion* _curr;
+  HeapRegion* _curr_scan_only;
+
+  HeapRegion* _survivor_head;
+  HeapRegion* _survivors_tail;
+  size_t      _survivor_length;
+
+  void          empty_list(HeapRegion* list);
+
+public:
+  YoungList(G1CollectedHeap* g1h);
+
+  void          push_region(HeapRegion* hr);
+  void          add_survivor_region(HeapRegion* hr);
+  HeapRegion*   pop_region();
+  void          empty_list();
+  bool          is_empty() { return _length == 0; }
+  size_t        length() { return _length; }
+  size_t        scan_only_length() { return _scan_only_length; }
+
+  void rs_length_sampling_init();
+  bool rs_length_sampling_more();
+  void rs_length_sampling_next();
+
+  void reset_sampled_info() {
+    _last_sampled_rs_lengths =   0;
+  }
+  size_t sampled_rs_lengths() { return _last_sampled_rs_lengths; }
+
+  // for development purposes
+  void reset_auxilary_lists();
+  HeapRegion* first_region() { return _head; }
+  HeapRegion* first_scan_only_region() { return _scan_only_head; }
+  HeapRegion* first_survivor_region() { return _survivor_head; }
+  HeapRegion* par_get_next_scan_only_region() {
+    MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+    HeapRegion* ret = _curr_scan_only;
+    if (ret != NULL)
+      _curr_scan_only = ret->get_next_young_region();
+    return ret;
+  }
+
+  // debugging
+  bool          check_list_well_formed();
+  bool          check_list_empty(bool ignore_scan_only_list,
+                                 bool check_sample = true);
+  void          print();
+};
+
+class RefineCardTableEntryClosure;
+class G1CollectedHeap : public SharedHeap {
+  friend class VM_G1CollectForAllocation;
+  friend class VM_GenCollectForPermanentAllocation;
+  friend class VM_G1CollectFull;
+  friend class VM_G1IncCollectionPause;
+  friend class VM_G1PopRegionCollectionPause;
+  friend class VMStructs;
+
+  // Closures used in implementation.
+  friend class G1ParCopyHelper;
+  friend class G1IsAliveClosure;
+  friend class G1EvacuateFollowersClosure;
+  friend class G1ParScanThreadState;
+  friend class G1ParScanClosureSuper;
+  friend class G1ParEvacuateFollowersClosure;
+  friend class G1ParTask;
+  friend class G1FreeGarbageRegionClosure;
+  friend class RefineCardTableEntryClosure;
+  friend class G1PrepareCompactClosure;
+  friend class RegionSorter;
+  friend class CountRCClosure;
+  friend class EvacPopObjClosure;
+
+  // Other related classes.
+  friend class G1MarkSweep;
+
+private:
+  enum SomePrivateConstants {
+    VeryLargeInBytes = HeapRegion::GrainBytes/2,
+    VeryLargeInWords = VeryLargeInBytes/HeapWordSize,
+    MinHeapDeltaBytes = 10 * HeapRegion::GrainBytes,      // FIXME
+    NumAPIs = HeapRegion::MaxAge
+  };
+
+
+  // The one and only G1CollectedHeap, so static functions can find it.
+  static G1CollectedHeap* _g1h;
+
+  // Storage for the G1 heap (excludes the permanent generation).
+  VirtualSpace _g1_storage;
+  MemRegion    _g1_reserved;
+
+  // The part of _g1_storage that is currently committed.
+  MemRegion _g1_committed;
+
+  // The maximum part of _g1_storage that has ever been committed.
+  MemRegion _g1_max_committed;
+
+  // The number of regions that are completely free.
+  size_t _free_regions;
+
+  // The number of regions we could create by expansion.
+  size_t _expansion_regions;
+
+  // Return the number of free regions in the heap (by direct counting.)
+  size_t count_free_regions();
+  // Return the number of free regions on the free and unclean lists.
+  size_t count_free_regions_list();
+
+  // The block offset table for the G1 heap.
+  G1BlockOffsetSharedArray* _bot_shared;
+
+  // Move all of the regions off the free lists, then rebuild those free
+  // lists, before and after full GC.
+  void tear_down_region_lists();
+  void rebuild_region_lists();
+  // This sets all non-empty regions to need zero-fill (which they will
+  // need if they end up empty after the full collection.)
+  void set_used_regions_to_need_zero_fill();
+
+  // The sequence of all heap regions in the heap.
+  HeapRegionSeq* _hrs;
+
+  // The region from which normal-sized objects are currently being
+  // allocated.  May be NULL.
+  HeapRegion* _cur_alloc_region;
+
+  // Postcondition: cur_alloc_region == NULL.
+  void abandon_cur_alloc_region();
+
+  // The to-space memory regions into which objects are being copied during
+  // a GC.
+  HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
+  uint _gc_alloc_region_counts[GCAllocPurposeCount];
+
+  // A list of the regions that have been set to be alloc regions in the
+  // current collection.
+  HeapRegion* _gc_alloc_region_list;
+
+  // When called by a par thread, requires par_alloc_during_gc_lock() to be held.
+  void push_gc_alloc_region(HeapRegion* hr);
+
+  // This should only be called single-threaded.  Undeclares all GC alloc
+  // regions.
+  void forget_alloc_region_list();
+
+  // Should be used to set an alloc region, because there's other
+  // associated bookkeeping.
+  void set_gc_alloc_region(int purpose, HeapRegion* r);
+
+  // Check well-formedness of alloc region list.
+  bool check_gc_alloc_regions();
+
+  // Outside of GC pauses, the number of bytes used in all regions other
+  // than the current allocation region.
+  size_t _summary_bytes_used;
+
+  // Summary information about popular objects; method to print it.
+  NumberSeq _pop_obj_rc_at_copy;
+  void print_popularity_summary_info() const;
+
+  volatile unsigned _gc_time_stamp;
+
+  size_t* _surviving_young_words;
+
+  void setup_surviving_young_words();
+  void update_surviving_young_words(size_t* surv_young_words);
+  void cleanup_surviving_young_words();
+
+protected:
+
+  // Returns "true" iff none of the gc alloc regions have any allocations
+  // since the last call to "save_marks".
+  bool all_alloc_regions_no_allocs_since_save_marks();
+  // Calls "note_end_of_copying on all gc alloc_regions.
+  void all_alloc_regions_note_end_of_copying();
+
+  // The number of regions allocated to hold humongous objects.
+  int         _num_humongous_regions;
+  YoungList*  _young_list;
+
+  // The current policy object for the collector.
+  G1CollectorPolicy* _g1_policy;
+
+  // Parallel allocation lock to protect the current allocation region.
+  Mutex  _par_alloc_during_gc_lock;
+  Mutex* par_alloc_during_gc_lock() { return &_par_alloc_during_gc_lock; }
+
+  // If possible/desirable, allocate a new HeapRegion for normal object
+  // allocation sufficient for an allocation of the given "word_size".
+  // If "do_expand" is true, will attempt to expand the heap if necessary
+  // to satisfy the request.  If "zero_filled" is true, requires a
+  // zero-filled region.
+  // (Returning NULL will trigger a GC.)
+  virtual HeapRegion* newAllocRegion_work(size_t word_size,
+                                          bool do_expand,
+                                          bool zero_filled);
+
+  virtual HeapRegion* newAllocRegion(size_t word_size,
+                                     bool zero_filled = true) {
+    return newAllocRegion_work(word_size, false, zero_filled);
+  }
+  virtual HeapRegion* newAllocRegionWithExpansion(int purpose,
+                                                  size_t word_size,
+                                                  bool zero_filled = true);
+
+  // Attempt to allocate an object of the given (very large) "word_size".
+  // Returns "NULL" on failure.
+  virtual HeapWord* humongousObjAllocate(size_t word_size);
+
+  // If possible, allocate a block of the given word_size, else return "NULL".
+  // Returning NULL will trigger GC or heap expansion.
+  // These two methods have rather awkward pre- and
+  // post-conditions. If they are called outside a safepoint, then
+  // they assume that the caller is holding the heap lock. Upon return
+  // they release the heap lock, if they are returning a non-NULL
+  // value. attempt_allocation_slow() also dirties the cards of a
+  // newly-allocated young region after it releases the heap
+  // lock. This change in interface was the neatest way to achieve
+  // this card dirtying without affecting mem_allocate(), which is a
+  // more frequently called method. We tried two or three different
+  // approaches, but they were even more hacky.
+  HeapWord* attempt_allocation(size_t word_size,
+                               bool permit_collection_pause = true);
+
+  HeapWord* attempt_allocation_slow(size_t word_size,
+                                    bool permit_collection_pause = true);
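+
+  // A minimal caller sketch (illustrative only, not part of the original
+  // change) of the locking protocol described above.  The caller enters
+  // holding the Heap_lock; on a non-NULL result the lock has already been
+  // released inside attempt_allocation(), so only the failure path unlocks
+  // explicitly.
+#if 0
+  HeapWord* mem_allocate_sketch(size_t word_size) {
+    assert(Heap_lock->owned_by_self(), "caller must hold the Heap_lock");
+    HeapWord* result = attempt_allocation(word_size);
+    if (result != NULL) {
+      return result;          // Heap_lock was released before returning.
+    }
+    // Allocation failed: the Heap_lock is still held here; the caller can
+    // release it and, e.g., schedule a collection pause.
+    Heap_lock->unlock();
+    return NULL;
+  }
+#endif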
+
+  // Allocate blocks during garbage collection. Will ensure an
+  // allocation region, either by picking one or expanding the
+  // heap, and then allocate a block of the given size. The block
+  // may not be a humongous - it must fit into a single heap region.
+  HeapWord* allocate_during_gc(GCAllocPurpose purpose, size_t word_size);
+  HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size);
+
+  HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose,
+                                    HeapRegion*    alloc_region,
+                                    bool           par,
+                                    size_t         word_size);
+
+  // Ensure that no further allocations can happen in "r", bearing in mind
+  // that parallel threads might be attempting allocations.
+  void par_allocate_remaining_space(HeapRegion* r);
+
+  // Helper function for two callbacks below.
+  // "full", if true, indicates that the GC is for a System.gc() request,
+  // and should collect the entire heap.  If "clear_all_soft_refs" is true,
+  // all soft references are cleared during the GC.  If "full" is false,
+  // "word_size" describes the allocation that the GC should
+  // attempt (at least) to satisfy.
+  void do_collection(bool full, bool clear_all_soft_refs,
+                     size_t word_size);
+
+  // Callback from VM_G1CollectFull operation.
+  // Perform a full collection.
+  void do_full_collection(bool clear_all_soft_refs);
+
+  // Resize the heap if necessary after a full collection.  If this is
+  // after a collect-for allocation, "word_size" is the allocation size,
+  // and will be considered part of the used portion of the heap.
+  void resize_if_necessary_after_full_collection(size_t word_size);
+
+  // Callback from VM_G1CollectForAllocation operation.
+  // This function does everything necessary/possible to satisfy a
+  // failed allocation request (including collection, expansion, etc.)
+  HeapWord* satisfy_failed_allocation(size_t word_size);
+
+  // Attempt to expand the heap sufficiently
+  // to support an allocation of the given "word_size".  If
+  // successful, perform the allocation and return the address of the
+  // allocated block, or else "NULL".
+  virtual HeapWord* expand_and_allocate(size_t word_size);
+
+public:
+  // Expand the garbage-first heap by at least the given size (in bytes!).
+  // (Rounds up to a HeapRegion boundary.)
+  virtual void expand(size_t expand_bytes);
+
+  // Do anything common to GC's.
+  virtual void gc_prologue(bool full);
+  virtual void gc_epilogue(bool full);
+
+protected:
+
+  // Shrink the garbage-first heap by at most the given size (in bytes!).
+  // (Rounds down to a HeapRegion boundary.)
+  virtual void shrink(size_t expand_bytes);
+  void shrink_helper(size_t expand_bytes);
+
+  // Do an incremental collection: identify a collection set, and evacuate
+  // its live objects elsewhere.
+  virtual void do_collection_pause();
+
+  // The guts of the incremental collection pause, executed by the vm
+  // thread.  If "popular_region" is non-NULL, this pause should evacuate
+  // this single region whose remembered set has gotten large, moving
+  // any popular objects to one of the popular regions.
+  virtual void do_collection_pause_at_safepoint(HeapRegion* popular_region);
+
+  // Actually do the work of evacuating the collection set.
+  virtual void evacuate_collection_set();
+
+  // If this is an appropriate right time, do a collection pause.
+  // The "word_size" argument, if non-zero, indicates the size of an
+  // allocation request that is prompting this query.
+  void do_collection_pause_if_appropriate(size_t word_size);
+
+  // The g1 remembered set of the heap.
+  G1RemSet* _g1_rem_set;
+  // And its mod ref barrier set, used to track updates for the above.
+  ModRefBarrierSet* _mr_bs;
+
+  // The Heap Region Rem Set Iterator.
+  HeapRegionRemSetIterator** _rem_set_iterator;
+
+  // The closure used to refine a single card.
+  RefineCardTableEntryClosure* _refine_cte_cl;
+
+  // A function to check the consistency of dirty card logs.
+  void check_ct_logs_at_safepoint();
+
+  // After a collection pause, make the regions in the CS into free
+  // regions.
+  void free_collection_set(HeapRegion* cs_head);
+
+  // Applies "scan_non_heap_roots" to roots outside the heap,
+  // "scan_rs" to roots inside the heap (having done "set_region" to
+  // indicate the region in which the root resides), and does "scan_perm"
+  // (setting the generation to the perm generation.)  If "scan_rs" is
+  // NULL, then this step is skipped.  The "worker_i"
+  // param is for use with parallel roots processing, and should be
+  // the "i" of the calling parallel worker thread's work(i) function.
+  // In the sequential case this param will be ignored.
+  void g1_process_strong_roots(bool collecting_perm_gen,
+                               SharedHeap::ScanningOption so,
+                               OopClosure* scan_non_heap_roots,
+                               OopsInHeapRegionClosure* scan_rs,
+                               OopsInHeapRegionClosure* scan_so,
+                               OopsInGenClosure* scan_perm,
+                               int worker_i);
+
+  void scan_scan_only_set(OopsInHeapRegionClosure* oc,
+                          int worker_i);
+  void scan_scan_only_region(HeapRegion* hr,
+                             OopsInHeapRegionClosure* oc,
+                             int worker_i);
+
+  // Apply "blk" to all the weak roots of the system.  These include
+  // JNI weak roots, the code cache, system dictionary, symbol table,
+  // string table, and referents of reachable weak refs.
+  void g1_process_weak_roots(OopClosure* root_closure,
+                             OopClosure* non_root_closure);
+
+  // Invoke "save_marks" on all heap regions.
+  void save_marks();
+
+  // Free a heap region.
+  void free_region(HeapRegion* hr);
+  // A component of "free_region", exposed for 'batching'.
+  // All the params after "hr" are out params: the used bytes of the freed
+  // region(s), the number of H regions cleared, the number of regions
+  // freed, and pointers to the head and tail of a list of freed contig
+  // regions, linked through the "next_on_unclean_list" field.
+  void free_region_work(HeapRegion* hr,
+                        size_t& pre_used,
+                        size_t& cleared_h,
+                        size_t& freed_regions,
+                        UncleanRegionList* list,
+                        bool par = false);
+
+
+  // The concurrent marker (and the thread it runs in.)
+  ConcurrentMark* _cm;
+  ConcurrentMarkThread* _cmThread;
+  bool _mark_in_progress;
+
+  // The concurrent refiner.
+  ConcurrentG1Refine* _cg1r;
+
+  // The concurrent zero-fill thread.
+  ConcurrentZFThread* _czft;
+
+  // The parallel task queues
+  RefToScanQueueSet *_task_queues;
+
+  // True iff an evacuation has failed in the current collection.
+  bool _evacuation_failed;
+
+  // Set the attribute indicating whether evacuation has failed in the
+  // current collection.
+  void set_evacuation_failed(bool b) { _evacuation_failed = b; }
+
+  // Failed evacuations cause some logical from-space objects to have
+  // forwarding pointers to themselves.  Reset them.
+  void remove_self_forwarding_pointers();
+
+  // When one is non-null, so is the other.  Entries at the same index form
+  // a pair: an object with a preserved mark, and its mark value.
+  GrowableArray<oop>*     _objs_with_preserved_marks;
+  GrowableArray<markOop>* _preserved_marks_of_objs;
+
+  // Preserve the mark of "obj", if necessary, in preparation for its mark
+  // word being overwritten with a self-forwarding-pointer.
+  void preserve_mark_if_necessary(oop obj, markOop m);
+
+  // The stack of evac-failure objects left to be scanned.
+  GrowableArray<oop>*    _evac_failure_scan_stack;
+  // The closure to apply to evac-failure objects.
+
+  OopsInHeapRegionClosure* _evac_failure_closure;
+  // Set the field above.
+  void
+  set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_closure) {
+    _evac_failure_closure = evac_failure_closure;
+  }
+
+  // Push "obj" on the scan stack.
+  void push_on_evac_failure_scan_stack(oop obj);
+  // Process scan stack entries until the stack is empty.
+  void drain_evac_failure_scan_stack();
+  // True iff an invocation of "drain_scan_stack" is in progress; to
+  // prevent unnecessary recursion.
+  bool _drain_in_progress;
+
+  // Do any necessary initialization for evacuation-failure handling.
+  // "cl" is the closure that will be used to process evac-failure
+  // objects.
+  void init_for_evac_failure(OopsInHeapRegionClosure* cl);
+  // Do any necessary cleanup for evacuation-failure handling data
+  // structures.
+  void finalize_for_evac_failure();
+
+  // An attempt to evacuate "obj" has failed; take necessary steps.
+  void handle_evacuation_failure(oop obj);
+  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
+  void handle_evacuation_failure_common(oop obj, markOop m);
+
+
+  // Ensure that the relevant gc_alloc regions are set.
+  void get_gc_alloc_regions();
+  // We're done with GC alloc regions; release them, as appropriate.
+  void release_gc_alloc_regions();
+
+  // ("Weak") Reference processing support
+  ReferenceProcessor* _ref_processor;
+
+  enum G1H_process_strong_roots_tasks {
+    G1H_PS_mark_stack_oops_do,
+    G1H_PS_refProcessor_oops_do,
+    // Leave this one last.
+    G1H_PS_NumElements
+  };
+
+  SubTasksDone* _process_strong_tasks;
+
+  // Allocate space to hold a popular object.  Result is guaranteed below
+  // "popular_object_boundary()".  Note: CURRENTLY halts the system if we
+  // run out of space to hold popular objects.
+  HeapWord* allocate_popular_object(size_t word_size);
+
+  // The boundary between popular and non-popular objects.
+  HeapWord* _popular_object_boundary;
+
+  HeapRegionList* _popular_regions_to_be_evacuated;
+
+  // Compute which objects in "single_region" are popular.  If any are,
+  // evacuate them to a popular region, leaving behind forwarding pointers,
+  // and select "popular_region" as the single collection set region.
+  // Otherwise, leave the collection set null.
+  void popularity_pause_preamble(HeapRegion* popular_region);
+
+  // Compute which objects in "single_region" are popular, and evacuate
+  // them to a popular region, leaving behind forwarding pointers.
+  // Returns "true" if at least one popular object is discovered and
+  // evacuated.  In any case, "*max_rc" is set to the maximum reference
+  // count of an object in the region.
+  bool compute_reference_counts_and_evac_popular(HeapRegion* popular_region,
+                                                 size_t* max_rc);
+  // Subroutines used in the above.
+  bool _rc_region_above;
+  size_t _rc_region_diff;
+  jint* obj_rc_addr(oop obj) {
+    uintptr_t obj_addr = (uintptr_t)obj;
+    if (_rc_region_above) {
+      jint* res = (jint*)(obj_addr + _rc_region_diff);
+      assert((uintptr_t)res > obj_addr, "RC region is above.");
+      return res;
+    } else {
+      jint* res = (jint*)(obj_addr - _rc_region_diff);
+      assert((uintptr_t)res < obj_addr, "RC region is below.");
+      return res;
+    }
+  }
+  jint obj_rc(oop obj) {
+    return *obj_rc_addr(obj);
+  }
+  void inc_obj_rc(oop obj) {
+    (*obj_rc_addr(obj))++;
+  }
+  void atomic_inc_obj_rc(oop obj);
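+
+  // Worked example of the address arithmetic above (illustrative only; the
+  // addresses are made up, not from the original change): if the popular
+  // region starts at 0x10000000 and the scratch RC region was placed above
+  // it at 0x14000000, then _rc_region_above is true and _rc_region_diff is
+  // 0x04000000, so the counter for an object at 0x10001230 is the jint at
+  // 0x14001230.  Each object's count lives at the same byte offset within
+  // the RC region as the object itself has within the popular region.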
+
+
+  // Number of popular objects and bytes (latter is cheaper!).
+  size_t pop_object_used_objs();
+  size_t pop_object_used_bytes();
+
+  // Index of the popular region in which allocation is currently being
+  // done.
+  int _cur_pop_hr_index;
+
+  // List of regions which require zero filling.
+  UncleanRegionList _unclean_region_list;
+  bool _unclean_regions_coming;
+
+  bool check_age_cohort_well_formed_work(int a, HeapRegion* hr);
+
+public:
+  void set_refine_cte_cl_concurrency(bool concurrent);
+
+  RefToScanQueue *task_queue(int i);
+
+  // Create a G1CollectedHeap with the specified policy.
+  // Must call the initialize method afterwards.
+  // May not return if something goes wrong.
+  G1CollectedHeap(G1CollectorPolicy* policy);
+
+  // Initialize the G1CollectedHeap to have the initial and
+  // maximum sizes, permanent generation, and remembered and barrier sets
+  // specified by the policy object.
+  jint initialize();
+
+  void ref_processing_init();
+
+  void set_par_threads(int t) {
+    SharedHeap::set_par_threads(t);
+    _process_strong_tasks->set_par_threads(t);
+  }
+
+  virtual CollectedHeap::Name kind() const {
+    return CollectedHeap::G1CollectedHeap;
+  }
+
+  // The current policy object for the collector.
+  G1CollectorPolicy* g1_policy() const { return _g1_policy; }
+
+  // Adaptive size policy.  No such thing for g1.
+  virtual AdaptiveSizePolicy* size_policy() { return NULL; }
+
+  // The rem set and barrier set.
+  G1RemSet* g1_rem_set() const { return _g1_rem_set; }
+  ModRefBarrierSet* mr_bs() const { return _mr_bs; }
+
+  // The rem set iterator.
+  HeapRegionRemSetIterator* rem_set_iterator(int i) {
+    return _rem_set_iterator[i];
+  }
+
+  HeapRegionRemSetIterator* rem_set_iterator() {
+    return _rem_set_iterator[0];
+  }
+
+  unsigned get_gc_time_stamp() {
+    return _gc_time_stamp;
+  }
+
+  void reset_gc_time_stamp() {
+    _gc_time_stamp = 0;
+    OrderAccess::fence();
+  }
+
+  void increment_gc_time_stamp() {
+    ++_gc_time_stamp;
+    OrderAccess::fence();
+  }
+
+  void iterate_dirty_card_closure(bool concurrent, int worker_i);
+
+  // The shared block offset table array.
+  G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; }
+
+  // Reference Processing accessor
+  ReferenceProcessor* ref_processor() { return _ref_processor; }
+
+  // Reserved (g1 only; super method includes perm), capacity and the used
+  // portion in bytes.
+  size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); }
+  virtual size_t capacity() const;
+  virtual size_t used() const;
+  size_t recalculate_used() const;
+#ifndef PRODUCT
+  size_t recalculate_used_regions() const;
+#endif // PRODUCT
+
+  // These virtual functions do the actual allocation.
+  virtual HeapWord* mem_allocate(size_t word_size,
+                                 bool   is_noref,
+                                 bool   is_tlab,
+                                 bool* gc_overhead_limit_was_exceeded);
+
+  // Some heaps may offer a contiguous region for shared non-blocking
+  // allocation, via inlined code (by exporting the address of the top and
+  // end fields defining the extent of the contiguous allocation region.)
+  // But G1CollectedHeap doesn't yet support this.
+
+  // Return an estimate of the maximum allocation that could be performed
+  // without triggering any collection or expansion activity.  In a
+  // generational collector, for example, this is probably the largest
+  // allocation that could be supported (without expansion) in the youngest
+  // generation.  It is "unsafe" because no locks are taken; the result
+  // should be treated as an approximation, not a guarantee, for use in
+  // heuristic resizing decisions.
+  virtual size_t unsafe_max_alloc();
+
+  virtual bool is_maximal_no_gc() const {
+    return _g1_storage.uncommitted_size() == 0;
+  }
+
+  // The total number of regions in the heap.
+  size_t n_regions();
+
+  // The maximum number of regions the heap can contain.
+  size_t max_regions();
+
+  // The number of regions that are completely free.
+  size_t free_regions();
+
+  // The number of regions that are not completely free.
+  size_t used_regions() { return n_regions() - free_regions(); }
+
+  // True iff the ZF thread should run.
+  bool should_zf();
+
+  // The number of regions available for "regular" expansion.
+  size_t expansion_regions() { return _expansion_regions; }
+
+#ifndef PRODUCT
+  bool regions_accounted_for();
+  bool print_region_accounting_info();
+  void print_region_counts();
+#endif
+
+  HeapRegion* alloc_region_from_unclean_list(bool zero_filled);
+  HeapRegion* alloc_region_from_unclean_list_locked(bool zero_filled);
+
+  void put_region_on_unclean_list(HeapRegion* r);
+  void put_region_on_unclean_list_locked(HeapRegion* r);
+
+  void prepend_region_list_on_unclean_list(UncleanRegionList* list);
+  void prepend_region_list_on_unclean_list_locked(UncleanRegionList* list);
+
+  void set_unclean_regions_coming(bool b);
+  void set_unclean_regions_coming_locked(bool b);
+  // Wait for cleanup to be complete.
+  void wait_for_cleanup_complete();
+  // Like above, but assumes that the calling thread owns the Heap_lock.
+  void wait_for_cleanup_complete_locked();
+
+  // Return the head of the unclean list.
+  HeapRegion* peek_unclean_region_list_locked();
+  // Remove and return the head of the unclean list.
+  HeapRegion* pop_unclean_region_list_locked();
+
+  // List of regions which are zero filled and ready for allocation.
+  HeapRegion* _free_region_list;
+  // Number of elements on the free list.
+  size_t _free_region_list_size;
+
+  // If the head of the unclean list is ZeroFilled, move it to the free
+  // list.
+  bool move_cleaned_region_to_free_list_locked();
+  bool move_cleaned_region_to_free_list();
+
+  void put_free_region_on_list_locked(HeapRegion* r);
+  void put_free_region_on_list(HeapRegion* r);
+
+  // Remove and return the head element of the free list.
+  HeapRegion* pop_free_region_list_locked();
+
+  // If "zero_filled" is true, we first try the free list, then we try the
+  // unclean list, zero-filling the result.  If "zero_filled" is false, we
+  // first try the unclean list, then the free list (of zero-filled regions).
+  HeapRegion* alloc_free_region_from_lists(bool zero_filled);
+
+  // Verify the integrity of the region lists.
+  void remove_allocated_regions_from_lists();
+  bool verify_region_lists();
+  bool verify_region_lists_locked();
+  size_t unclean_region_list_length();
+  size_t free_region_list_length();
+
+  // Perform a collection of the heap; intended for use in implementing
+  // "System.gc".  This probably implies as full a collection as the
+  // "CollectedHeap" supports.
+  virtual void collect(GCCause::Cause cause);
+
+  // The same as above but assume that the caller holds the Heap_lock.
+  void collect_locked(GCCause::Cause cause);
+
+  // This interface assumes that it's being called by the
+  // vm thread. It collects the heap assuming that the
+  // heap lock is already held and that we are executing in
+  // the context of the vm thread.
+  virtual void collect_as_vm_thread(GCCause::Cause cause);
+
+  // True iff an evacuation has failed in the most-recent collection.
+  bool evacuation_failed() { return _evacuation_failed; }
+
+  // Free a region if it is totally full of garbage.  Returns the number of
+  // bytes freed (0 ==> didn't free it).
+  size_t free_region_if_totally_empty(HeapRegion *hr);
+  void free_region_if_totally_empty_work(HeapRegion *hr,
+                                         size_t& pre_used,
+                                         size_t& cleared_h_regions,
+                                         size_t& freed_regions,
+                                         UncleanRegionList* list,
+                                         bool par = false);
+
+  // If we've done free region work that yields the given changes, update
+  // the relevant global variables.
+  void finish_free_region_work(size_t pre_used,
+                               size_t cleared_h_regions,
+                               size_t freed_regions,
+                               UncleanRegionList* list);
+
+
+  // Returns "TRUE" iff "p" points into the allocated area of the heap.
+  virtual bool is_in(const void* p) const;
+
+  // Return "TRUE" iff the given object address is within the collection
+  // set.
+  inline bool obj_in_cs(oop obj);
+
+  // Return "TRUE" iff the given object address is in the reserved
+  // region of g1 (excluding the permanent generation).
+  bool is_in_g1_reserved(const void* p) const {
+    return _g1_reserved.contains(p);
+  }
+
+  // Returns a MemRegion that corresponds to the space that has been
+  // committed in the heap.
+  MemRegion g1_committed() {
+    return _g1_committed;
+  }
+
+  NOT_PRODUCT( bool is_in_closed_subset(const void* p) const; )
+
+  // Dirty card table entries covering a list of young regions.
+  void dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list);
+
+  // This resets the card table to all zeros.  It is used after
+  // a collection pause which used the card table to claim cards.
+  void cleanUpCardTable();
+
+  // Iteration functions.
+
+  // Iterate over all the ref-containing fields of all objects, calling
+  // "cl.do_oop" on each.
+  virtual void oop_iterate(OopClosure* cl);
+
+  // Same as above, restricted to a memory region.
+  virtual void oop_iterate(MemRegion mr, OopClosure* cl);
+
+  // Iterate over all objects, calling "cl.do_object" on each.
+  virtual void object_iterate(ObjectClosure* cl);
+
+  // Iterate over all objects allocated since the last collection, calling
+  // "cl.do_object" on each.  The heap must have been initialized properly
+  // to support this function, or else this call will fail.
+  virtual void object_iterate_since_last_GC(ObjectClosure* cl);
+
+  // Iterate over all spaces in use in the heap, in ascending address order.
+  virtual void space_iterate(SpaceClosure* cl);
+
+  // Iterate over heap regions, in address order, terminating the
+  // iteration early if the "doHeapRegion" method returns "true".
+  void heap_region_iterate(HeapRegionClosure* blk);
+
+  // Iterate over heap regions starting with r (or the first region if "r"
+  // is NULL), in address order, terminating early if the "doHeapRegion"
+  // method returns "true".
+  void heap_region_iterate_from(HeapRegion* r, HeapRegionClosure* blk);
+
+  // As above but starting from the region at index idx.
+  void heap_region_iterate_from(int idx, HeapRegionClosure* blk);
+
+  HeapRegion* region_at(size_t idx);
+
+  // Divide the heap region sequence into "chunks" of some size (the number
+  // of regions divided by the number of parallel threads times some
+  // overpartition factor, currently 4).  Assumes that this will be called
+  // in parallel by ParallelGCThreads worker threads with distinct worker
+  // ids in the range [0..max(ParallelGCThreads-1, 1)], that all parallel
+  // calls will use the same "claim_value", and that that claim value is
+  // different from the claim_value of any heap region before the start of
+  // the iteration.  Applies "blk->doHeapRegion" to each of the regions, by
+  // attempting to claim the first region in each chunk, and, if
+  // successful, applying the closure to each region in the chunk (and
+  // setting the claim value of the second and subsequent regions of the
+  // chunk.)  For now requires that "doHeapRegion" always returns "false",
+  // i.e., that a closure never attempt to abort a traversal.
+  void heap_region_par_iterate_chunked(HeapRegionClosure* blk,
+                                       int worker,
+                                       jint claim_value);
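+
+  // A minimal usage sketch (illustrative only, not part of the original
+  // change; the task and closure names are hypothetical, and the claim
+  // value is arbitrary as long as it has not been used on any region
+  // before this iteration starts):
+#if 0
+  class ExampleRegionClosure : public HeapRegionClosure {
+  public:
+    bool doHeapRegion(HeapRegion* r) {
+      // ... per-region work ...
+      return false;  // must never abort the traversal (see above).
+    }
+  };
+
+  class ExampleParTask : public AbstractGangTask {
+    G1CollectedHeap* _g1h;
+  public:
+    ExampleParTask(G1CollectedHeap* g1h)
+      : AbstractGangTask("Example chunked iteration"), _g1h(g1h) {}
+    void work(int i) {
+      ExampleRegionClosure cl;
+      // Every worker passes the same, previously unused claim value.
+      _g1h->heap_region_par_iterate_chunked(&cl, i, /* claim_value */ 2);
+    }
+  };
+#endif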
+
+  // It resets all the region claim values to the default.
+  void reset_heap_region_claim_values();
+
+#ifdef ASSERT
+  bool check_heap_region_claim_values(jint claim_value);
+#endif // ASSERT
+
+  // Iterate over the regions (if any) in the current collection set.
+  void collection_set_iterate(HeapRegionClosure* blk);
+
+  // As above but starting from region r
+  void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk);
+
+  // Returns the first (lowest address) compactible space in the heap.
+  virtual CompactibleSpace* first_compactible_space();
+
+  // A CollectedHeap will contain some number of spaces.  This finds the
+  // space containing a given address, or else returns NULL.
+  virtual Space* space_containing(const void* addr) const;
+
+  // A G1CollectedHeap will contain some number of heap regions.  This
+  // finds the region containing a given address, or else returns NULL.
+  HeapRegion* heap_region_containing(const void* addr) const;
+
+  // Like the above, but requires "addr" to be in the heap (to avoid a
+  // null-check), and unlike the above, may return a continuing humongous
+  // region.
+  HeapRegion* heap_region_containing_raw(const void* addr) const;
+
+  // A CollectedHeap is divided into a dense sequence of "blocks"; that is,
+  // each address in the (reserved) heap is a member of exactly
+  // one block.  The defining characteristic of a block is that it is
+  // possible to find its size, and thus to progress forward to the next
+  // block.  (Blocks may be of different sizes.)  Thus, blocks may
+  // represent Java objects, or they might be free blocks in a
+  // free-list-based heap (or subheap), as long as the two kinds are
+  // distinguishable and the size of each is determinable.
+
+  // Returns the address of the start of the "block" that contains the
+  // address "addr".  We say "blocks" instead of "object" since some heaps
+  // may not pack objects densely; a chunk may either be an object or a
+  // non-object.
+  virtual HeapWord* block_start(const void* addr) const;
+
+  // Requires "addr" to be the start of a chunk, and returns its size.
+  // "addr + size" is required to be the start of a new chunk, or the end
+  // of the active area of the heap.
+  virtual size_t block_size(const HeapWord* addr) const;
+
+  // Requires "addr" to be the start of a block, and returns "TRUE" iff
+  // the block is an object.
+  virtual bool block_is_obj(const HeapWord* addr) const;
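+
+  // A minimal sketch (illustrative only, not part of the original change)
+  // of walking the block structure described above using just these three
+  // virtuals; "mr" is assumed to cover committed heap only.
+#if 0
+  void walk_blocks_sketch(MemRegion mr) {
+    HeapWord* cur = block_start(mr.start());
+    while (cur < mr.end()) {
+      if (block_is_obj(cur)) {
+        // "cur" is the start of a Java object.
+      }
+      cur += block_size(cur);  // advance by the block's size in words.
+    }
+  }
+#endif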
+
+  // Does this heap support heap inspection? (+PrintClassHistogram)
+  virtual bool supports_heap_inspection() const { return true; }
+
+  // Section on thread-local allocation buffers (TLABs)
+  // See CollectedHeap for semantics.
+
+  virtual bool supports_tlab_allocation() const;
+  virtual size_t tlab_capacity(Thread* thr) const;
+  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
+  virtual HeapWord* allocate_new_tlab(size_t size);
+
+  // Can a compiler initialize a new object without store barriers?
+  // This permission only extends from the creation of a new object
+  // via a TLAB up to the first subsequent safepoint.
+  virtual bool can_elide_tlab_store_barriers() const {
+    // G1's TLABs may, on occasion, come from non-young regions as well,
+    // so we cannot elide the store barriers.  (Is there a flag controlling that? XXX)
+    return false;
+  }
+
+  // Can a compiler elide a store barrier when it writes
+  // a permanent oop into the heap?  Applies when the compiler
+  // is storing x to the heap, where x->is_perm() is true.
+  virtual bool can_elide_permanent_oop_store_barriers() const {
+    // At least until perm gen collection is also G1-ified, at
+    // which point this should return false.
+    return true;
+  }
+
+  virtual bool allocs_are_zero_filled();
+
+  // The boundary between a "large" and "small" array of primitives, in
+  // words.
+  virtual size_t large_typearray_limit();
+
+  // All popular objects are guaranteed to have addresses below this
+  // boundary.
+  HeapWord* popular_object_boundary() {
+    return _popular_object_boundary;
+  }
+
+  // Declare the region as one that should be evacuated because its
+  // remembered set is too large.
+  void schedule_popular_region_evac(HeapRegion* r);
+  // If there is a popular region to evacuate it, remove it from the list
+  // and return it.
+  HeapRegion* popular_region_to_evac();
+  // Evacuate the given popular region.
+  void evac_popular_region(HeapRegion* r);
+
+  // Returns "true" iff the given word_size is "very large".
+  static bool isHumongous(size_t word_size) {
+    return word_size >= VeryLargeInWords;
+  }
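+
+  // Worked example (illustrative; the region size is hypothetical, not
+  // taken from this change): with 1 MB regions, VeryLargeInBytes is 512 KB,
+  // so on a 64-bit VM VeryLargeInWords is 64K words and any request of
+  // 64K words (512 KB) or more is treated as humongous.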
+
+  // Update mod union table with the set of dirty cards.
+  void updateModUnion();
+
+  // Set the mod union bits corresponding to the given memRegion.  Note
+  // that this is always a safe operation, since it doesn't clear any
+  // bits.
+  void markModUnionRange(MemRegion mr);
+
+  // Records the fact that a marking phase is no longer in progress.
+  void set_marking_complete() {
+    _mark_in_progress = false;
+  }
+  void set_marking_started() {
+    _mark_in_progress = true;
+  }
+  bool mark_in_progress() {
+    return _mark_in_progress;
+  }
+
+  // Print the maximum heap capacity.
+  virtual size_t max_capacity() const;
+
+  virtual jlong millis_since_last_gc();
+
+  // Perform any cleanup actions necessary before allowing a verification.
+  virtual void prepare_for_verify();
+
+  // Perform verification.
+  virtual void verify(bool allow_dirty, bool silent);
+  virtual void print() const;
+  virtual void print_on(outputStream* st) const;
+
+  virtual void print_gc_threads_on(outputStream* st) const;
+  virtual void gc_threads_do(ThreadClosure* tc) const;
+
+  // Override
+  void print_tracing_info() const;
+
+  // If "addr" is a pointer into the (reserved?) heap, returns a positive
+  // number indicating the "arena" within the heap in which "addr" falls.
+  // Or else returns 0.
+  virtual int addr_to_arena_id(void* addr) const;
+
+  // Convenience function to be used in situations where the heap type can be
+  // asserted to be this type.
+  static G1CollectedHeap* heap();
+
+  void empty_young_list();
+  bool should_set_young_locked();
+
+  void set_region_short_lived_locked(HeapRegion* hr);
+  // add appropriate methods for any other surv rate groups
+
+  void young_list_rs_length_sampling_init() {
+    _young_list->rs_length_sampling_init();
+  }
+  bool young_list_rs_length_sampling_more() {
+    return _young_list->rs_length_sampling_more();
+  }
+  void young_list_rs_length_sampling_next() {
+    _young_list->rs_length_sampling_next();
+  }
+  size_t young_list_sampled_rs_lengths() {
+    return _young_list->sampled_rs_lengths();
+  }
+
+  size_t young_list_length()   { return _young_list->length(); }
+  size_t young_list_scan_only_length() {
+    return _young_list->scan_only_length();
+  }
+
+  HeapRegion* pop_region_from_young_list() {
+    return _young_list->pop_region();
+  }
+
+  HeapRegion* young_list_first_region() {
+    return _young_list->first_region();
+  }
+
+  // debugging
+  bool check_young_list_well_formed() {
+    return _young_list->check_list_well_formed();
+  }
+  bool check_young_list_empty(bool ignore_scan_only_list,
+                              bool check_sample = true);
+
+  // *** Stuff related to concurrent marking.  It's not clear to me that so
+  // many of these need to be public.
+
+  // The functions below are helper functions that a subclass of
+  // "CollectedHeap" can use in the implementation of its virtual
+  // functions.
+  // This performs a concurrent marking of the live objects in a
+  // bitmap off to the side.
+  void doConcurrentMark();
+
+  // This is called from the marksweep collector which then does
+  // a concurrent mark and verifies that the results agree with
+  // the stop the world marking.
+  void checkConcurrentMark();
+  void do_sync_mark();
+
+  bool isMarkedPrev(oop obj) const;
+  bool isMarkedNext(oop obj) const;
+
+  // Determine if an object is dead, given the object and also
+  // the region to which the object belongs. An object is dead
+  // iff a) it was not allocated since the last mark and b) it
+  // is not marked.
+
+  bool is_obj_dead(const oop obj, const HeapRegion* hr) const {
+    return
+      !hr->obj_allocated_since_prev_marking(obj) &&
+      !isMarkedPrev(obj);
+  }
+
+  // This is used when copying an object to survivor space.
+  // If the object is marked live, then we mark the copy live.
+  // If the object is allocated since the start of this mark
+  // cycle, then we mark the copy live.
+  // If the object has been around since the previous mark
+  // phase, and hasn't been marked yet during this phase,
+  // then we don't mark it, we just wait for the
+  // current marking cycle to get to it.
+
+  // This function returns true when an object has been
+  // around since the previous marking and hasn't yet
+  // been marked during this marking.
+
+  bool is_obj_ill(const oop obj, const HeapRegion* hr) const {
+    return
+      !hr->obj_allocated_since_next_marking(obj) &&
+      !isMarkedNext(obj);
+  }
+
+  // Determine if an object is dead, given only the object itself.
+  // This will find the region to which the object belongs and
+  // then call the region version of the same function.
+
+  // Additionally, if it is in the permanent generation it is not dead,
+  // and if it is NULL it is not dead.
+
+  bool is_obj_dead(oop obj) {
+    HeapRegion* hr = heap_region_containing(obj);
+    if (hr == NULL) {
+      if (Universe::heap()->is_in_permanent(obj))
+        return false;
+      else if (obj == NULL) return false;
+      else return true;
+    }
+    else return is_obj_dead(obj, hr);
+  }
+
+  bool is_obj_ill(oop obj) {
+    HeapRegion* hr = heap_region_containing(obj);
+    if (hr == NULL) {
+      if (Universe::heap()->is_in_permanent(obj))
+        return false;
+      else if (obj == NULL) return false;
+      else return true;
+    }
+    else return is_obj_ill(obj, hr);
+  }
+
+  // The following is just to alert the verification code
+  // that a full collection has occurred and that the
+  // remembered sets are no longer up to date.
+  bool _full_collection;
+  void set_full_collection() { _full_collection = true;}
+  void clear_full_collection() {_full_collection = false;}
+  bool full_collection() {return _full_collection;}
+
+  ConcurrentMark* concurrent_mark() const { return _cm; }
+  ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
+
+public:
+  void stop_conc_gc_threads();
+
+  // <NEW PREDICTION>
+
+  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
+  void check_if_region_is_too_expensive(double predicted_time_ms);
+  size_t pending_card_num();
+  size_t max_pending_card_num();
+  size_t cards_scanned();
+
+  // </NEW PREDICTION>
+
+protected:
+  size_t _max_heap_capacity;
+
+//  debug_only(static void check_for_valid_allocation_state();)
+
+public:
+  // Temporary: call to mark things unimplemented for the G1 heap (e.g.,
+  // MemoryService).  In productization, we can make this assert false
+  // to catch such places (as well as searching for calls to this...)
+  static void g1_unimplemented();
+
+};
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Inline functions for G1CollectedHeap
+
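+// heap_region_containing() tolerates addresses outside the heap proper (it
+// returns NULL for them, e.g. for addresses in the perm gen) and maps an
+// address in a "continues humongous" region back to its humongous start
+// region. heap_region_containing_raw() asserts that the address is within
+// the heap and returns the containing region without that adjustment.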
+inline HeapRegion*
+G1CollectedHeap::heap_region_containing(const void* addr) const {
+  HeapRegion* hr = _hrs->addr_to_region(addr);
+  // hr can be NULL if addr is in the perm gen
+  if (hr != NULL && hr->continuesHumongous()) {
+    hr = hr->humongous_start_region();
+  }
+  return hr;
+}
+
+inline HeapRegion*
+G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
+  HeapRegion* res = _hrs->addr_to_region(addr);
+  assert(res != NULL, "addr outside of heap?");
+  return res;
+}
+
+inline bool G1CollectedHeap::obj_in_cs(oop obj) {
+  HeapRegion* r = _hrs->addr_to_region(obj);
+  return r != NULL && r->in_collection_set();
+}
+
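+// Fast-path allocation out of the current allocation region. The caller must
+// either be at a safepoint or hold the Heap_lock. If the fast path succeeds
+// outside a safepoint, the Heap_lock is released here; otherwise
+// attempt_allocation_slow() takes over and releases it when appropriate.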
+inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size,
+                                              bool permit_collection_pause) {
+  HeapWord* res = NULL;
+
+  assert( SafepointSynchronize::is_at_safepoint() ||
+          Heap_lock->owned_by_self(), "pre-condition of the call" );
+
+  if (_cur_alloc_region != NULL) {
+
+    // If this allocation causes a region to become non-empty,
+    // then we need to update our free_regions count.
+
+    if (_cur_alloc_region->is_empty()) {
+      res = _cur_alloc_region->allocate(word_size);
+      if (res != NULL)
+        _free_regions--;
+    } else {
+      res = _cur_alloc_region->allocate(word_size);
+    }
+  }
+  if (res != NULL) {
+    if (!SafepointSynchronize::is_at_safepoint()) {
+      assert( Heap_lock->owned_by_self(), "invariant" );
+      Heap_lock->unlock();
+    }
+    return res;
+  }
+  // attempt_allocation_slow will also unlock the heap lock when appropriate.
+  return attempt_allocation_slow(word_size, permit_collection_pause);
+}
+
+inline RefToScanQueue* G1CollectedHeap::task_queue(int i) {
+  return _task_queues->queue(i);
+}
+
+
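+// These consult the marking bitmaps maintained by the concurrent marker: the
+// "prev" bitmap holds the result of the most recently completed marking, the
+// "next" bitmap the marking currently in progress (see is_obj_dead() and
+// is_obj_ill() in g1CollectedHeap.hpp).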
+inline bool G1CollectedHeap::isMarkedPrev(oop obj) const {
+  return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj);
+}
+
+inline bool G1CollectedHeap::isMarkedNext(oop obj) const {
+  return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,3163 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1CollectorPolicy.cpp.incl"
+
+#define PREDICTIONS_VERBOSE 0
+
+// <NEW PREDICTION>
+
+// Different defaults for different numbers of GC threads.
+// They were chosen by running GCOld and SPECjbb on debris with different
+// numbers of GC threads and selecting values based on the results.
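+//
+// Each table below has 8 entries, indexed (roughly) by ParallelGCThreads - 1
+// and clamped to [0, 7]; see the G1CollectorPolicy constructor.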
+
+// all the same
+static double rs_length_diff_defaults[] = {
+  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+};
+
+static double cost_per_card_ms_defaults[] = {
+  0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
+};
+
+static double cost_per_scan_only_region_ms_defaults[] = {
+  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
+};
+
+// all the same
+static double fully_young_cards_per_entry_ratio_defaults[] = {
+  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
+};
+
+static double cost_per_entry_ms_defaults[] = {
+  0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
+};
+
+static double cost_per_byte_ms_defaults[] = {
+  0.00006, 0.00003, 0.00003, 0.000015, 0.000015, 0.00001, 0.00001, 0.000009
+};
+
+// these should be pretty consistent
+static double constant_other_time_ms_defaults[] = {
+  5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
+};
+
+
+static double young_other_cost_per_region_ms_defaults[] = {
+  0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1
+};
+
+static double non_young_other_cost_per_region_ms_defaults[] = {
+  1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30
+};
+
+// </NEW PREDICTION>
+
+G1CollectorPolicy::G1CollectorPolicy() :
+  _parallel_gc_threads((ParallelGCThreads > 0) ? ParallelGCThreads : 1),
+  _n_pauses(0),
+  _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _all_pause_times_ms(new NumberSeq()),
+  _stop_world_start(0.0),
+  _all_stop_world_times_ms(new NumberSeq()),
+  _all_yield_times_ms(new NumberSeq()),
+
+  _all_mod_union_times_ms(new NumberSeq()),
+
+  _non_pop_summary(new NonPopSummary()),
+  _pop_summary(new PopSummary()),
+  _non_pop_abandoned_summary(new NonPopAbandonedSummary()),
+  _pop_abandoned_summary(new PopAbandonedSummary()),
+
+  _cur_clear_ct_time_ms(0.0),
+
+  _region_num_young(0),
+  _region_num_tenured(0),
+  _prev_region_num_young(0),
+  _prev_region_num_tenured(0),
+
+  _aux_num(10),
+  _all_aux_times_ms(new NumberSeq[_aux_num]),
+  _cur_aux_start_times_ms(new double[_aux_num]),
+  _cur_aux_times_ms(new double[_aux_num]),
+  _cur_aux_times_set(new bool[_aux_num]),
+
+  _pop_compute_rc_start(0.0),
+  _pop_evac_start(0.0),
+
+  _concurrent_mark_init_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+
+  // <NEW PREDICTION>
+
+  _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _prev_collection_pause_end_ms(0.0),
+  _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_scan_only_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _fully_young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _partially_young_cards_per_entry_ratio_seq(
+                                         new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _partially_young_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_byte_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_scan_only_region_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _non_young_other_cost_per_region_ms_seq(
+                                         new TruncatedSeq(TruncatedSeqLength)),
+
+  _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)),
+
+  _pause_time_target_ms((double) G1MaxPauseTimeMS),
+
+  // </NEW PREDICTION>
+
+  _in_young_gc_mode(false),
+  _full_young_gcs(true),
+  _full_young_pause_num(0),
+  _partial_young_pause_num(0),
+
+  _during_marking(false),
+  _in_marking_window(false),
+  _in_marking_window_im(false),
+
+  _known_garbage_ratio(0.0),
+  _known_garbage_bytes(0),
+
+  _young_gc_eff_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _target_pause_time_ms(-1.0),
+
+   _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)),
+
+  _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)),
+
+  _recent_avg_pause_time_ratio(0.0),
+  _num_markings(0),
+  _n_marks(0),
+  _n_pauses_at_mark_end(0),
+
+  _all_full_gc_times_ms(new NumberSeq()),
+
+  _conc_refine_enabled(0),
+  _conc_refine_zero_traversals(0),
+  _conc_refine_max_traversals(0),
+  _conc_refine_current_delta(G1ConcRefineInitialDelta),
+
+  // G1PausesBtwnConcMark defaults to -1
+  // so the hack is to do the cast  QQQ FIXME
+  _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
+  _n_marks_since_last_pause(0),
+  _conc_mark_initiated(false),
+  _should_initiate_conc_mark(false),
+  _should_revert_to_full_young_gcs(false),
+  _last_full_young_gc(false),
+
+  _prev_collection_pause_used_at_end_bytes(0),
+
+  _collection_set(NULL),
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+  _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived",
+                                                 G1YoungSurvRateNumRegionsSummary)),
+  _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor",
+                                              G1YoungSurvRateNumRegionsSummary))
+  // add here any more surv rate groups
+{
+  _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
+  _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
+
+  _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_only_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_only_regions_scanned = new double[_parallel_gc_threads];
+
+  _par_last_update_rs_start_times_ms = new double[_parallel_gc_threads];
+  _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
+  _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
+
+  _par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads];
+
+  _par_last_obj_copy_times_ms = new double[_parallel_gc_threads];
+
+  _par_last_termination_times_ms = new double[_parallel_gc_threads];
+
+  // we store the data from the first pass during popularity pauses
+  _pop_par_last_update_rs_start_times_ms = new double[_parallel_gc_threads];
+  _pop_par_last_update_rs_times_ms = new double[_parallel_gc_threads];
+  _pop_par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
+
+  _pop_par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads];
+  _pop_par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
+
+  _pop_par_last_closure_app_times_ms = new double[_parallel_gc_threads];
+
+  // start conservatively
+  _expensive_region_limit_ms = 0.5 * (double) G1MaxPauseTimeMS;
+
+  // <NEW PREDICTION>
+
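+  // Select the entry of the 8-element defaults tables above: 0 or 1 GC
+  // threads map to entry 0, 2..8 threads to entries 1..7, and more than 8
+  // threads also use entry 7.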
+  int index;
+  if (ParallelGCThreads == 0)
+    index = 0;
+  else if (ParallelGCThreads > 8)
+    index = 7;
+  else
+    index = ParallelGCThreads - 1;
+
+  _pending_card_diff_seq->add(0.0);
+  _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
+  _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
+  _cost_per_scan_only_region_ms_seq->add(
+                                 cost_per_scan_only_region_ms_defaults[index]);
+  _fully_young_cards_per_entry_ratio_seq->add(
+                            fully_young_cards_per_entry_ratio_defaults[index]);
+  _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]);
+  _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
+  _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
+  _young_other_cost_per_region_ms_seq->add(
+                               young_other_cost_per_region_ms_defaults[index]);
+  _non_young_other_cost_per_region_ms_seq->add(
+                           non_young_other_cost_per_region_ms_defaults[index]);
+
+  // </NEW PREDICTION>
+
+  double time_slice  = (double) G1TimeSliceMS / 1000.0;
+  double max_gc_time = (double) G1MaxPauseTimeMS / 1000.0;
+  guarantee(max_gc_time < time_slice,
+            "Max GC time should not be greater than the time slice");
+  _mmu_tracker = new G1MMUTrackerQueue(time_slice, max_gc_time);
+  _sigma = (double) G1ConfidencePerc / 100.0;
+
+  // start conservatively (around 50ms is about right)
+  _concurrent_mark_init_times_ms->add(0.05);
+  _concurrent_mark_remark_times_ms->add(0.05);
+  _concurrent_mark_cleanup_times_ms->add(0.20);
+  _tenuring_threshold = MaxTenuringThreshold;
+
+  initialize_all();
+}
+
+// Increment "i", mod "len"
+static void inc_mod(int& i, int len) {
+  i++; if (i == len) i = 0;
+}
+
+void G1CollectorPolicy::initialize_flags() {
+  set_min_alignment(HeapRegion::GrainBytes);
+  set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name()));
+  CollectorPolicy::initialize_flags();
+}
+
+void G1CollectorPolicy::init() {
+  // Set aside an initial future to_space.
+  _g1 = G1CollectedHeap::heap();
+  size_t regions = Universe::heap()->capacity() / HeapRegion::GrainBytes;
+
+  assert(Heap_lock->owned_by_self(), "Locking discipline.");
+
+  if (G1SteadyStateUsed < 50) {
+    vm_exit_during_initialization("G1SteadyStateUsed must be at least 50%.");
+  }
+  if (UseConcMarkSweepGC) {
+    vm_exit_during_initialization("-XX:+UseG1GC is incompatible with "
+                                  "-XX:+UseConcMarkSweepGC.");
+  }
+
+  if (G1Gen) {
+    _in_young_gc_mode = true;
+
+    if (G1YoungGenSize == 0) {
+      set_adaptive_young_list_length(true);
+      _young_list_fixed_length = 0;
+    } else {
+      set_adaptive_young_list_length(false);
+      _young_list_fixed_length = (G1YoungGenSize / HeapRegion::GrainBytes);
+    }
+    _free_regions_at_end_of_collection = _g1->free_regions();
+    _scan_only_regions_at_end_of_collection = 0;
+    calculate_young_list_min_length();
+    guarantee( _young_list_min_length == 0, "invariant, not enough info" );
+    calculate_young_list_target_config();
+  } else {
+    _young_list_fixed_length = 0;
+    _in_young_gc_mode = false;
+  }
+}
+
+void G1CollectorPolicy::calculate_young_list_min_length() {
+  _young_list_min_length = 0;
+
+  if (!adaptive_young_list_length())
+    return;
+
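+  // Once we have enough allocation-rate samples, predict how many more young
+  // regions will be allocated by the time the MMU tracker next allows a GC,
+  // and use that (plus the current young list length) as the minimum young
+  // list length.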
+  if (_alloc_rate_ms_seq->num() > 3) {
+    double now_sec = os::elapsedTime();
+    double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0;
+    double alloc_rate_ms = predict_alloc_rate_ms();
+    int min_regions = (int) ceil(alloc_rate_ms * when_ms);
+    int current_region_num = (int) _g1->young_list_length();
+    _young_list_min_length = min_regions + current_region_num;
+  }
+}
+
+void G1CollectorPolicy::calculate_young_list_target_config() {
+  if (adaptive_young_list_length()) {
+    size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq);
+    calculate_young_list_target_config(rs_lengths);
+  } else {
+    if (full_young_gcs())
+      _young_list_target_length = _young_list_fixed_length;
+    else
+      _young_list_target_length = _young_list_fixed_length / 2;
+    _young_list_target_length = MAX2(_young_list_target_length, (size_t)1);
+    size_t so_length = calculate_optimal_so_length(_young_list_target_length);
+    guarantee( so_length < _young_list_target_length, "invariant" );
+    _young_list_so_prefix_length = so_length;
+  }
+}
+
+// This method calculates the optimal scan-only set for a fixed young
+// gen size. I couldn't work out how to reuse the more elaborate one,
+// i.e. calculate_young_list_target_config(rs_length), as the loops are
+// fundamentally different (the other one finds a config for different
+// S-O lengths, whereas here we need to do the opposite).
+size_t G1CollectorPolicy::calculate_optimal_so_length(
+                                                    size_t young_list_length) {
+  if (!G1UseScanOnlyPrefix)
+    return 0;
+
+  if (_all_pause_times_ms->num() < 3) {
+    // we won't use a scan-only set at the beginning to allow the rest
+    // of the predictors to warm up
+    return 0;
+  }
+
+  if (_cost_per_scan_only_region_ms_seq->num() < 3) {
+    // then, we'll only set the S-O set to 1 for a little bit of time,
+    // to get enough information on the scanning cost
+    return 1;
+  }
+
+  size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq);
+  size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq);
+  size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff();
+  size_t scanned_cards;
+  if (full_young_gcs())
+    scanned_cards = predict_young_card_num(adj_rs_lengths);
+  else
+    scanned_cards = predict_non_young_card_num(adj_rs_lengths);
+  double base_time_ms = predict_base_elapsed_time_ms(pending_cards,
+                                                     scanned_cards);
+
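+  // Linear scan over every candidate S-O length for this fixed young length,
+  // keeping the candidate with the highest predicted GC efficiency.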
+  size_t so_length = 0;
+  double max_gc_eff = 0.0;
+  for (size_t i = 0; i < young_list_length; ++i) {
+    double gc_eff = 0.0;
+    double pause_time_ms = 0.0;
+    predict_gc_eff(young_list_length, i, base_time_ms,
+                   &gc_eff, &pause_time_ms);
+    if (gc_eff > max_gc_eff) {
+      max_gc_eff = gc_eff;
+      so_length = i;
+    }
+  }
+
+  // set it to 95% of the optimal to make sure we sample the "area"
+  // around the optimal length to get up-to-date survival rate data
+  return so_length * 950 / 1000;
+}
+
+// This is a really cool piece of code! It finds the best
+// target configuration (young length / scan-only prefix length) so
+// that GC efficiency is maximized and we also meet the pause time
+// target. It's a triple nested loop. These loops are explained below
+// from the inside-out :-)
+//
+// (a) The innermost loop will try to find the optimal young length
+// for a fixed S-O length. It uses a binary search to speed up the
+// process. We assume that, for a fixed S-O length, as we add more
+// young regions to the CSet, the GC efficiency will only go up (I'll
+// skip the proof). So, using a binary search to optimize this process
+// makes perfect sense.
+//
+// (b) The middle loop will fix the S-O length before calling the
+// innermost one. It will vary it between two parameters, increasing
+// it by a given increment.
+//
+// (c) The outermost loop will call the middle loop three times.
+//   (1) The first time it will explore all possible S-O length values
+//   from 0 to as large as it can get, using a coarse increment (to
+//   quickly "home in" to where the optimal seems to be).
+//   (2) The second time it will explore the values around the optimal
+//   that was found by the first iteration using a fine increment.
+//   (3) Once the optimal config has been determined by the second
+//   iteration, we'll redo the calculation, but setting the S-O length
+//   to 95% of the optimal to make sure we sample the "area"
+//   around the optimal length to get up-to-date survival rate data
+//
+// Termination conditions for the iterations are several: the pause
+// time is over the limit, we do not have enough to-space, etc.
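+//
+// Schematically (illustrative only):
+//   for pass in { coarse, fine, final }:            // outermost loop
+//     for so_length in [from, to] step incr:        // middle loop
+//       search for the best feasible young_length   // innermost loop
+//   keeping the (young_length, so_length) pair with the best GC efficiency.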
+
+void G1CollectorPolicy::calculate_young_list_target_config(size_t rs_lengths) {
+  guarantee( adaptive_young_list_length(), "pre-condition" );
+
+  double start_time_sec = os::elapsedTime();
+  size_t min_reserve_perc = MAX2((size_t)2, (size_t)G1MinReservePerc);
+  min_reserve_perc = MIN2((size_t) 50, min_reserve_perc);
+  size_t reserve_regions =
+    (size_t) ((double) min_reserve_perc * (double) _g1->n_regions() / 100.0);
+
+  if (full_young_gcs() && _free_regions_at_end_of_collection > 0) {
+    // we are in fully-young mode and there are free regions in the heap
+
+    size_t min_so_length = 0;
+    size_t max_so_length = 0;
+
+    if (G1UseScanOnlyPrefix) {
+      if (_all_pause_times_ms->num() < 3) {
+        // we won't use a scan-only set at the beginning to allow the rest
+        // of the predictors to warm up
+        min_so_length = 0;
+        max_so_length = 0;
+      } else if (_cost_per_scan_only_region_ms_seq->num() < 3) {
+        // then, we'll only set the S-O set to 1 for a little bit of time,
+        // to get enough information on the scanning cost
+        min_so_length = 1;
+        max_so_length = 1;
+      } else if (_in_marking_window || _last_full_young_gc) {
+        // no S-O prefix during a marking phase either, as at the end
+        // of the marking phase we'll have to use a very small young
+        // length target to fill up the rest of the CSet with
+        // non-young regions and, if we have lots of scan-only regions
+        // left-over, we will not be able to add any more non-young
+        // regions.
+        min_so_length = 0;
+        max_so_length = 0;
+      } else {
+        // this is the common case; we'll never reach the maximum, as
+        // one of the end conditions will fire well before that
+        // (hopefully!)
+        min_so_length = 0;
+        max_so_length = _free_regions_at_end_of_collection - 1;
+      }
+    } else {
+      // no S-O prefix, as the switch is not set, but we still need to
+      // do one iteration to calculate the best young target that
+      // meets the pause time; this way we reuse the same code instead
+      // of replicating it
+      min_so_length = 0;
+      max_so_length = 0;
+    }
+
+    double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
+    size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq);
+    size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff();
+    size_t scanned_cards;
+    if (full_young_gcs())
+      scanned_cards = predict_young_card_num(adj_rs_lengths);
+    else
+      scanned_cards = predict_non_young_card_num(adj_rs_lengths);
+    // calculate this once, so that we don't have to recalculate it in
+    // the innermost loop
+    double base_time_ms = predict_base_elapsed_time_ms(pending_cards,
+                                                       scanned_cards);
+
+    // the result
+    size_t final_young_length = 0;
+    size_t final_so_length = 0;
+    double final_gc_eff = 0.0;
+    // we'll also keep track of how many times we go into the inner loop
+    // this is for profiling reasons
+    size_t calculations = 0;
+
+    // this determines which of the three iterations the outer loop is in
+    typedef enum {
+      pass_type_coarse,
+      pass_type_fine,
+      pass_type_final
+    } pass_type_t;
+
+    // range of the outer loop's iteration
+    size_t from_so_length   = min_so_length;
+    size_t to_so_length     = max_so_length;
+    guarantee( from_so_length <= to_so_length, "invariant" );
+
+    // this will keep the S-O length that's found by the second
+    // iteration of the outer loop; we'll keep it just in case the third
+    // iteration fails to find something
+    size_t fine_so_length   = 0;
+
+    // the increment step for the coarse (first) iteration
+    size_t so_coarse_increments = 5;
+
+    // the common case, we'll start with the coarse iteration
+    pass_type_t pass = pass_type_coarse;
+    size_t so_length_incr = so_coarse_increments;
+
+    if (from_so_length == to_so_length) {
+      // no point in doing the coarse iteration, we'll go directly into
+      // the fine one (we're essentially trying to find the optimal young
+      // length for a fixed S-O length).
+      so_length_incr = 1;
+      pass = pass_type_final;
+    } else if (to_so_length - from_so_length < 3 * so_coarse_increments) {
+      // again, the range is too short, so no point in doing the coarse
+      // iteration either
+      so_length_incr = 1;
+      pass = pass_type_fine;
+    }
+
+    bool done = false;
+    // this is the outermost loop
+    while (!done) {
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT
+                             ", incr " SIZE_FORMAT ", pass %s",
+                             from_so_length, to_so_length, so_length_incr,
+                             (pass == pass_type_coarse) ? "coarse" :
+                             (pass == pass_type_fine) ? "fine" : "final");
+#endif // 0
+
+      size_t so_length = from_so_length;
+      size_t init_free_regions =
+        MAX2((size_t)0,
+             _free_regions_at_end_of_collection +
+             _scan_only_regions_at_end_of_collection - reserve_regions);
+
+      // this determines whether a configuration was found
+      bool gc_eff_set = false;
+      // this is the middle loop
+      while (so_length <= to_so_length) {
+        // base time, which excludes region-related time; again we
+        // calculate it once to avoid recalculating it in the
+        // innermost loop
+        double base_time_with_so_ms =
+                           base_time_ms + predict_scan_only_time_ms(so_length);
+        // it's already over the pause target, go around
+        if (base_time_with_so_ms > target_pause_time_ms)
+          break;
+
+        size_t starting_young_length = so_length+1;
+
+        // we make sure that the shortest young length that makes sense
+        // (one more than the S-O length) is feasible
+        size_t min_young_length = starting_young_length;
+        double min_gc_eff;
+        bool min_ok;
+        ++calculations;
+        min_ok = predict_gc_eff(min_young_length, so_length,
+                                base_time_with_so_ms,
+                                init_free_regions, target_pause_time_ms,
+                                &min_gc_eff);
+
+        if (min_ok) {
+          // the shortest young length is indeed feasible; we'll now
+          // set up the max young length and we'll do a binary search
+          // between min_young_length and max_young_length
+          size_t max_young_length = _free_regions_at_end_of_collection - 1;
+          double max_gc_eff = 0.0;
+          bool max_ok = false;
+
+          // the innermost loop! (finally!)
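+          // Probe at max_young_length, halving the gap each iteration: if
+          // the probe is feasible, min_young_length catches up to it (and
+          // the probe then moves up by half the old gap); otherwise the
+          // probe drops halfway back towards min_young_length. Thus
+          // min_young_length always tracks the longest young length known
+          // to be feasible.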
+          while (max_young_length > min_young_length) {
+            // we'll make sure that min_young_length is always at a
+            // feasible config
+            guarantee( min_ok, "invariant" );
+
+            ++calculations;
+            max_ok = predict_gc_eff(max_young_length, so_length,
+                                    base_time_with_so_ms,
+                                    init_free_regions, target_pause_time_ms,
+                                    &max_gc_eff);
+
+            size_t diff = (max_young_length - min_young_length) / 2;
+            if (max_ok) {
+              min_young_length = max_young_length;
+              min_gc_eff = max_gc_eff;
+              min_ok = true;
+            }
+            max_young_length = min_young_length + diff;
+          }
+
+          // the innermost loop found a config
+          guarantee( min_ok, "invariant" );
+          if (min_gc_eff > final_gc_eff) {
+            // it's the best config so far, so we'll keep it
+            final_gc_eff = min_gc_eff;
+            final_young_length = min_young_length;
+            final_so_length = so_length;
+            gc_eff_set = true;
+          }
+        }
+
+        // increment the fixed S-O length and go around
+        so_length += so_length_incr;
+      }
+
+      // this is the end of the outermost loop and we need to decide
+      // what to do during the next iteration
+      if (pass == pass_type_coarse) {
+        // we just did the coarse pass (first iteration)
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config so we'll just bail out; of
+          // course, it might be the case that we missed it; but I'd say
+          // it's a bit unlikely
+          done = true;
+        else {
+          // We did find a feasible config with optimal GC eff during
+          // the first pass. So the second pass we'll only consider the
+          // S-O lengths around that config with a fine increment.
+
+          guarantee( so_length_incr == so_coarse_increments, "invariant" );
+          guarantee( final_so_length >= min_so_length, "invariant" );
+
+#if 0
+          // leave this in for debugging, just in case
+          gclog_or_tty->print_cr("  coarse pass: SO length " SIZE_FORMAT,
+                                 final_so_length);
+#endif // 0
+
+          from_so_length =
+            (final_so_length - min_so_length > so_coarse_increments) ?
+            final_so_length - so_coarse_increments + 1 : min_so_length;
+          to_so_length =
+            (max_so_length - final_so_length > so_coarse_increments) ?
+            final_so_length + so_coarse_increments - 1 : max_so_length;
+
+          pass = pass_type_fine;
+          so_length_incr = 1;
+        }
+      } else if (pass == pass_type_fine) {
+        // we just finished the second pass
+
+        if (!gc_eff_set) {
+          // we didn't find a feasible config (yes, it's possible;
+          // notice that, sometimes, we go directly into the fine
+          // iteration and skip the coarse one) so we bail out
+          done = true;
+        } else {
+          // We did find a feasible config with optimal GC eff
+          guarantee( so_length_incr == 1, "invariant" );
+
+          if (final_so_length == 0) {
+            // The config is of an empty S-O set, so we'll just bail out
+            done = true;
+          } else {
+            // we'll go around once more, setting the S-O length to 95%
+            // of the optimal
+            size_t new_so_length = 950 * final_so_length / 1000;
+
+#if 0
+            // leave this in for debugging, just in case
+            gclog_or_tty->print_cr("  fine pass: SO length " SIZE_FORMAT
+                                   ", setting it to " SIZE_FORMAT,
+                                    final_so_length, new_so_length);
+#endif // 0
+
+            from_so_length = new_so_length;
+            to_so_length = new_so_length;
+            fine_so_length = final_so_length;
+
+            pass = pass_type_final;
+          }
+        }
+      } else if (pass == pass_type_final) {
+        // we just finished the final (third) pass
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config, so we'll just use the one
+          // we found during the second pass, which we saved
+          final_so_length = fine_so_length;
+
+        // and we're done!
+        done = true;
+      } else {
+        guarantee( false, "should never reach here" );
+      }
+
+      // we now go around the outermost loop
+    }
+
+    // we should have at least one region in the target young length
+    _young_list_target_length = MAX2((size_t) 1, final_young_length);
+    if (final_so_length >= final_young_length)
+      // and we need to ensure that the S-O length is not greater than
+      // the target young length (this is being a bit careful)
+      final_so_length = 0;
+    _young_list_so_prefix_length = final_so_length;
+    guarantee( !_in_marking_window || !_last_full_young_gc ||
+               _young_list_so_prefix_length == 0, "invariant" );
+
+    // let's keep an eye on how long we spend on this calculation;
+    // right now, I assume that we'll print it when we need it; we
+    // should really add it to the breakdown of a pause
+    double end_time_sec = os::elapsedTime();
+    double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT
+                           ", SO = " SIZE_FORMAT ", "
+                           "elapsed %1.2lf ms, calcs: " SIZE_FORMAT " (%s%s) "
+                           SIZE_FORMAT SIZE_FORMAT,
+                           target_pause_time_ms,
+                           _young_list_target_length - _young_list_so_prefix_length,
+                           _young_list_so_prefix_length,
+                           elapsed_time_ms,
+                           calculations,
+                           full_young_gcs() ? "full" : "partial",
+                           should_initiate_conc_mark() ? " i-m" : "",
+                           in_marking_window(),
+                           in_marking_window_im());
+#endif // 0
+
+    if (_young_list_target_length < _young_list_min_length) {
+      // bummer; this means that, if we do a pause when the optimal
+      // config dictates, we'll violate the pause spacing target (the
+      // min length was calculated based on the application's current
+      // alloc rate);
+
+      // so, we have to bite the bullet, and allocate the minimum
+      // number. We'll violate our target, but we just can't meet it.
+
+      size_t so_length = 0;
+      // a note further up explains why we do not want an S-O length
+      // during marking
+      if (!_in_marking_window && !_last_full_young_gc)
+        // but we can still try to see whether we can find an optimal
+        // S-O length
+        so_length = calculate_optimal_so_length(_young_list_min_length);
+
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("adjusted target length from "
+                             SIZE_FORMAT " to " SIZE_FORMAT
+                             ", SO " SIZE_FORMAT,
+                             _young_list_target_length, _young_list_min_length,
+                             so_length);
+#endif // 0
+
+      _young_list_target_length =
+        MAX2(_young_list_min_length, (size_t)1);
+      _young_list_so_prefix_length = so_length;
+    }
+  } else {
+    // we are in a partially-young mode or we've run out of regions (due
+    // to evacuation failure)
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
+                           ", SO " SIZE_FORMAT,
+                           _young_list_min_length, 0);
+#endif // 0
+
+    // we'll do the pause as soon as possible and with no S-O prefix
+    // (see above for the reasons behind the latter)
+    _young_list_target_length =
+      MAX2(_young_list_min_length, (size_t) 1);
+    _young_list_so_prefix_length = 0;
+  }
+
+  _rs_lengths_prediction = rs_lengths;
+}
+
+// This is used by: calculate_optimal_so_length(length). It returns
+// the GC eff and predicted pause time for a particular config
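+// (GC efficiency here is reclaimed bytes per ms of pause time; e.g., an
+// illustrative config that reclaims 64 MB in a 40 ms pause scores 1.6 MB/ms.)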
+void
+G1CollectorPolicy::predict_gc_eff(size_t young_length,
+                                  size_t so_length,
+                                  double base_time_ms,
+                                  double* ret_gc_eff,
+                                  double* ret_pause_time_ms) {
+  double so_time_ms = predict_scan_only_time_ms(so_length);
+  double accum_surv_rate_adj = 0.0;
+  if (so_length > 0)
+    accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1));
+  double accum_surv_rate =
+    accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj;
+  size_t bytes_to_copy =
+    (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes);
+  double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy);
+  double young_other_time_ms =
+                       predict_young_other_time_ms(young_length - so_length);
+  double pause_time_ms =
+                base_time_ms + so_time_ms + copy_time_ms + young_other_time_ms;
+  size_t reclaimed_bytes =
+    (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy;
+  double gc_eff = (double) reclaimed_bytes / pause_time_ms;
+
+  *ret_gc_eff = gc_eff;
+  *ret_pause_time_ms = pause_time_ms;
+}
+
+// This is used by: calculate_young_list_target_config(rs_length). It
+// returns the GC eff of a particular config. It returns false if that
+// config violates any of the end conditions of the search in the
+// calling method, or true upon success. The end conditions were put
+// here since it's called twice and it was best not to replicate them
+// in the caller. Also, passing the parameters avoids having to
+// recalculate them in the innermost loop.
+bool
+G1CollectorPolicy::predict_gc_eff(size_t young_length,
+                                  size_t so_length,
+                                  double base_time_with_so_ms,
+                                  size_t init_free_regions,
+                                  double target_pause_time_ms,
+                                  double* ret_gc_eff) {
+  *ret_gc_eff = 0.0;
+
+  if (young_length >= init_free_regions)
+    // end condition 1: not enough space for the young regions
+    return false;
+
+  double accum_surv_rate_adj = 0.0;
+  if (so_length > 0)
+    accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1));
+  double accum_surv_rate =
+    accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj;
+  size_t bytes_to_copy =
+    (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes);
+  double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy);
+  double young_other_time_ms =
+                       predict_young_other_time_ms(young_length - so_length);
+  double pause_time_ms =
+                   base_time_with_so_ms + copy_time_ms + young_other_time_ms;
+
+  if (pause_time_ms > target_pause_time_ms)
+    // end condition 2: over the target pause time
+    return false;
+
+  size_t reclaimed_bytes =
+    (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy;
+  size_t free_bytes =
+                 (init_free_regions - young_length) * HeapRegion::GrainBytes;
+
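+  // Conservative headroom check: for example, if sigma() were 0.5 we would
+  // require free space of at least 2.5x the predicted bytes-to-copy before
+  // accepting this config.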
+  if ((2.0 + sigma()) * (double) bytes_to_copy > (double) free_bytes)
+    // end condition 3: out of to-space (conservatively)
+    return false;
+
+  // success!
+  double gc_eff = (double) reclaimed_bytes / pause_time_ms;
+  *ret_gc_eff = gc_eff;
+
+  return true;
+}
+
+void G1CollectorPolicy::check_prediction_validity() {
+  guarantee( adaptive_young_list_length(), "should not call this otherwise" );
+
+  size_t rs_lengths = _g1->young_list_sampled_rs_lengths();
+  if (rs_lengths > _rs_lengths_prediction) {
+    // add 10% to avoid having to recalculate often
+    size_t rs_lengths_prediction = rs_lengths * 1100 / 1000;
+    calculate_young_list_target_config(rs_lengths_prediction);
+  }
+}
+
+HeapWord* G1CollectorPolicy::mem_allocate_work(size_t size,
+                                               bool is_tlab,
+                                               bool* gc_overhead_limit_was_exceeded) {
+  guarantee(false, "Not using this policy feature yet.");
+  return NULL;
+}
+
+// This method controls how a collector handles one or more
+// of its generations being fully allocated.
+HeapWord* G1CollectorPolicy::satisfy_failed_allocation(size_t size,
+                                                       bool is_tlab) {
+  guarantee(false, "Not using this policy feature yet.");
+  return NULL;
+}
+
+
+#ifndef PRODUCT
+bool G1CollectorPolicy::verify_young_ages() {
+  HeapRegion* head = _g1->young_list_first_region();
+  return
+    verify_young_ages(head, _short_lived_surv_rate_group);
+  // also call verify_young_ages on any additional surv rate groups
+}
+
+bool
+G1CollectorPolicy::verify_young_ages(HeapRegion* head,
+                                     SurvRateGroup *surv_rate_group) {
+  guarantee( surv_rate_group != NULL, "pre-condition" );
+
+  const char* name = surv_rate_group->name();
+  bool ret = true;
+  int prev_age = -1;
+
+  for (HeapRegion* curr = head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    SurvRateGroup* group = curr->surv_rate_group();
+    if (group == NULL && !curr->is_survivor()) {
+      gclog_or_tty->print_cr("## %s: encountered NULL surv_rate_group", name);
+      ret = false;
+    }
+
+    if (surv_rate_group == group) {
+      int age = curr->age_in_surv_rate_group();
+
+      if (age < 0) {
+        gclog_or_tty->print_cr("## %s: encountered negative age", name);
+        ret = false;
+      }
+
+      if (age <= prev_age) {
+        gclog_or_tty->print_cr("## %s: region ages are not strictly increasing "
+                               "(%d, %d)", name, age, prev_age);
+        ret = false;
+      }
+      prev_age = age;
+    }
+  }
+
+  return ret;
+}
+#endif // PRODUCT
+
+void G1CollectorPolicy::record_full_collection_start() {
+  _cur_collection_start_sec = os::elapsedTime();
+  // Release the future to-space so that it is available for compaction into.
+  _g1->set_full_collection();
+}
+
+void G1CollectorPolicy::record_full_collection_end() {
+  // Consider this like a collection pause for the purposes of allocation
+  // since last pause.
+  double end_sec = os::elapsedTime();
+  double full_gc_time_sec = end_sec - _cur_collection_start_sec;
+  double full_gc_time_ms = full_gc_time_sec * 1000.0;
+
+  checkpoint_conc_overhead();
+
+  _all_full_gc_times_ms->add(full_gc_time_ms);
+
+  update_recent_gc_times(end_sec, full_gc_time_sec);
+
+  _g1->clear_full_collection();
+
+  // "Nuke" the heuristics that control the fully/partially young GC
+  // transitions and make sure we start with fully young GCs after the
+  // Full GC.
+  set_full_young_gcs(true);
+  _last_full_young_gc = false;
+  _should_revert_to_full_young_gcs = false;
+  _should_initiate_conc_mark = false;
+  _known_garbage_bytes = 0;
+  _known_garbage_ratio = 0.0;
+  _in_marking_window = false;
+  _in_marking_window_im = false;
+
+  _short_lived_surv_rate_group->record_scan_only_prefix(0);
+  _short_lived_surv_rate_group->start_adding_regions();
+  // also call this on any additional surv rate groups
+
+  _prev_region_num_young   = _region_num_young;
+  _prev_region_num_tenured = _region_num_tenured;
+
+  _free_regions_at_end_of_collection = _g1->free_regions();
+  _scan_only_regions_at_end_of_collection = 0;
+  calculate_young_list_min_length();
+  calculate_young_list_target_config();
+}
+
+void G1CollectorPolicy::record_pop_compute_rc_start() {
+  _pop_compute_rc_start = os::elapsedTime();
+}
+void G1CollectorPolicy::record_pop_compute_rc_end() {
+  double ms = (os::elapsedTime() - _pop_compute_rc_start)*1000.0;
+  _cur_popular_compute_rc_time_ms = ms;
+  _pop_compute_rc_start = 0.0;
+}
+void G1CollectorPolicy::record_pop_evac_start() {
+  _pop_evac_start = os::elapsedTime();
+}
+void G1CollectorPolicy::record_pop_evac_end() {
+  double ms = (os::elapsedTime() - _pop_evac_start)*1000.0;
+  _cur_popular_evac_time_ms = ms;
+  _pop_evac_start = 0.0;
+}
+
+void G1CollectorPolicy::record_before_bytes(size_t bytes) {
+  _bytes_in_to_space_before_gc += bytes;
+}
+
+void G1CollectorPolicy::record_after_bytes(size_t bytes) {
+  _bytes_in_to_space_after_gc += bytes;
+}
+
+void G1CollectorPolicy::record_stop_world_start() {
+  _stop_world_start = os::elapsedTime();
+}
+
+void G1CollectorPolicy::record_collection_pause_start(double start_time_sec,
+                                                      size_t start_used) {
+  if (PrintGCDetails) {
+    gclog_or_tty->stamp(PrintGCTimeStamps);
+    gclog_or_tty->print("[GC pause");
+    if (in_young_gc_mode())
+      gclog_or_tty->print(" (%s)", full_young_gcs() ? "young" : "partial");
+  }
+
+  assert(_g1->used_regions() == _g1->recalculate_used_regions(),
+         "sanity");
+
+  double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0;
+  _all_stop_world_times_ms->add(s_w_t_ms);
+  _stop_world_start = 0.0;
+
+  _cur_collection_start_sec = start_time_sec;
+  _cur_collection_pause_used_at_start_bytes = start_used;
+  _cur_collection_pause_used_regions_at_start = _g1->used_regions();
+  _pending_cards = _g1->pending_card_num();
+  _max_pending_cards = _g1->max_pending_card_num();
+
+  _bytes_in_to_space_before_gc = 0;
+  _bytes_in_to_space_after_gc = 0;
+  _bytes_in_collection_set_before_gc = 0;
+
+#ifdef DEBUG
+  // initialise these to something well known so that we can spot
+  // if they are not set properly
+
+  for (int i = 0; i < _parallel_gc_threads; ++i) {
+    _par_last_ext_root_scan_times_ms[i] = -666.0;
+    _par_last_mark_stack_scan_times_ms[i] = -666.0;
+    _par_last_scan_only_times_ms[i] = -666.0;
+    _par_last_scan_only_regions_scanned[i] = -666.0;
+    _par_last_update_rs_start_times_ms[i] = -666.0;
+    _par_last_update_rs_times_ms[i] = -666.0;
+    _par_last_update_rs_processed_buffers[i] = -666.0;
+    _par_last_scan_rs_start_times_ms[i] = -666.0;
+    _par_last_scan_rs_times_ms[i] = -666.0;
+    _par_last_scan_new_refs_times_ms[i] = -666.0;
+    _par_last_obj_copy_times_ms[i] = -666.0;
+    _par_last_termination_times_ms[i] = -666.0;
+
+    _pop_par_last_update_rs_start_times_ms[i] = -666.0;
+    _pop_par_last_update_rs_times_ms[i] = -666.0;
+    _pop_par_last_update_rs_processed_buffers[i] = -666.0;
+    _pop_par_last_scan_rs_start_times_ms[i] = -666.0;
+    _pop_par_last_scan_rs_times_ms[i] = -666.0;
+    _pop_par_last_closure_app_times_ms[i] = -666.0;
+  }
+#endif
+
+  for (int i = 0; i < _aux_num; ++i) {
+    _cur_aux_times_ms[i] = 0.0;
+    _cur_aux_times_set[i] = false;
+  }
+
+  _satb_drain_time_set = false;
+  _last_satb_drain_processed_buffers = -1;
+
+  if (in_young_gc_mode())
+    _last_young_gc_full = false;
+
+
+  // do the following for any other surv rate groups as well
+  _short_lived_surv_rate_group->stop_adding_regions();
+  size_t short_lived_so_length = _young_list_so_prefix_length;
+  _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length);
+  tag_scan_only(short_lived_so_length);
+
+  assert( verify_young_ages(), "region age verification" );
+}
+
+void G1CollectorPolicy::tag_scan_only(size_t short_lived_scan_only_length) {
+  // done in a way that it can be extended for other surv rate groups too...
+
+  HeapRegion* head = _g1->young_list_first_region();
+  bool finished_short_lived = (short_lived_scan_only_length == 0);
+
+  if (finished_short_lived)
+    return;
+
+  for (HeapRegion* curr = head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    SurvRateGroup* surv_rate_group = curr->surv_rate_group();
+    int age = curr->age_in_surv_rate_group();
+
+    if (surv_rate_group == _short_lived_surv_rate_group) {
+      if ((size_t)age < short_lived_scan_only_length)
+        curr->set_scan_only();
+      else
+        finished_short_lived = true;
+    }
+
+
+    if (finished_short_lived)
+      return;
+  }
+
+  guarantee( false, "we should never reach here" );
+}
+
+void G1CollectorPolicy::record_popular_pause_preamble_start() {
+  _cur_popular_preamble_start_ms = os::elapsedTime() * 1000.0;
+}
+
+void G1CollectorPolicy::record_popular_pause_preamble_end() {
+  _cur_popular_preamble_time_ms =
+    (os::elapsedTime() * 1000.0) - _cur_popular_preamble_start_ms;
+
+  // copy the recorded statistics of the first pass to temporary arrays
+  for (int i = 0; i < _parallel_gc_threads; ++i) {
+    _pop_par_last_update_rs_start_times_ms[i] = _par_last_update_rs_start_times_ms[i];
+    _pop_par_last_update_rs_times_ms[i] = _par_last_update_rs_times_ms[i];
+    _pop_par_last_update_rs_processed_buffers[i] = _par_last_update_rs_processed_buffers[i];
+    _pop_par_last_scan_rs_start_times_ms[i] = _par_last_scan_rs_start_times_ms[i];
+    _pop_par_last_scan_rs_times_ms[i] = _par_last_scan_rs_times_ms[i];
+    _pop_par_last_closure_app_times_ms[i] = _par_last_obj_copy_times_ms[i];
+  }
+}
+
+void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) {
+  _mark_closure_time_ms = mark_closure_time_ms;
+}
+
+void G1CollectorPolicy::record_concurrent_mark_init_start() {
+  _mark_init_start_sec = os::elapsedTime();
+  guarantee(!in_young_gc_mode(), "should not be here in young GC mode");
+}
+
+void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double
+                                                   mark_init_elapsed_time_ms) {
+  _during_marking = true;
+  _should_initiate_conc_mark = false;
+  _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms;
+}
+
+void G1CollectorPolicy::record_concurrent_mark_init_end() {
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_ms = (end_time_sec - _mark_init_start_sec) * 1000.0;
+  _concurrent_mark_init_times_ms->add(elapsed_time_ms);
+  checkpoint_conc_overhead();
+  record_concurrent_mark_init_end_pre(elapsed_time_ms);
+
+  _mmu_tracker->add_pause(_mark_init_start_sec, end_time_sec, true);
+}
+
+void G1CollectorPolicy::record_concurrent_mark_remark_start() {
+  _mark_remark_start_sec = os::elapsedTime();
+  _during_marking = false;
+}
+
+void G1CollectorPolicy::record_concurrent_mark_remark_end() {
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_ms = (end_time_sec - _mark_remark_start_sec)*1000.0;
+  checkpoint_conc_overhead();
+  _concurrent_mark_remark_times_ms->add(elapsed_time_ms);
+  _cur_mark_stop_world_time_ms += elapsed_time_ms;
+  _prev_collection_pause_end_ms += elapsed_time_ms;
+
+  _mmu_tracker->add_pause(_mark_remark_start_sec, end_time_sec, true);
+}
+
+void G1CollectorPolicy::record_concurrent_mark_cleanup_start() {
+  _mark_cleanup_start_sec = os::elapsedTime();
+}
+
+void
+G1CollectorPolicy::record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                                      size_t max_live_bytes) {
+  record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes);
+  record_concurrent_mark_cleanup_end_work2();
+}
+
+void
+G1CollectorPolicy::
+record_concurrent_mark_cleanup_end_work1(size_t freed_bytes,
+                                         size_t max_live_bytes) {
+  if (_n_marks < 2) _n_marks++;
+  if (G1PolicyVerbose > 0)
+    gclog_or_tty->print_cr("At end of marking, max_live is " SIZE_FORMAT " MB "
+                           " (of " SIZE_FORMAT " MB heap).",
+                           max_live_bytes/M, _g1->capacity()/M);
+}
+
+// The important thing about this is that it includes "os::elapsedTime".
+void G1CollectorPolicy::record_concurrent_mark_cleanup_end_work2() {
+  checkpoint_conc_overhead();
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_ms = (end_time_sec - _mark_cleanup_start_sec)*1000.0;
+  _concurrent_mark_cleanup_times_ms->add(elapsed_time_ms);
+  _cur_mark_stop_world_time_ms += elapsed_time_ms;
+  _prev_collection_pause_end_ms += elapsed_time_ms;
+
+  _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_time_sec, true);
+
+  _num_markings++;
+
+  // We did a marking, so reset the "since_last_mark" variables.
+  double considerConcMarkCost = 1.0;
+  // If there are available processors, concurrent activity is free...
+  if (Threads::number_of_non_daemon_threads() * 2 <
+      os::active_processor_count()) {
+    considerConcMarkCost = 0.0;
+  }
+  _n_pauses_at_mark_end = _n_pauses;
+  _n_marks_since_last_pause++;
+  _conc_mark_initiated = false;
+}
+
+void
+G1CollectorPolicy::record_concurrent_mark_cleanup_completed() {
+  if (in_young_gc_mode()) {
+    _should_revert_to_full_young_gcs = false;
+    _last_full_young_gc = true;
+    _in_marking_window = false;
+    if (adaptive_young_list_length())
+      calculate_young_list_target_config();
+  }
+}
+
+void G1CollectorPolicy::record_concurrent_pause() {
+  if (_stop_world_start > 0.0) {
+    double yield_ms = (os::elapsedTime() - _stop_world_start) * 1000.0;
+    _all_yield_times_ms->add(yield_ms);
+  }
+}
+
+void G1CollectorPolicy::record_concurrent_pause_end() {
+}
+
+void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() {
+  _cur_CH_strong_roots_end_sec = os::elapsedTime();
+  _cur_CH_strong_roots_dur_ms =
+    (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0;
+}
+
+void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() {
+  _cur_G1_strong_roots_end_sec = os::elapsedTime();
+  _cur_G1_strong_roots_dur_ms =
+    (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0;
+}
+
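+// Sums n consecutive entries of sum_arr, treated as a circular buffer of
+// capacity N starting at index 'start'.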
+template<class T>
+T sum_of(T* sum_arr, int start, int n, int N) {
+  T sum = (T)0;
+  for (int i = 0; i < n; i++) {
+    int j = (start + i) % N;
+    sum += sum_arr[j];
+  }
+  return sum;
+}
+
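+// Prints one value per parallel GC worker for the named phase and, if
+// requested, a summary line with the average, minimum and maximum.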
+void G1CollectorPolicy::print_par_stats (int level,
+                                         const char* str,
+                                         double* data,
+                                         bool summary) {
+  double min = data[0], max = data[0];
+  double total = 0.0;
+  int j;
+  for (j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print("[%s (ms):", str);
+  for (uint i = 0; i < ParallelGCThreads; ++i) {
+    double val = data[i];
+    if (val < min)
+      min = val;
+    if (val > max)
+      max = val;
+    total += val;
+    gclog_or_tty->print("  %3.1lf", val);
+  }
+  if (summary) {
+    gclog_or_tty->print_cr("");
+    double avg = total / (double) ParallelGCThreads;
+    gclog_or_tty->print(" ");
+    for (j = 0; j < level; ++j)
+      gclog_or_tty->print("   ");
+    gclog_or_tty->print("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf",
+                        avg, min, max);
+  }
+  gclog_or_tty->print_cr("]");
+}
+
+void G1CollectorPolicy::print_par_buffers (int level,
+                                         const char* str,
+                                         double* data,
+                                         bool summary) {
+  double min = data[0], max = data[0];
+  double total = 0.0;
+  int j;
+  for (j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print("[%s :", str);
+  for (uint i = 0; i < ParallelGCThreads; ++i) {
+    double val = data[i];
+    if (val < min)
+      min = val;
+    if (val > max)
+      max = val;
+    total += val;
+    gclog_or_tty->print(" %d", (int) val);
+  }
+  if (summary) {
+    gclog_or_tty->print_cr("");
+    double avg = total / (double) ParallelGCThreads;
+    gclog_or_tty->print(" ");
+    for (j = 0; j < level; ++j)
+      gclog_or_tty->print("   ");
+    gclog_or_tty->print("Sum: %d, Avg: %d, Min: %d, Max: %d",
+               (int)total, (int)avg, (int)min, (int)max);
+  }
+  gclog_or_tty->print_cr("]");
+}
+
+void G1CollectorPolicy::print_stats (int level,
+                                     const char* str,
+                                     double value) {
+  for (int j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print_cr("[%s: %5.1lf ms]", str, value);
+}
+
+void G1CollectorPolicy::print_stats (int level,
+                                     const char* str,
+                                     int value) {
+  for (int j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print_cr("[%s: %d]", str, value);
+}
+
+double G1CollectorPolicy::avg_value (double* data) {
+  if (ParallelGCThreads > 0) {
+    double ret = 0.0;
+    for (uint i = 0; i < ParallelGCThreads; ++i)
+      ret += data[i];
+    return ret / (double) ParallelGCThreads;
+  } else {
+    return data[0];
+  }
+}
+
+double G1CollectorPolicy::max_value (double* data) {
+  if (ParallelGCThreads > 0) {
+    double ret = data[0];
+    for (uint i = 1; i < ParallelGCThreads; ++i)
+      if (data[i] > ret)
+        ret = data[i];
+    return ret;
+  } else {
+    return data[0];
+  }
+}
+
+double G1CollectorPolicy::sum_of_values (double* data) {
+  if (ParallelGCThreads > 0) {
+    double sum = 0.0;
+    for (uint i = 0; i < ParallelGCThreads; i++)
+      sum += data[i];
+    return sum;
+  } else {
+    return data[0];
+  }
+}
+
+double G1CollectorPolicy::max_sum (double* data1,
+                                   double* data2) {
+  double ret = data1[0] + data2[0];
+
+  if (ParallelGCThreads > 0) {
+    for (uint i = 1; i < ParallelGCThreads; ++i) {
+      double data = data1[i] + data2[i];
+      if (data > ret)
+        ret = data;
+    }
+  }
+  return ret;
+}
+
+// Anything below this threshold is considered to be zero
+#define MIN_TIMER_GRANULARITY 0.0000001
+
+void G1CollectorPolicy::record_collection_pause_end(bool popular,
+                                                    bool abandoned) {
+  double end_time_sec = os::elapsedTime();
+  double elapsed_ms = _last_pause_time_ms;
+  bool parallel = ParallelGCThreads > 0;
+  double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0;
+  size_t rs_size =
+    _cur_collection_pause_used_regions_at_start - collection_set_size();
+  size_t cur_used_bytes = _g1->used();
+  assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
+  bool last_pause_included_initial_mark = false;
+
+#ifndef PRODUCT
+  if (G1YoungSurvRateVerbose) {
+    gclog_or_tty->print_cr("");
+    _short_lived_surv_rate_group->print();
+    // do that for any other surv rate groups too
+  }
+#endif // PRODUCT
+
+  checkpoint_conc_overhead();
+
+  if (in_young_gc_mode()) {
+    last_pause_included_initial_mark = _should_initiate_conc_mark;
+    if (last_pause_included_initial_mark)
+      record_concurrent_mark_init_end_pre(0.0);
+
+    size_t min_used_targ =
+      (_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta);
+
+    if (cur_used_bytes > min_used_targ) {
+      if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) {
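+        // heap occupancy has not grown since the end of the previous pause,
+        // so there is no need to initiate concurrent marking yet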
+      } else if (!_g1->mark_in_progress() && !_last_full_young_gc) {
+        _should_initiate_conc_mark = true;
+      }
+    }
+
+    _prev_collection_pause_used_at_end_bytes = cur_used_bytes;
+  }
+
+  _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
+                          end_time_sec, false);
+
+  guarantee(_cur_collection_pause_used_regions_at_start >=
+            collection_set_size(),
+            "Negative RS size?");
+
+  // This assert is exempted when we're doing parallel collection pauses,
+  // because the fragmentation caused by the parallel GC allocation buffers
+  // can lead to more memory being used during collection than was used
+  // before. Best leave this out until the fragmentation problem is fixed.
+  // Pauses in which evacuation failed can also lead to negative
+  // collections, since no space is reclaimed from a region containing an
+  // object whose evacuation failed.
+  // Further, we're now always doing parallel collection.  But I'm still
+  // leaving this here as a placeholder for a more precise assertion later.
+  // (DLD, 10/05.)
+  assert((true || parallel) // Always using GC LABs now.
+         || _g1->evacuation_failed()
+         || _cur_collection_pause_used_at_start_bytes >= cur_used_bytes,
+         "Negative collection");
+
+  size_t freed_bytes =
+    _cur_collection_pause_used_at_start_bytes - cur_used_bytes;
+  size_t surviving_bytes = _collection_set_bytes_used_before - freed_bytes;
+  double survival_fraction =
+    (double)surviving_bytes/
+    (double)_collection_set_bytes_used_before;
+
+  _n_pauses++;
+
+  if (!abandoned) {
+    _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms);
+    _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms);
+    _recent_evac_times_ms->add(evac_ms);
+    _recent_pause_times_ms->add(elapsed_ms);
+
+    _recent_rs_sizes->add(rs_size);
+
+    // We exempt parallel collection from this check because Alloc Buffer
+    // fragmentation can produce negative collections.  Same with evac
+    // failure.
+    // Further, we're now always doing parallel collection.  But I'm still
+    // leaving this here as a placeholder for a more precise assertion later.
+    // (DLD, 10/05.)
+    assert((true || parallel)
+           || _g1->evacuation_failed()
+           || surviving_bytes <= _collection_set_bytes_used_before,
+           "Or else negative collection!");
+    _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before);
+    _recent_CS_bytes_surviving->add(surviving_bytes);
+
+    // this is where we update the allocation rate of the application
+    double app_time_ms =
+      (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms);
+    if (app_time_ms < MIN_TIMER_GRANULARITY) {
+      // This usually happens due to the timer not having the required
+      // granularity. Some Linuxes are the usual culprits.
+      // We'll just set it to something (arbitrarily) small.
+      app_time_ms = 1.0;
+    }
+    size_t regions_allocated =
+      (_region_num_young - _prev_region_num_young) +
+      (_region_num_tenured - _prev_region_num_tenured);
+    double alloc_rate_ms = (double) regions_allocated / app_time_ms;
+    _alloc_rate_ms_seq->add(alloc_rate_ms);
+    _prev_region_num_young   = _region_num_young;
+    _prev_region_num_tenured = _region_num_tenured;
+
+    double interval_ms =
+      (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0;
+    update_recent_gc_times(end_time_sec, elapsed_ms);
+    _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms;
+    assert(recent_avg_pause_time_ratio() < 1.00, "All GC?");
+  }
+
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
+  }
+
+  PauseSummary* summary;
+  if (!abandoned && !popular)
+    summary = _non_pop_summary;
+  else if (!abandoned && popular)
+    summary = _pop_summary;
+  else if (abandoned && !popular)
+    summary = _non_pop_abandoned_summary;
+  else if (abandoned && popular)
+    summary = _pop_abandoned_summary;
+  else
+    guarantee(false, "should not get here!");
+
+  double pop_update_rs_time;
+  double pop_update_rs_processed_buffers;
+  double pop_scan_rs_time;
+  double pop_closure_app_time;
+  double pop_other_time;
+
+  if (popular) {
+    PopPreambleSummary* preamble_summary = summary->pop_preamble_summary();
+    guarantee(preamble_summary != NULL, "should not be null!");
+
+    pop_update_rs_time = avg_value(_pop_par_last_update_rs_times_ms);
+    pop_update_rs_processed_buffers =
+      sum_of_values(_pop_par_last_update_rs_processed_buffers);
+    pop_scan_rs_time = avg_value(_pop_par_last_scan_rs_times_ms);
+    pop_closure_app_time = avg_value(_pop_par_last_closure_app_times_ms);
+    pop_other_time = _cur_popular_preamble_time_ms -
+      (pop_update_rs_time + pop_scan_rs_time + pop_closure_app_time +
+       _cur_popular_evac_time_ms);
+
+    preamble_summary->record_pop_preamble_time_ms(_cur_popular_preamble_time_ms);
+    preamble_summary->record_pop_update_rs_time_ms(pop_update_rs_time);
+    preamble_summary->record_pop_scan_rs_time_ms(pop_scan_rs_time);
+    preamble_summary->record_pop_closure_app_time_ms(pop_closure_app_time);
+    preamble_summary->record_pop_evacuation_time_ms(_cur_popular_evac_time_ms);
+    preamble_summary->record_pop_other_time_ms(pop_other_time);
+  }
+
+  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
+  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double scan_only_time = avg_value(_par_last_scan_only_times_ms);
+  double scan_only_regions_scanned =
+    sum_of_values(_par_last_scan_only_regions_scanned);
+  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
+  double update_rs_processed_buffers =
+    sum_of_values(_par_last_update_rs_processed_buffers);
+  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
+  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
+  double termination_time = avg_value(_par_last_termination_times_ms);
+
+  double parallel_other_time;
+  if (!abandoned) {
+    MainBodySummary* body_summary = summary->main_body_summary();
+    guarantee(body_summary != NULL, "should not be null!");
+
+    if (_satb_drain_time_set)
+      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+    else
+      body_summary->record_satb_drain_time_ms(0.0);
+    body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
+    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_scan_only_time_ms(scan_only_time);
+    body_summary->record_update_rs_time_ms(update_rs_time);
+    body_summary->record_scan_rs_time_ms(scan_rs_time);
+    body_summary->record_obj_copy_time_ms(obj_copy_time);
+    if (parallel) {
+      body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
+      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
+      body_summary->record_termination_time_ms(termination_time);
+      parallel_other_time = _cur_collection_par_time_ms -
+        (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
+         scan_only_time + scan_rs_time + obj_copy_time + termination_time);
+      body_summary->record_parallel_other_time_ms(parallel_other_time);
+    }
+    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
+  }
+
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("      ET: %10.6f ms           (avg: %10.6f ms)\n"
+                           "        CH Strong: %10.6f ms    (avg: %10.6f ms)\n"
+                           "        G1 Strong: %10.6f ms    (avg: %10.6f ms)\n"
+                           "        Evac:      %10.6f ms    (avg: %10.6f ms)\n"
+                           "       ET-RS:  %10.6f ms      (avg: %10.6f ms)\n"
+                           "      |RS|: " SIZE_FORMAT,
+                           elapsed_ms, recent_avg_time_for_pauses_ms(),
+                           _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(),
+                           _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(),
+                           evac_ms, recent_avg_time_for_evac_ms(),
+                           scan_rs_time,
+                           recent_avg_time_for_pauses_ms() -
+                           recent_avg_time_for_G1_strong_ms(),
+                           rs_size);
+
+    gclog_or_tty->print_cr("       Used at start: " SIZE_FORMAT"K"
+                           "       At end " SIZE_FORMAT "K\n"
+                           "       garbage      : " SIZE_FORMAT "K"
+                           "       of     " SIZE_FORMAT "K\n"
+                           "       survival     : %6.2f%%  (%6.2f%% avg)",
+                           _cur_collection_pause_used_at_start_bytes/K,
+                           _g1->used()/K, freed_bytes/K,
+                           _collection_set_bytes_used_before/K,
+                           survival_fraction*100.0,
+                           recent_avg_survival_fraction()*100.0);
+    gclog_or_tty->print_cr("       Recent %% gc pause time: %6.2f",
+                           recent_avg_pause_time_ratio() * 100.0);
+  }
+
+  double other_time_ms = elapsed_ms;
+  if (popular)
+    other_time_ms -= _cur_popular_preamble_time_ms;
+
+  if (!abandoned) {
+    if (_satb_drain_time_set)
+      other_time_ms -= _cur_satb_drain_time_ms;
+
+    if (parallel)
+      other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms;
+    else
+      other_time_ms -=
+        update_rs_time +
+        ext_root_scan_time + mark_stack_scan_time + scan_only_time +
+        scan_rs_time + obj_copy_time;
+  }
+
+  if (PrintGCDetails) {
+    gclog_or_tty->print_cr("%s%s, %1.8lf secs]",
+                           (popular && !abandoned) ? " (popular)" :
+                           (!popular && abandoned) ? " (abandoned)" :
+                           (popular && abandoned) ? " (popular/abandoned)" : "",
+                           (last_pause_included_initial_mark) ? " (initial-mark)" : "",
+                           elapsed_ms / 1000.0);
+
+    if (!abandoned) {
+      if (_satb_drain_time_set)
+        print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
+      if (_last_satb_drain_processed_buffers >= 0)
+        print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
+    }
+    if (popular)
+      print_stats(1, "Popularity Preamble", _cur_popular_preamble_time_ms);
+    if (parallel) {
+      if (popular) {
+        print_par_stats(2, "Update RS (Start)", _pop_par_last_update_rs_start_times_ms, false);
+        print_par_stats(2, "Update RS", _pop_par_last_update_rs_times_ms);
+        if (G1RSBarrierUseQueue)
+          print_par_buffers(3, "Processed Buffers",
+                            _pop_par_last_update_rs_processed_buffers, true);
+        print_par_stats(2, "Scan RS", _pop_par_last_scan_rs_times_ms);
+        print_par_stats(2, "Closure app", _pop_par_last_closure_app_times_ms);
+        print_stats(2, "Evacuation", _cur_popular_evac_time_ms);
+        print_stats(2, "Other", pop_other_time);
+      }
+      if (!abandoned) {
+        print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
+        if (!popular) {
+          print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false);
+          print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
+          if (G1RSBarrierUseQueue)
+            print_par_buffers(3, "Processed Buffers",
+                              _par_last_update_rs_processed_buffers, true);
+        }
+        print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
+        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+        print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms);
+        print_par_buffers(3, "Scan-Only Regions",
+                          _par_last_scan_only_regions_scanned, true);
+        print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
+        print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
+        print_par_stats(2, "Termination", _par_last_termination_times_ms);
+        print_stats(2, "Other", parallel_other_time);
+        print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
+      }
+    } else {
+      if (popular) {
+        print_stats(2, "Update RS", pop_update_rs_time);
+        if (G1RSBarrierUseQueue)
+          print_stats(3, "Processed Buffers",
+                      (int)pop_update_rs_processed_buffers);
+        print_stats(2, "Scan RS", pop_scan_rs_time);
+        print_stats(2, "Closure App", pop_closure_app_time);
+        print_stats(2, "Evacuation", _cur_popular_evac_time_ms);
+        print_stats(2, "Other", pop_other_time);
+      }
+      if (!abandoned) {
+        if (!popular) {
+          print_stats(1, "Update RS", update_rs_time);
+          if (G1RSBarrierUseQueue)
+            print_stats(2, "Processed Buffers",
+                        (int)update_rs_processed_buffers);
+        }
+        print_stats(1, "Ext Root Scanning", ext_root_scan_time);
+        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+        print_stats(1, "Scan-Only Scanning", scan_only_time);
+        print_stats(1, "Scan RS", scan_rs_time);
+        print_stats(1, "Object Copying", obj_copy_time);
+      }
+    }
+    print_stats(1, "Other", other_time_ms);
+    for (int i = 0; i < _aux_num; ++i) {
+      if (_cur_aux_times_set[i]) {
+        char buffer[96];
+        sprintf(buffer, "Aux%d", i);
+        print_stats(1, buffer, _cur_aux_times_ms[i]);
+      }
+    }
+  }
+  if (PrintGCDetails)
+    gclog_or_tty->print("   [");
+  if (PrintGC || PrintGCDetails)
+    _g1->print_size_transition(gclog_or_tty,
+                               _cur_collection_pause_used_at_start_bytes,
+                               _g1->used(), _g1->capacity());
+  if (PrintGCDetails)
+    gclog_or_tty->print_cr("]");
+
+  _all_pause_times_ms->add(elapsed_ms);
+  summary->record_total_time_ms(elapsed_ms);
+  summary->record_other_time_ms(other_time_ms);
+  for (int i = 0; i < _aux_num; ++i)
+    if (_cur_aux_times_set[i])
+      _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
+
+  // Reset marks-between-pauses counter.
+  _n_marks_since_last_pause = 0;
+
+  // Update the efficiency-since-mark vars.
+  double proc_ms = elapsed_ms * (double) _parallel_gc_threads;
+  if (elapsed_ms < MIN_TIMER_GRANULARITY) {
+    // This usually happens due to the timer not having the required
+    // granularity. Some Linuxes are the usual culprits.
+    // We'll just set it to something (arbitrarily) small.
+    proc_ms = 1.0;
+  }
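+  // GC efficiency of this pause: bytes reclaimed per millisecond of
+  // aggregate GC worker time.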
+  double cur_efficiency = (double) freed_bytes / proc_ms;
+
+  bool new_in_marking_window = _in_marking_window;
+  bool new_in_marking_window_im = false;
+  if (_should_initiate_conc_mark) {
+    new_in_marking_window = true;
+    new_in_marking_window_im = true;
+  }
+
+  if (in_young_gc_mode()) {
+    if (_last_full_young_gc) {
+      set_full_young_gcs(false);
+      _last_full_young_gc = false;
+    }
+
+    if ( !_last_young_gc_full ) {
+      if ( _should_revert_to_full_young_gcs ||
+           _known_garbage_ratio < 0.05 ||
+           (adaptive_young_list_length() &&
+           (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) ) {
+        set_full_young_gcs(true);
+      }
+    }
+    _should_revert_to_full_young_gcs = false;
+
+    if (_last_young_gc_full && !_during_marking)
+      _young_gc_eff_seq->add(cur_efficiency);
+  }
+
+  _short_lived_surv_rate_group->start_adding_regions();
+  // do that for any other surv rate groups
+
+  // <NEW PREDICTION>
+
+  if (!popular && !abandoned) {
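+    // Feed this pause's measurements into the sequences used to predict the
+    // cost of future collection pauses.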
+    double pause_time_ms = elapsed_ms;
+
+    size_t diff = 0;
+    if (_max_pending_cards >= _pending_cards)
+      diff = _max_pending_cards - _pending_cards;
+    _pending_card_diff_seq->add((double) diff);
+
+    double cost_per_card_ms = 0.0;
+    if (_pending_cards > 0) {
+      cost_per_card_ms = update_rs_time / (double) _pending_cards;
+      _cost_per_card_ms_seq->add(cost_per_card_ms);
+    }
+
+    double cost_per_scan_only_region_ms = 0.0;
+    if (scan_only_regions_scanned > 0.0) {
+      cost_per_scan_only_region_ms =
+        scan_only_time / scan_only_regions_scanned;
+      if (_in_marking_window_im)
+        _cost_per_scan_only_region_ms_during_cm_seq->add(cost_per_scan_only_region_ms);
+      else
+        _cost_per_scan_only_region_ms_seq->add(cost_per_scan_only_region_ms);
+    }
+
+    size_t cards_scanned = _g1->cards_scanned();
+
+    double cost_per_entry_ms = 0.0;
+    if (cards_scanned > 10) {
+      cost_per_entry_ms = scan_rs_time / (double) cards_scanned;
+      if (_last_young_gc_full)
+        _cost_per_entry_ms_seq->add(cost_per_entry_ms);
+      else
+        _partially_young_cost_per_entry_ms_seq->add(cost_per_entry_ms);
+    }
+
+    if (_max_rs_lengths > 0) {
+      double cards_per_entry_ratio =
+        (double) cards_scanned / (double) _max_rs_lengths;
+      if (_last_young_gc_full)
+        _fully_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+      else
+        _partially_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    }
+
+    size_t rs_length_diff = _max_rs_lengths - _recorded_rs_lengths;
+    if (rs_length_diff >= 0)
+      _rs_length_diff_seq->add((double) rs_length_diff);
+
+    size_t copied_bytes = surviving_bytes;
+    double cost_per_byte_ms = 0.0;
+    if (copied_bytes > 0) {
+      cost_per_byte_ms = obj_copy_time / (double) copied_bytes;
+      if (_in_marking_window)
+        _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
+      else
+        _cost_per_byte_ms_seq->add(cost_per_byte_ms);
+    }
+
+    double all_other_time_ms = pause_time_ms -
+      (update_rs_time + scan_only_time + scan_rs_time + obj_copy_time +
+       _mark_closure_time_ms + termination_time);
+
+    double young_other_time_ms = 0.0;
+    if (_recorded_young_regions > 0) {
+      young_other_time_ms =
+        _recorded_young_cset_choice_time_ms +
+        _recorded_young_free_cset_time_ms;
+      _young_other_cost_per_region_ms_seq->add(young_other_time_ms /
+                                             (double) _recorded_young_regions);
+    }
+    double non_young_other_time_ms = 0.0;
+    if (_recorded_non_young_regions > 0) {
+      non_young_other_time_ms =
+        _recorded_non_young_cset_choice_time_ms +
+        _recorded_non_young_free_cset_time_ms;
+
+      _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms /
+                                         (double) _recorded_non_young_regions);
+    }
+
+    double constant_other_time_ms = all_other_time_ms -
+      (young_other_time_ms + non_young_other_time_ms);
+    _constant_other_time_ms_seq->add(constant_other_time_ms);
+
+    double survival_ratio = 0.0;
+    if (_bytes_in_collection_set_before_gc > 0) {
+      survival_ratio = (double) bytes_in_to_space_during_gc() /
+        (double) _bytes_in_collection_set_before_gc;
+    }
+
+    _pending_cards_seq->add((double) _pending_cards);
+    _scanned_cards_seq->add((double) cards_scanned);
+    _rs_lengths_seq->add((double) _max_rs_lengths);
+
+    double expensive_region_limit_ms =
+      (double) G1MaxPauseTimeMS - predict_constant_other_time_ms();
+    if (expensive_region_limit_ms < 0.0) {
+      // this means that the other time was predicted to be longer than
+      // the max pause time
+      expensive_region_limit_ms = (double) G1MaxPauseTimeMS;
+    }
+    _expensive_region_limit_ms = expensive_region_limit_ms;
+
+    if (PREDICTIONS_VERBOSE) {
+      gclog_or_tty->print_cr("");
+      gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d "
+                    "REGIONS %d %d %d %d "
+                    "PENDING_CARDS %d %d "
+                    "CARDS_SCANNED %d %d "
+                    "RS_LENGTHS %d %d "
+                    "SCAN_ONLY_SCAN %1.6lf %1.6lf "
+                    "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf "
+                    "SURVIVAL_RATIO %1.6lf %1.6lf "
+                    "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf "
+                    "OTHER_YOUNG %1.6lf %1.6lf "
+                    "OTHER_NON_YOUNG %1.6lf %1.6lf "
+                    "VTIME_DIFF %1.6lf TERMINATION %1.6lf "
+                    "ELAPSED %1.6lf %1.6lf ",
+                    _cur_collection_start_sec,
+                    (!_last_young_gc_full) ? 2 :
+                    (last_pause_included_initial_mark) ? 1 : 0,
+                    _recorded_region_num,
+                    _recorded_young_regions,
+                    _recorded_scan_only_regions,
+                    _recorded_non_young_regions,
+                    _predicted_pending_cards, _pending_cards,
+                    _predicted_cards_scanned, cards_scanned,
+                    _predicted_rs_lengths, _max_rs_lengths,
+                    _predicted_scan_only_scan_time_ms, scan_only_time,
+                    _predicted_rs_update_time_ms, update_rs_time,
+                    _predicted_rs_scan_time_ms, scan_rs_time,
+                    _predicted_survival_ratio, survival_ratio,
+                    _predicted_object_copy_time_ms, obj_copy_time,
+                    _predicted_constant_other_time_ms, constant_other_time_ms,
+                    _predicted_young_other_time_ms, young_other_time_ms,
+                    _predicted_non_young_other_time_ms,
+                    non_young_other_time_ms,
+                    _vtime_diff_ms, termination_time,
+                    _predicted_pause_time_ms, elapsed_ms);
+    }
+
+    if (G1PolicyVerbose > 0) {
+      gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms",
+                    _predicted_pause_time_ms,
+                    (_within_target) ? "within" : "outside",
+                    elapsed_ms);
+    }
+
+  }
+
+  _in_marking_window = new_in_marking_window;
+  _in_marking_window_im = new_in_marking_window_im;
+  _free_regions_at_end_of_collection = _g1->free_regions();
+  _scan_only_regions_at_end_of_collection = _g1->young_list_length();
+  calculate_young_list_min_length();
+  calculate_young_list_target_config();
+
+  // </NEW PREDICTION>
+
+  _target_pause_time_ms = -1.0;
+
+  // TODO: calculate tenuring threshold
+  _tenuring_threshold = MaxTenuringThreshold;
+}
+
+// <NEW PREDICTION>
+
+double
+G1CollectorPolicy::
+predict_young_collection_elapsed_time_ms(size_t adjustment) {
+  guarantee( adjustment == 0 || adjustment == 1, "invariant" );
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  size_t young_num = g1h->young_list_length();
+  if (young_num == 0)
+    return 0.0;
+
+  young_num += adjustment;
+  size_t pending_cards = predict_pending_cards();
+  size_t rs_lengths = g1h->young_list_sampled_rs_lengths() +
+                      predict_rs_length_diff();
+  size_t card_num;
+  if (full_young_gcs())
+    card_num = predict_young_card_num(rs_lengths);
+  else
+    card_num = predict_non_young_card_num(rs_lengths);
+  size_t young_byte_size = young_num * HeapRegion::GrainBytes;
+  double accum_yg_surv_rate =
+    _short_lived_surv_rate_group->accum_surv_rate(adjustment);
+
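+  // The accumulated survival rate is expressed in units of whole regions, so
+  // scaling it by the region size gives the expected number of bytes to copy.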
+  size_t bytes_to_copy =
+    (size_t) (accum_yg_surv_rate * (double) HeapRegion::GrainBytes);
+
+  return
+    predict_rs_update_time_ms(pending_cards) +
+    predict_rs_scan_time_ms(card_num) +
+    predict_object_copy_time_ms(bytes_to_copy) +
+    predict_young_other_time_ms(young_num) +
+    predict_constant_other_time_ms();
+}
+
+double
+G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) {
+  size_t rs_length = predict_rs_length_diff();
+  size_t card_num;
+  if (full_young_gcs())
+    card_num = predict_young_card_num(rs_length);
+  else
+    card_num = predict_non_young_card_num(rs_length);
+  return predict_base_elapsed_time_ms(pending_cards, card_num);
+}
+
+double
+G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards,
+                                                size_t scanned_cards) {
+  return
+    predict_rs_update_time_ms(pending_cards) +
+    predict_rs_scan_time_ms(scanned_cards) +
+    predict_constant_other_time_ms();
+}
+
+double
+G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
+                                                  bool young) {
+  size_t rs_length = hr->rem_set()->occupied();
+  size_t card_num;
+  if (full_young_gcs())
+    card_num = predict_young_card_num(rs_length);
+  else
+    card_num = predict_non_young_card_num(rs_length);
+  size_t bytes_to_copy = predict_bytes_to_copy(hr);
+
+  double region_elapsed_time_ms =
+    predict_rs_scan_time_ms(card_num) +
+    predict_object_copy_time_ms(bytes_to_copy);
+
+  if (young)
+    region_elapsed_time_ms += predict_young_other_time_ms(1);
+  else
+    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
+
+  return region_elapsed_time_ms;
+}
+
+size_t
+G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) {
+  size_t bytes_to_copy;
+  if (hr->is_marked())
+    bytes_to_copy = hr->max_live_bytes();
+  else {
+    guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
+               "invariant" );
+    int age = hr->age_in_surv_rate_group();
+    double yg_surv_rate = predict_yg_surv_rate(age);
+    bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
+  }
+
+  return bytes_to_copy;
+}
+
+void
+G1CollectorPolicy::start_recording_regions() {
+  _recorded_rs_lengths            = 0;
+  _recorded_scan_only_regions     = 0;
+  _recorded_young_regions         = 0;
+  _recorded_non_young_regions     = 0;
+
+#if PREDICTIONS_VERBOSE
+  _predicted_rs_lengths           = 0;
+  _predicted_cards_scanned        = 0;
+
+  _recorded_marked_bytes          = 0;
+  _recorded_young_bytes           = 0;
+  _predicted_bytes_to_copy        = 0;
+#endif // PREDICTIONS_VERBOSE
+}
+
+void
+G1CollectorPolicy::record_cset_region(HeapRegion* hr, bool young) {
+  if (young) {
+    ++_recorded_young_regions;
+  } else {
+    ++_recorded_non_young_regions;
+  }
+#if PREDICTIONS_VERBOSE
+  if (young) {
+    _recorded_young_bytes += hr->asSpace()->used();
+  } else {
+    _recorded_marked_bytes += hr->max_live_bytes();
+  }
+  _predicted_bytes_to_copy += predict_bytes_to_copy(hr);
+#endif // PREDICTIONS_VERBOSE
+
+  size_t rs_length = hr->rem_set()->occupied();
+  _recorded_rs_lengths += rs_length;
+}
+
+void
+G1CollectorPolicy::record_scan_only_regions(size_t scan_only_length) {
+  _recorded_scan_only_regions = scan_only_length;
+}
+
+void
+G1CollectorPolicy::end_recording_regions() {
+#if PREDICTIONS_VERBOSE
+  _predicted_pending_cards = predict_pending_cards();
+  _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff();
+  if (full_young_gcs())
+    _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths);
+  else
+    _predicted_cards_scanned +=
+      predict_non_young_card_num(_predicted_rs_lengths);
+  _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions;
+
+  _predicted_young_survival_ratio = 0.0;
+  for (int i = 0; i < _recorded_young_regions; ++i)
+    _predicted_young_survival_ratio += predict_yg_surv_rate(i);
+  _predicted_young_survival_ratio /= (double) _recorded_young_regions;
+
+  _predicted_scan_only_scan_time_ms =
+    predict_scan_only_time_ms(_recorded_scan_only_regions);
+  _predicted_rs_update_time_ms =
+    predict_rs_update_time_ms(_g1->pending_card_num());
+  _predicted_rs_scan_time_ms =
+    predict_rs_scan_time_ms(_predicted_cards_scanned);
+  _predicted_object_copy_time_ms =
+    predict_object_copy_time_ms(_predicted_bytes_to_copy);
+  _predicted_constant_other_time_ms =
+    predict_constant_other_time_ms();
+  _predicted_young_other_time_ms =
+    predict_young_other_time_ms(_recorded_young_regions);
+  _predicted_non_young_other_time_ms =
+    predict_non_young_other_time_ms(_recorded_non_young_regions);
+
+  _predicted_pause_time_ms =
+    _predicted_scan_only_scan_time_ms +
+    _predicted_rs_update_time_ms +
+    _predicted_rs_scan_time_ms +
+    _predicted_object_copy_time_ms +
+    _predicted_constant_other_time_ms +
+    _predicted_young_other_time_ms +
+    _predicted_non_young_other_time_ms;
+#endif // PREDICTIONS_VERBOSE
+}
+
+void G1CollectorPolicy::check_if_region_is_too_expensive(double
+                                                           predicted_time_ms) {
+  // I don't think we need to do this when in young GC mode since
+  // marking will be initiated next time we hit the soft limit anyway...
+  if (predicted_time_ms > _expensive_region_limit_ms) {
+    if (!in_young_gc_mode()) {
+      set_full_young_gcs(true);
+      _should_initiate_conc_mark = true;
+    } else
+      // no point in doing another partial one
+      _should_revert_to_full_young_gcs = true;
+  }
+}
+
+// </NEW PREDICTION>
+
+
+void G1CollectorPolicy::update_recent_gc_times(double end_time_sec,
+                                               double elapsed_ms) {
+  _recent_gc_times_ms->add(elapsed_ms);
+  _recent_prev_end_times_for_all_gcs_sec->add(end_time_sec);
+  _prev_collection_pause_end_ms = end_time_sec * 1000.0;
+}
+
+double G1CollectorPolicy::recent_avg_time_for_pauses_ms() {
+  if (_recent_pause_times_ms->num() == 0) return (double) G1MaxPauseTimeMS;
+  else return _recent_pause_times_ms->avg();
+}
+
+double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() {
+  if (_recent_CH_strong_roots_times_ms->num() == 0)
+    return (double)G1MaxPauseTimeMS/3.0;
+  else return _recent_CH_strong_roots_times_ms->avg();
+}
+
+double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() {
+  if (_recent_G1_strong_roots_times_ms->num() == 0)
+    return (double)G1MaxPauseTimeMS/3.0;
+  else return _recent_G1_strong_roots_times_ms->avg();
+}
+
+double G1CollectorPolicy::recent_avg_time_for_evac_ms() {
+  if (_recent_evac_times_ms->num() == 0) return (double)G1MaxPauseTimeMS/3.0;
+  else return _recent_evac_times_ms->avg();
+}
+
+int G1CollectorPolicy::number_of_recent_gcs() {
+  assert(_recent_CH_strong_roots_times_ms->num() ==
+         _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync");
+  assert(_recent_G1_strong_roots_times_ms->num() ==
+         _recent_evac_times_ms->num(), "Sequence out of sync");
+  assert(_recent_evac_times_ms->num() ==
+         _recent_pause_times_ms->num(), "Sequence out of sync");
+  assert(_recent_pause_times_ms->num() ==
+         _recent_CS_bytes_used_before->num(), "Sequence out of sync");
+  assert(_recent_CS_bytes_used_before->num() ==
+         _recent_CS_bytes_surviving->num(), "Sequence out of sync");
+  return _recent_pause_times_ms->num();
+}
+
+double G1CollectorPolicy::recent_avg_survival_fraction() {
+  return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving,
+                                           _recent_CS_bytes_used_before);
+}
+
+double G1CollectorPolicy::last_survival_fraction() {
+  return last_survival_fraction_work(_recent_CS_bytes_surviving,
+                                     _recent_CS_bytes_used_before);
+}
+
+double
+G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving,
+                                                     TruncatedSeq* before) {
+  assert(surviving->num() == before->num(), "Sequence out of sync");
+  if (before->sum() > 0.0) {
+      double recent_survival_rate = surviving->sum() / before->sum();
+      // We exempt parallel collection from this check because Alloc Buffer
+      // fragmentation can produce negative collections.
+      // Further, we're now always doing parallel collection.  But I'm still
+      // leaving this here as a placeholder for a more precise assertion later.
+      // (DLD, 10/05.)
+      assert((true || ParallelGCThreads > 0) ||
+             _g1->evacuation_failed() ||
+             recent_survival_rate <= 1.0, "Or bad frac");
+      return recent_survival_rate;
+  } else {
+    return 1.0; // Be conservative.
+  }
+}
+
+double
+G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving,
+                                               TruncatedSeq* before) {
+  assert(surviving->num() == before->num(), "Sequence out of sync");
+  if (surviving->num() > 0 && before->last() > 0.0) {
+    double last_survival_rate = surviving->last() / before->last();
+    // We exempt parallel collection from this check because Alloc Buffer
+    // fragmentation can produce negative collections.
+    // Further, we're now always doing parallel collection.  But I'm still
+    // leaving this here as a placeholder for a more precise assertion later.
+    // (DLD, 10/05.)
+    assert((true || ParallelGCThreads > 0) ||
+           last_survival_rate <= 1.0, "Or bad frac");
+    return last_survival_rate;
+  } else {
+    return 1.0;
+  }
+}
+
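+// With fewer than survival_min_obs recorded pauses, the average survival
+// fraction below is clamped to these pessimistic per-count lower bounds.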
+static const int survival_min_obs = 5;
+static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 };
+static const double min_survival_rate = 0.1;
+
+double
+G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg,
+                                                           double latest) {
+  double res = avg;
+  if (number_of_recent_gcs() < survival_min_obs) {
+    res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]);
+  }
+  res = MAX2(res, latest);
+  res = MAX2(res, min_survival_rate);
+  // In the parallel case, LAB fragmentation can produce "negative
+  // collections"; so can evac failure.  Cap at 1.0
+  res = MIN2(res, 1.0);
+  return res;
+}
+
+size_t G1CollectorPolicy::expansion_amount() {
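+  // Only expand the heap if recent pauses account for more than G1GCPct
+  // percent of elapsed time.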
+  if ((int)(recent_avg_pause_time_ratio() * 100.0) > G1GCPct) {
+    // We will double the existing space, or take G1ExpandByPctOfAvail % of
+    // the available expansion space, whichever is smaller, bounded below
+    // by a minimum expansion (unless that's all that's left.)
+    const size_t min_expand_bytes = 1*M;
+    size_t reserved_bytes = _g1->g1_reserved_obj_bytes();
+    size_t committed_bytes = _g1->capacity();
+    size_t uncommitted_bytes = reserved_bytes - committed_bytes;
+    size_t expand_bytes;
+    size_t expand_bytes_via_pct =
+      uncommitted_bytes * G1ExpandByPctOfAvail / 100;
+    expand_bytes = MIN2(expand_bytes_via_pct, committed_bytes);
+    expand_bytes = MAX2(expand_bytes, min_expand_bytes);
+    expand_bytes = MIN2(expand_bytes, uncommitted_bytes);
+    if (G1PolicyVerbose > 1) {
+      gclog_or_tty->print("Decided to expand: ratio = %5.2f, "
+                 "committed = %d%s, uncommitted = %d%s, via pct = %d%s.\n"
+                 "                   Answer = %d%s.\n",
+                 recent_avg_pause_time_ratio(),
+                 byte_size_in_proper_unit(committed_bytes),
+                 proper_unit_for_byte_size(committed_bytes),
+                 byte_size_in_proper_unit(uncommitted_bytes),
+                 proper_unit_for_byte_size(uncommitted_bytes),
+                 byte_size_in_proper_unit(expand_bytes_via_pct),
+                 proper_unit_for_byte_size(expand_bytes_via_pct),
+                 byte_size_in_proper_unit(expand_bytes),
+                 proper_unit_for_byte_size(expand_bytes));
+    }
+    return expand_bytes;
+  } else {
+    return 0;
+  }
+}
+
+void G1CollectorPolicy::note_start_of_mark_thread() {
+  _mark_thread_startup_sec = os::elapsedTime();
+}
+
+class CountCSClosure: public HeapRegionClosure {
+  G1CollectorPolicy* _g1_policy;
+public:
+  CountCSClosure(G1CollectorPolicy* g1_policy) :
+    _g1_policy(g1_policy) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _g1_policy->_bytes_in_collection_set_before_gc += r->used();
+    return false;
+  }
+};
+
+void G1CollectorPolicy::count_CS_bytes_used() {
+  CountCSClosure cs_closure(this);
+  _g1->collection_set_iterate(&cs_closure);
+}
+
+static void print_indent(int level) {
+  for (int j = 0; j < level+1; ++j)
+    gclog_or_tty->print("   ");
+}
+
+void G1CollectorPolicy::print_summary (int level,
+                                       const char* str,
+                                       NumberSeq* seq) const {
+  double sum = seq->sum();
+  print_indent(level);
+  gclog_or_tty->print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)",
+                str, sum / 1000.0, seq->avg());
+}
+
+void G1CollectorPolicy::print_summary_sd (int level,
+                                          const char* str,
+                                          NumberSeq* seq) const {
+  print_summary(level, str, seq);
+  print_indent(level + 5);
+  gclog_or_tty->print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
+                seq->num(), seq->sd(), seq->maximum());
+}
+
+void G1CollectorPolicy::check_other_times(int level,
+                                        NumberSeq* other_times_ms,
+                                        NumberSeq* calc_other_times_ms) const {
+  bool should_print = false;
+
+  double max_sum = MAX2(fabs(other_times_ms->sum()),
+                        fabs(calc_other_times_ms->sum()));
+  double min_sum = MIN2(fabs(other_times_ms->sum()),
+                        fabs(calc_other_times_ms->sum()));
+  double sum_ratio = max_sum / min_sum;
+  if (sum_ratio > 1.1) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER SUM DOESN'T MATCH RECORDED ###");
+  }
+
+  double max_avg = MAX2(fabs(other_times_ms->avg()),
+                        fabs(calc_other_times_ms->avg()));
+  double min_avg = MIN2(fabs(other_times_ms->avg()),
+                        fabs(calc_other_times_ms->avg()));
+  double avg_ratio = max_avg / min_avg;
+  if (avg_ratio > 1.1) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER AVG DOESN'T MATCH RECORDED ###");
+  }
+
+  if (other_times_ms->sum() < -0.01) {
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## RECORDED OTHER SUM IS NEGATIVE ###");
+  }
+
+  if (other_times_ms->avg() < -0.01) {
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## RECORDED OTHER AVG IS NEGATIVE ###");
+  }
+
+  if (calc_other_times_ms->sum() < -0.01) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER SUM IS NEGATIVE ###");
+  }
+
+  if (calc_other_times_ms->avg() < -0.01) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER AVG IS NEGATIVE ###");
+  }
+
+  if (should_print)
+    print_summary(level, "Other(Calc)", calc_other_times_ms);
+}
+
+void G1CollectorPolicy::print_summary(PauseSummary* summary) const {
+  bool parallel = ParallelGCThreads > 0;
+  MainBodySummary*    body_summary = summary->main_body_summary();
+  PopPreambleSummary* preamble_summary = summary->pop_preamble_summary();
+
+  if (summary->get_total_seq()->num() > 0) {
+    print_summary_sd(0,
+                     (preamble_summary == NULL) ? "Non-Popular Pauses" :
+                     "Popular Pauses",
+                     summary->get_total_seq());
+    if (preamble_summary != NULL) {
+      print_summary(1, "Popularity Preamble",
+                    preamble_summary->get_pop_preamble_seq());
+      print_summary(2, "Update RS", preamble_summary->get_pop_update_rs_seq());
+      print_summary(2, "Scan RS", preamble_summary->get_pop_scan_rs_seq());
+      print_summary(2, "Closure App",
+                    preamble_summary->get_pop_closure_app_seq());
+      print_summary(2, "Evacuation",
+                    preamble_summary->get_pop_evacuation_seq());
+      print_summary(2, "Other", preamble_summary->get_pop_other_seq());
+      {
+        NumberSeq* other_parts[] = {
+          preamble_summary->get_pop_update_rs_seq(),
+          preamble_summary->get_pop_scan_rs_seq(),
+          preamble_summary->get_pop_closure_app_seq(),
+          preamble_summary->get_pop_evacuation_seq()
+        };
+        NumberSeq calc_other_times_ms(preamble_summary->get_pop_preamble_seq(),
+                                      4, other_parts);
+        check_other_times(2, preamble_summary->get_pop_other_seq(),
+                          &calc_other_times_ms);
+      }
+    }
+    if (body_summary != NULL) {
+      print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
+      if (parallel) {
+        print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
+        print_summary(2, "Update RS", body_summary->get_update_rs_seq());
+        print_summary(2, "Ext Root Scanning",
+                      body_summary->get_ext_root_scan_seq());
+        print_summary(2, "Mark Stack Scanning",
+                      body_summary->get_mark_stack_scan_seq());
+        print_summary(2, "Scan-Only Scanning",
+                      body_summary->get_scan_only_seq());
+        print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
+        print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
+        print_summary(2, "Termination", body_summary->get_termination_seq());
+        print_summary(2, "Other", body_summary->get_parallel_other_seq());
+        {
+          NumberSeq* other_parts[] = {
+            body_summary->get_update_rs_seq(),
+            body_summary->get_ext_root_scan_seq(),
+            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_scan_only_seq(),
+            body_summary->get_scan_rs_seq(),
+            body_summary->get_obj_copy_seq(),
+            body_summary->get_termination_seq()
+          };
+          NumberSeq calc_other_times_ms(body_summary->get_parallel_seq(),
+                                        7, other_parts);
+          check_other_times(2, body_summary->get_parallel_other_seq(),
+                            &calc_other_times_ms);
+        }
+        print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq());
+        print_summary(1, "Clear CT", body_summary->get_clear_ct_seq());
+      } else {
+        print_summary(1, "Update RS", body_summary->get_update_rs_seq());
+        print_summary(1, "Ext Root Scanning",
+                      body_summary->get_ext_root_scan_seq());
+        print_summary(1, "Mark Stack Scanning",
+                      body_summary->get_mark_stack_scan_seq());
+        print_summary(1, "Scan-Only Scanning",
+                      body_summary->get_scan_only_seq());
+        print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
+        print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
+      }
+    }
+    print_summary(1, "Other", summary->get_other_seq());
+    {
+      NumberSeq calc_other_times_ms;
+      if (body_summary != NULL) {
+        // not abandoned
+        if (parallel) {
+          // parallel
+          NumberSeq* other_parts[] = {
+            body_summary->get_satb_drain_seq(),
+            (preamble_summary == NULL) ? NULL :
+              preamble_summary->get_pop_preamble_seq(),
+            body_summary->get_parallel_seq(),
+            body_summary->get_clear_ct_seq()
+          };
+          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                          4, other_parts);
+        } else {
+          // serial
+          NumberSeq* other_parts[] = {
+            body_summary->get_satb_drain_seq(),
+            (preamble_summary == NULL) ? NULL :
+              preamble_summary->get_pop_preamble_seq(),
+            body_summary->get_update_rs_seq(),
+            body_summary->get_ext_root_scan_seq(),
+            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_scan_only_seq(),
+            body_summary->get_scan_rs_seq(),
+            body_summary->get_obj_copy_seq()
+          };
+          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                          8, other_parts);
+        }
+      } else {
+        // abandoned
+        NumberSeq* other_parts[] = {
+          (preamble_summary == NULL) ? NULL :
+            preamble_summary->get_pop_preamble_seq()
+        };
+        calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                        1, other_parts);
+      }
+      check_other_times(1, summary->get_other_seq(), &calc_other_times_ms);
+    }
+  } else {
+    print_indent(0);
+    gclog_or_tty->print_cr("none");
+  }
+  gclog_or_tty->print_cr("");
+}
+
+void
+G1CollectorPolicy::print_abandoned_summary(PauseSummary* non_pop_summary,
+                                           PauseSummary* pop_summary) const {
+  bool printed = false;
+  if (non_pop_summary->get_total_seq()->num() > 0) {
+    printed = true;
+    print_summary(non_pop_summary);
+  }
+  if (pop_summary->get_total_seq()->num() > 0) {
+    printed = true;
+    print_summary(pop_summary);
+  }
+
+  if (!printed) {
+    print_indent(0);
+    gclog_or_tty->print_cr("none");
+    gclog_or_tty->print_cr("");
+  }
+}
+
+void G1CollectorPolicy::print_tracing_info() const {
+  if (TraceGen0Time) {
+    gclog_or_tty->print_cr("ALL PAUSES");
+    print_summary_sd(0, "Total", _all_pause_times_ms);
+    gclog_or_tty->print_cr("");
+    gclog_or_tty->print_cr("");
+    gclog_or_tty->print_cr("   Full Young GC Pauses:    %8d", _full_young_pause_num);
+    gclog_or_tty->print_cr("   Partial Young GC Pauses: %8d", _partial_young_pause_num);
+    gclog_or_tty->print_cr("");
+
+    gclog_or_tty->print_cr("NON-POPULAR PAUSES");
+    print_summary(_non_pop_summary);
+
+    gclog_or_tty->print_cr("POPULAR PAUSES");
+    print_summary(_pop_summary);
+
+    gclog_or_tty->print_cr("ABANDONED PAUSES");
+    print_abandoned_summary(_non_pop_abandoned_summary,
+                            _pop_abandoned_summary);
+
+    gclog_or_tty->print_cr("MISC");
+    print_summary_sd(0, "Stop World", _all_stop_world_times_ms);
+    print_summary_sd(0, "Yields", _all_yield_times_ms);
+    for (int i = 0; i < _aux_num; ++i) {
+      if (_all_aux_times_ms[i].num() > 0) {
+        char buffer[96];
+        sprintf(buffer, "Aux%d", i);
+        print_summary_sd(0, buffer, &_all_aux_times_ms[i]);
+      }
+    }
+
+    size_t all_region_num = _region_num_young + _region_num_tenured;
+    gclog_or_tty->print_cr("   New Regions %8d, Young %8d (%6.2lf%%), "
+               "Tenured %8d (%6.2lf%%)",
+               all_region_num,
+               _region_num_young,
+               (double) _region_num_young / (double) all_region_num * 100.0,
+               _region_num_tenured,
+               (double) _region_num_tenured / (double) all_region_num * 100.0);
+
+    if (!G1RSBarrierUseQueue) {
+      gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) "
+                    "did zero traversals.",
+                    _conc_refine_enabled, _conc_refine_zero_traversals,
+                    _conc_refine_enabled > 0 ?
+                    100.0 * (float)_conc_refine_zero_traversals/
+                    (float)_conc_refine_enabled : 0.0);
+      gclog_or_tty->print_cr("  Max # of traversals = %d.",
+                    _conc_refine_max_traversals);
+      gclog_or_tty->print_cr("");
+    }
+  }
+  if (TraceGen1Time) {
+    if (_all_full_gc_times_ms->num() > 0) {
+      gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s",
+                 _all_full_gc_times_ms->num(),
+                 _all_full_gc_times_ms->sum() / 1000.0);
+      gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times_ms->avg());
+      gclog_or_tty->print_cr("                     [std. dev = %8.2f ms, max = %8.2f ms]",
+                    _all_full_gc_times_ms->sd(),
+                    _all_full_gc_times_ms->maximum());
+    }
+  }
+}
+
+void G1CollectorPolicy::print_yg_surv_rate_info() const {
+#ifndef PRODUCT
+  _short_lived_surv_rate_group->print_surv_rate_summary();
+  // add this call for any other surv rate groups
+#endif // PRODUCT
+}
+
+void G1CollectorPolicy::update_conc_refine_data() {
+  unsigned traversals = _g1->concurrent_g1_refine()->disable();
+  if (traversals == 0) _conc_refine_zero_traversals++;
+  _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals,
+                                     (size_t)traversals);
+
+  if (G1PolicyVerbose > 1)
+    gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals);
+  double multiplier = 1.0;
+  if (traversals == 0) {
+    multiplier = 4.0;
+  } else if (traversals > (size_t)G1ConcRefineTargTraversals) {
+    multiplier = 1.0/1.5;
+  } else if (traversals < (size_t)G1ConcRefineTargTraversals) {
+    multiplier = 1.5;
+  }
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("  Multiplier = %7.2f.", multiplier);
+    gclog_or_tty->print("  Delta went from %d regions to ",
+               _conc_refine_current_delta);
+  }
+  _conc_refine_current_delta =
+    MIN2(_g1->n_regions(),
+         (size_t)(_conc_refine_current_delta * multiplier));
+  _conc_refine_current_delta =
+    MAX2(_conc_refine_current_delta, (size_t)1);
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta);
+  }
+  _conc_refine_enabled++;
+}
+
+void G1CollectorPolicy::set_single_region_collection_set(HeapRegion* hr) {
+  assert(collection_set() == NULL, "Must be no current CS.");
+  _collection_set_size = 0;
+  _collection_set_bytes_used_before = 0;
+  add_to_collection_set(hr);
+  count_CS_bytes_used();
+}
+
+bool
+G1CollectorPolicy::should_add_next_region_to_young_list() {
+  assert(in_young_gc_mode(), "should be in young GC mode");
+  bool ret;
+  size_t young_list_length = _g1->young_list_length();
+
+  if (young_list_length < _young_list_target_length) {
+    ret = true;
+    ++_region_num_young;
+  } else {
+    ret = false;
+    ++_region_num_tenured;
+  }
+
+  return ret;
+}
+
+#ifndef PRODUCT
+// for debugging, bit of a hack...
+static char*
+region_num_to_mbs(int length) {
+  static char buffer[64];
+  double bytes = (double) (length * HeapRegion::GrainBytes);
+  double mbs = bytes / (double) (1024 * 1024);
+  sprintf(buffer, "%7.2lfMB", mbs);
+  return buffer;
+}
+#endif // PRODUCT
+
+void
+G1CollectorPolicy::checkpoint_conc_overhead() {
+  double conc_overhead = 0.0;
+  if (G1AccountConcurrentOverhead)
+    conc_overhead = COTracker::totalPredConcOverhead();
+  _mmu_tracker->update_conc_overhead(conc_overhead);
+#if 0
+  gclog_or_tty->print(" CO %1.4lf TARGET %1.4lf",
+             conc_overhead, _mmu_tracker->max_gc_time());
+#endif
+}
+
+
+uint G1CollectorPolicy::max_regions(int purpose) {
+  switch (purpose) {
+    case GCAllocForSurvived:
+      return G1MaxSurvivorRegions;
+    case GCAllocForTenured:
+      return UINT_MAX;
+    default:
+      return UINT_MAX;
+  };
+}
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+set_single_region_collection_set(HeapRegion* hr) {
+  G1CollectorPolicy::set_single_region_collection_set(hr);
+  _collectionSetChooser->removeRegion(hr);
+}
+
+
+bool
+G1CollectorPolicy_BestRegionsFirst::should_do_collection_pause(size_t
+                                                               word_size) {
+  assert(_g1->regions_accounted_for(), "Region leakage!");
+  // Initiate a pause when we reach the steady-state "used" target.
+  size_t used_hard = (_g1->capacity() / 100) * G1SteadyStateUsed;
+  size_t used_soft =
+   MAX2((_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta),
+        used_hard/2);
+  size_t used = _g1->used();
+
+  double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
+
+  size_t young_list_length = _g1->young_list_length();
+  bool reached_target_length = young_list_length >= _young_list_target_length;
+
+  if (in_young_gc_mode()) {
+    if (reached_target_length) {
+      assert( young_list_length > 0 && _g1->young_list_length() > 0,
+              "invariant" );
+      _target_pause_time_ms = max_pause_time_ms;
+      return true;
+    }
+  } else {
+    guarantee( false, "should not reach here" );
+  }
+
+  return false;
+}
+
+#ifndef PRODUCT
+class HRSortIndexIsOKClosure: public HeapRegionClosure {
+  CollectionSetChooser* _chooser;
+public:
+  HRSortIndexIsOKClosure(CollectionSetChooser* chooser) :
+    _chooser(chooser) {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      assert(_chooser->regionProperlyOrdered(r), "Ought to be.");
+    }
+    return false;
+  }
+};
+
+bool G1CollectorPolicy_BestRegionsFirst::assertMarkedBytesDataOK() {
+  HRSortIndexIsOKClosure cl(_collectionSetChooser);
+  _g1->heap_region_iterate(&cl);
+  return true;
+}
+#endif
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+record_collection_pause_start(double start_time_sec, size_t start_used) {
+  G1CollectorPolicy::record_collection_pause_start(start_time_sec, start_used);
+}
+
+class NextNonCSElemFinder: public HeapRegionClosure {
+  HeapRegion* _res;
+public:
+  NextNonCSElemFinder(): _res(NULL) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set()) {
+      _res = r;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  HeapRegion* res() { return _res; }
+};
+
+class KnownGarbageClosure: public HeapRegionClosure {
+  CollectionSetChooser* _hrSorted;
+
+public:
+  KnownGarbageClosure(CollectionSetChooser* hrSorted) :
+    _hrSorted(hrSorted)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    // We only include humongous regions in collection
+    // sets when concurrent mark shows that their contained object is
+    // unreachable.
+
+    // Do we have any marking information for this region?
+    if (r->is_marked()) {
+      // We don't include humongous regions in collection
+      // sets because we collect them immediately at the end of a marking
+      // cycle.  We also don't include young regions because we *must*
+      // include them in the next collection pause.
+      if (!r->isHumongous() && !r->is_young()) {
+        _hrSorted->addMarkedHeapRegion(r);
+      }
+    }
+    return false;
+  }
+};
+
+class ParKnownGarbageHRClosure: public HeapRegionClosure {
+  CollectionSetChooser* _hrSorted;
+  jint _marked_regions_added;
+  jint _chunk_size;
+  jint _cur_chunk_idx;
+  jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end)
+  int _worker;
+  int _invokes;
+
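+  // Workers claim contiguous chunks of slots in the chooser's marked-region
+  // array, so adding individual regions does not require per-region
+  // synchronization with other workers.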
+  void get_new_chunk() {
+    _cur_chunk_idx = _hrSorted->getParMarkedHeapRegionChunk(_chunk_size);
+    _cur_chunk_end = _cur_chunk_idx + _chunk_size;
+  }
+  void add_region(HeapRegion* r) {
+    if (_cur_chunk_idx == _cur_chunk_end) {
+      get_new_chunk();
+    }
+    assert(_cur_chunk_idx < _cur_chunk_end, "postcondition");
+    _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r);
+    _marked_regions_added++;
+    _cur_chunk_idx++;
+  }
+
+public:
+  ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted,
+                           jint chunk_size,
+                           int worker) :
+    _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
+    _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0),
+    _invokes(0)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    // We only include humongous regions in collection
+    // sets when concurrent mark shows that their contained object is
+    // unreachable.
+    _invokes++;
+
+    // Do we have any marking information for this region?
+    if (r->is_marked()) {
+      // We don't include humongous regions in collection
+      // sets because we collect them immediately at the end of a marking
+      // cycle.
+      // We also do not include young regions in collection sets
+      if (!r->isHumongous() && !r->is_young()) {
+        add_region(r);
+      }
+    }
+    return false;
+  }
+  jint marked_regions_added() { return _marked_regions_added; }
+  int invokes() { return _invokes; }
+};
+
+class ParKnownGarbageTask: public AbstractGangTask {
+  CollectionSetChooser* _hrSorted;
+  jint _chunk_size;
+  G1CollectedHeap* _g1;
+public:
+  ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) :
+    AbstractGangTask("ParKnownGarbageTask"),
+    _hrSorted(hrSorted), _chunk_size(chunk_size),
+    _g1(G1CollectedHeap::heap())
+  {}
+
+  void work(int i) {
+    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i);
+    // Back to zero for the claim value.
+    _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i,
+                                         HeapRegion::InitialClaimValue);
+    jint regions_added = parKnownGarbageCl.marked_regions_added();
+    _hrSorted->incNumMarkedHeapRegions(regions_added);
+    if (G1PrintParCleanupStats) {
+      gclog_or_tty->print("     Thread %d called %d times, added %d regions to list.\n",
+                 i, parKnownGarbageCl.invokes(), regions_added);
+    }
+  }
+};
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                   size_t max_live_bytes) {
+  double start;
+  if (G1PrintParCleanupStats) start = os::elapsedTime();
+  record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes);
+
+  _collectionSetChooser->clearMarkedHeapRegions();
+  double clear_marked_end;
+  if (G1PrintParCleanupStats) {
+    clear_marked_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  clear marked regions + work1: %8.3f ms.",
+                  (clear_marked_end - start)*1000.0);
+  }
+  if (ParallelGCThreads > 0) {
+    const size_t OverpartitionFactor = 4;
+    const size_t MinChunkSize = 8;
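+    // Overpartition the region array into roughly OverpartitionFactor chunks
+    // per worker, but never smaller than MinChunkSize regions, to keep the
+    // parallel work reasonably balanced.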
+    const size_t ChunkSize =
+      MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
+           MinChunkSize);
+    _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(),
+                                                             ChunkSize);
+    ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser,
+                                            (int) ChunkSize);
+    _g1->workers()->run_task(&parKnownGarbageTask);
+
+    assert(_g1->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+           "sanity check");
+  } else {
+    KnownGarbageClosure knownGarbagecl(_collectionSetChooser);
+    _g1->heap_region_iterate(&knownGarbagecl);
+  }
+  double known_garbage_end;
+  if (G1PrintParCleanupStats) {
+    known_garbage_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  compute known garbage: %8.3f ms.",
+                  (known_garbage_end - clear_marked_end)*1000.0);
+  }
+  _collectionSetChooser->sortMarkedHeapRegions();
+  double sort_end;
+  if (G1PrintParCleanupStats) {
+    sort_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  sorting: %8.3f ms.",
+                  (sort_end - known_garbage_end)*1000.0);
+  }
+
+  record_concurrent_mark_cleanup_end_work2();
+  double work2_end;
+  if (G1PrintParCleanupStats) {
+    work2_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  work2: %8.3f ms.",
+                  (work2_end - sort_end)*1000.0);
+  }
+}
+
+// Add the heap region to the collection set, updating the collection set
+// size and the running total of bytes used before collection.
+void G1CollectorPolicy::
+add_to_collection_set(HeapRegion* hr) {
+  if (G1TraceRegions) {
+    gclog_or_tty->print_cr("added region to cset %d:["PTR_FORMAT", "PTR_FORMAT"], "
+                  "top "PTR_FORMAT", young %s",
+                  hr->hrs_index(), hr->bottom(), hr->end(),
+                  hr->top(), (hr->is_young()) ? "YES" : "NO");
+  }
+
+  if (_g1->mark_in_progress())
+    _g1->concurrent_mark()->registerCSetRegion(hr);
+
+  assert(!hr->in_collection_set(),
+              "should not already be in the CSet");
+  hr->set_in_collection_set(true);
+  hr->set_next_in_collection_set(_collection_set);
+  _collection_set = hr;
+  _collection_set_size++;
+  _collection_set_bytes_used_before += hr->used();
+}
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+choose_collection_set(HeapRegion* pop_region) {
+  double non_young_start_time_sec;
+  start_recording_regions();
+
+  if (pop_region != NULL) {
+    _target_pause_time_ms = (double) G1MaxPauseTimeMS;
+  } else {
+    guarantee(_target_pause_time_ms > -1.0,
+              "_target_pause_time_ms should have been set!");
+  }
+
+  // pop region is either null (and so is CS), or else it *is* the CS.
+  assert(_collection_set == pop_region, "Precondition");
+
+  double base_time_ms = predict_base_elapsed_time_ms(_pending_cards);
+  double predicted_pause_time_ms = base_time_ms;
+
+  double target_time_ms = _target_pause_time_ms;
+  double time_remaining_ms = target_time_ms - base_time_ms;
+
+  // the 10% and 50% values are arbitrary...
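+  // If the predicted base time leaves less than 10% of the target pause time
+  // for choosing regions, pretend half of the target is still available so we
+  // still add some regions, and note that we are over the target.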
+  if (time_remaining_ms < 0.10*target_time_ms) {
+    time_remaining_ms = 0.50 * target_time_ms;
+    _within_target = false;
+  } else {
+    _within_target = true;
+  }
+
+  // We figure out the number of bytes available for future to-space.
+  // For new regions without marking information, we must assume the
+  // worst-case of complete survival.  If we have marking information for a
+  // region, we can bound the amount of live data.  We can add a number of
+  // such regions, as long as the sum of the live data bounds does not
+  // exceed the available evacuation space.
+  size_t max_live_bytes = _g1->free_regions() * HeapRegion::GrainBytes;
+
+  size_t expansion_bytes =
+    _g1->expansion_regions() * HeapRegion::GrainBytes;
+
+  if (pop_region == NULL) {
+    _collection_set_bytes_used_before = 0;
+    _collection_set_size = 0;
+  }
+
+  // Adjust for expansion and slop.
+  max_live_bytes = max_live_bytes + expansion_bytes;
+
+  assert(pop_region != NULL || _g1->regions_accounted_for(), "Region leakage!");
+
+  HeapRegion* hr;
+  if (in_young_gc_mode()) {
+    double young_start_time_sec = os::elapsedTime();
+
+    if (G1PolicyVerbose > 0) {
+      gclog_or_tty->print_cr("Adding %d young regions to the CSet",
+                    _g1->young_list_length());
+    }
+    _young_cset_length  = 0;
+    _last_young_gc_full = full_young_gcs() ? true : false;
+    if (_last_young_gc_full)
+      ++_full_young_pause_num;
+    else
+      ++_partial_young_pause_num;
+    hr = _g1->pop_region_from_young_list();
+    while (hr != NULL) {
+
+      assert( hr->young_index_in_cset() == -1, "invariant" );
+      assert( hr->age_in_surv_rate_group() != -1, "invariant" );
+      hr->set_young_index_in_cset((int) _young_cset_length);
+
+      ++_young_cset_length;
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, true);
+      time_remaining_ms -= predicted_time_ms;
+      predicted_pause_time_ms += predicted_time_ms;
+      if (hr == pop_region) {
+        // The popular region was young.  Skip over it.
+        assert(hr->in_collection_set(), "It's the pop region.");
+      } else {
+        assert(!hr->in_collection_set(), "It's not the pop region.");
+        add_to_collection_set(hr);
+        record_cset_region(hr, true);
+      }
+      max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes);
+      if (G1PolicyVerbose > 0) {
+        gclog_or_tty->print_cr("  Added [" PTR_FORMAT ", " PTR_FORMAT") to CS.",
+                      hr->bottom(), hr->end());
+        gclog_or_tty->print_cr("    (" SIZE_FORMAT " KB left in heap.)",
+                      max_live_bytes/K);
+      }
+      hr = _g1->pop_region_from_young_list();
+    }
+
+    record_scan_only_regions(_g1->young_list_scan_only_length());
+
+    double young_end_time_sec = os::elapsedTime();
+    _recorded_young_cset_choice_time_ms =
+      (young_end_time_sec - young_start_time_sec) * 1000.0;
+
+    non_young_start_time_sec = os::elapsedTime();
+
+    if (_young_cset_length > 0 && _last_young_gc_full) {
+      // don't bother adding more regions...
+      goto choose_collection_set_end;
+    }
+  } else if (pop_region != NULL) {
+    // We're not in young mode, and we chose a popular region; don't choose
+    // any more.
+    return;
+  }
+
+  if (!in_young_gc_mode() || !full_young_gcs()) {
+    bool should_continue = true;
+    NumberSeq seq;
+    double avg_prediction = 100000000000000000.0; // something very large
+    do {
+      hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
+                                                      avg_prediction);
+      if (hr != NULL && !hr->popular()) {
+        double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+        time_remaining_ms -= predicted_time_ms;
+        predicted_pause_time_ms += predicted_time_ms;
+        add_to_collection_set(hr);
+        record_cset_region(hr, false);
+        max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes);
+        if (G1PolicyVerbose > 0) {
+          gclog_or_tty->print_cr("    (" SIZE_FORMAT " KB left in heap.)",
+                        max_live_bytes/K);
+        }
+        seq.add(predicted_time_ms);
+        avg_prediction = seq.avg() + seq.sd();
+      }
+      should_continue =
+        ( hr != NULL) &&
+        ( (adaptive_young_list_length()) ? time_remaining_ms > 0.0
+          : _collection_set_size < _young_list_fixed_length );
+    } while (should_continue);
+
+    if (!adaptive_young_list_length() &&
+        _collection_set_size < _young_list_fixed_length)
+      _should_revert_to_full_young_gcs  = true;
+  }
+
+choose_collection_set_end:
+  count_CS_bytes_used();
+
+  end_recording_regions();
+
+  double non_young_end_time_sec = os::elapsedTime();
+  _recorded_non_young_cset_choice_time_ms =
+    (non_young_end_time_sec - non_young_start_time_sec) * 1000.0;
+}
+
+void G1CollectorPolicy_BestRegionsFirst::record_full_collection_end() {
+  G1CollectorPolicy::record_full_collection_end();
+  _collectionSetChooser->updateAfterFullCollection();
+}
+
+void G1CollectorPolicy_BestRegionsFirst::
+expand_if_possible(size_t numRegions) {
+  size_t expansion_bytes = numRegions * HeapRegion::GrainBytes;
+  _g1->expand(expansion_bytes);
+}
+
+void G1CollectorPolicy_BestRegionsFirst::
+record_collection_pause_end(bool popular, bool abandoned) {
+  G1CollectorPolicy::record_collection_pause_end(popular, abandoned);
+  assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
+}
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,1199 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A G1CollectorPolicy makes policy decisions that determine the
+// characteristics of the collector.  Examples include:
+//   * choice of collection set.
+//   * when to collect.
+
+class HeapRegion;
+class CollectionSetChooser;
+
+// Yes, this is a bit unpleasant... but it saves replicating the same thing
+// over and over again and introducing subtle problems through small typos and
+// cutting and pasting mistakes. The macro below introduces a number
+// sequence into the classes that follow and the methods that access it.
+
+#define define_num_seq(name)                                                  \
+private:                                                                      \
+  NumberSeq _all_##name##_times_ms;                                           \
+public:                                                                       \
+  void record_##name##_time_ms(double ms) {                                   \
+    _all_##name##_times_ms.add(ms);                                           \
+  }                                                                           \
+  NumberSeq* get_##name##_seq() {                                             \
+    return &_all_##name##_times_ms;                                           \
+  }
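+// For example, define_num_seq(total) expands to a private NumberSeq field
+// _all_total_times_ms plus public record_total_time_ms(double) and
+// get_total_seq() accessors.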
+
+class MainBodySummary;
+class PopPreambleSummary;
+
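+// PauseSummary is the common base for per-pause statistics; the concrete
+// summaries further down (NonPopSummary, PopSummary, NonPopAbandonedSummary,
+// PopAbandonedSummary) mix in MainBodySummary and/or PopPreambleSummary
+// depending on whether a pause was popular and whether it was abandoned.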
+class PauseSummary {
+  define_num_seq(total)
+    define_num_seq(other)
+
+public:
+  virtual MainBodySummary*    main_body_summary()    { return NULL; }
+  virtual PopPreambleSummary* pop_preamble_summary() { return NULL; }
+};
+
+class MainBodySummary {
+  define_num_seq(satb_drain) // optional
+  define_num_seq(parallel) // parallel only
+    define_num_seq(ext_root_scan)
+    define_num_seq(mark_stack_scan)
+    define_num_seq(scan_only)
+    define_num_seq(update_rs)
+    define_num_seq(scan_rs)
+    define_num_seq(scan_new_refs) // Only for temp use; added to
+                                  // in parallel case.
+    define_num_seq(obj_copy)
+    define_num_seq(termination) // parallel only
+    define_num_seq(parallel_other) // parallel only
+  define_num_seq(mark_closure)
+  define_num_seq(clear_ct)  // parallel only
+};
+
+class PopPreambleSummary {
+  define_num_seq(pop_preamble)
+    define_num_seq(pop_update_rs)
+    define_num_seq(pop_scan_rs)
+    define_num_seq(pop_closure_app)
+    define_num_seq(pop_evacuation)
+    define_num_seq(pop_other)
+};
+
+class NonPopSummary: public PauseSummary,
+                     public MainBodySummary {
+public:
+  virtual MainBodySummary*    main_body_summary()    { return this; }
+};
+
+class PopSummary: public PauseSummary,
+                  public MainBodySummary,
+                  public PopPreambleSummary {
+public:
+  virtual MainBodySummary*    main_body_summary()    { return this; }
+  virtual PopPreambleSummary* pop_preamble_summary() { return this; }
+};
+
+class NonPopAbandonedSummary: public PauseSummary {
+};
+
+class PopAbandonedSummary: public PauseSummary,
+                           public PopPreambleSummary {
+public:
+  virtual PopPreambleSummary* pop_preamble_summary() { return this; }
+};
+
+class G1CollectorPolicy: public CollectorPolicy {
+protected:
+  // The number of pauses during the execution.
+  long _n_pauses;
+
+  // either equal to the number of parallel threads, if ParallelGCThreads
+  // has been set, or 1 otherwise
+  int _parallel_gc_threads;
+
+  enum SomePrivateConstants {
+    NumPrevPausesForHeuristics = 10,
+    NumPrevGCsForHeuristics = 10,
+    NumAPIs = HeapRegion::MaxAge
+  };
+
+  G1MMUTracker* _mmu_tracker;
+
+  void initialize_flags();
+
+  void initialize_all() {
+    initialize_flags();
+    initialize_size_info();
+    initialize_perm_generation(PermGen::MarkSweepCompact);
+  }
+
+  virtual size_t default_init_heap_size() {
+    // Pick some reasonable default.
+    return 8*M;
+  }
+
+
+  double _cur_collection_start_sec;
+  size_t _cur_collection_pause_used_at_start_bytes;
+  size_t _cur_collection_pause_used_regions_at_start;
+  size_t _prev_collection_pause_used_at_end_bytes;
+  double _cur_collection_par_time_ms;
+  double _cur_satb_drain_time_ms;
+  double _cur_clear_ct_time_ms;
+  bool   _satb_drain_time_set;
+  double _cur_popular_preamble_start_ms;
+  double _cur_popular_preamble_time_ms;
+  double _cur_popular_compute_rc_time_ms;
+  double _cur_popular_evac_time_ms;
+
+  double _cur_CH_strong_roots_end_sec;
+  double _cur_CH_strong_roots_dur_ms;
+  double _cur_G1_strong_roots_end_sec;
+  double _cur_G1_strong_roots_dur_ms;
+
+  // Statistics for recent GC pauses.  See below for how indexed.
+  TruncatedSeq* _recent_CH_strong_roots_times_ms;
+  TruncatedSeq* _recent_G1_strong_roots_times_ms;
+  TruncatedSeq* _recent_evac_times_ms;
+  // These exclude marking times.
+  TruncatedSeq* _recent_pause_times_ms;
+  TruncatedSeq* _recent_gc_times_ms;
+
+  TruncatedSeq* _recent_CS_bytes_used_before;
+  TruncatedSeq* _recent_CS_bytes_surviving;
+
+  TruncatedSeq* _recent_rs_sizes;
+
+  TruncatedSeq* _concurrent_mark_init_times_ms;
+  TruncatedSeq* _concurrent_mark_remark_times_ms;
+  TruncatedSeq* _concurrent_mark_cleanup_times_ms;
+
+  NonPopSummary*           _non_pop_summary;
+  PopSummary*              _pop_summary;
+  NonPopAbandonedSummary*  _non_pop_abandoned_summary;
+  PopAbandonedSummary*     _pop_abandoned_summary;
+
+  NumberSeq* _all_pause_times_ms;
+  NumberSeq* _all_full_gc_times_ms;
+  double _stop_world_start;
+  NumberSeq* _all_stop_world_times_ms;
+  NumberSeq* _all_yield_times_ms;
+
+  size_t     _region_num_young;
+  size_t     _region_num_tenured;
+  size_t     _prev_region_num_young;
+  size_t     _prev_region_num_tenured;
+
+  NumberSeq* _all_mod_union_times_ms;
+
+  int        _aux_num;
+  NumberSeq* _all_aux_times_ms;
+  double*    _cur_aux_start_times_ms;
+  double*    _cur_aux_times_ms;
+  bool*      _cur_aux_times_set;
+
+  double* _par_last_ext_root_scan_times_ms;
+  double* _par_last_mark_stack_scan_times_ms;
+  double* _par_last_scan_only_times_ms;
+  double* _par_last_scan_only_regions_scanned;
+  double* _par_last_update_rs_start_times_ms;
+  double* _par_last_update_rs_times_ms;
+  double* _par_last_update_rs_processed_buffers;
+  double* _par_last_scan_rs_start_times_ms;
+  double* _par_last_scan_rs_times_ms;
+  double* _par_last_scan_new_refs_times_ms;
+  double* _par_last_obj_copy_times_ms;
+  double* _par_last_termination_times_ms;
+
+  // there are two passes during popular pauses, so we need somewhere to
+  // store the results of the first pass
+  double* _pop_par_last_update_rs_start_times_ms;
+  double* _pop_par_last_update_rs_times_ms;
+  double* _pop_par_last_update_rs_processed_buffers;
+  double* _pop_par_last_scan_rs_start_times_ms;
+  double* _pop_par_last_scan_rs_times_ms;
+  double* _pop_par_last_closure_app_times_ms;
+
+  double _pop_compute_rc_start;
+  double _pop_evac_start;
+
+  // indicates that we are in young GC mode
+  bool _in_young_gc_mode;
+
+  // indicates whether we are in full young or partially young GC mode
+  bool _full_young_gcs;
+
+  // if true, then it tries to dynamically adjust the length of the
+  // young list
+  bool _adaptive_young_list_length;
+  size_t _young_list_min_length;
+  size_t _young_list_target_length;
+  size_t _young_list_so_prefix_length;
+  size_t _young_list_fixed_length;
+
+  size_t _young_cset_length;
+  bool   _last_young_gc_full;
+
+  double _target_pause_time_ms;
+
+  unsigned              _full_young_pause_num;
+  unsigned              _partial_young_pause_num;
+
+  bool                  _during_marking;
+  bool                  _in_marking_window;
+  bool                  _in_marking_window_im;
+
+  SurvRateGroup*        _short_lived_surv_rate_group;
+  SurvRateGroup*        _survivor_surv_rate_group;
+  // add here any more surv rate groups
+
+  bool during_marking() {
+    return _during_marking;
+  }
+
+  // <NEW PREDICTION>
+
+private:
+  enum PredictionConstants {
+    TruncatedSeqLength = 10
+  };
+
+  TruncatedSeq* _alloc_rate_ms_seq;
+  double        _prev_collection_pause_end_ms;
+
+  TruncatedSeq* _pending_card_diff_seq;
+  TruncatedSeq* _rs_length_diff_seq;
+  TruncatedSeq* _cost_per_card_ms_seq;
+  TruncatedSeq* _cost_per_scan_only_region_ms_seq;
+  TruncatedSeq* _fully_young_cards_per_entry_ratio_seq;
+  TruncatedSeq* _partially_young_cards_per_entry_ratio_seq;
+  TruncatedSeq* _cost_per_entry_ms_seq;
+  TruncatedSeq* _partially_young_cost_per_entry_ms_seq;
+  TruncatedSeq* _cost_per_byte_ms_seq;
+  TruncatedSeq* _constant_other_time_ms_seq;
+  TruncatedSeq* _young_other_cost_per_region_ms_seq;
+  TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
+
+  TruncatedSeq* _pending_cards_seq;
+  TruncatedSeq* _scanned_cards_seq;
+  TruncatedSeq* _rs_lengths_seq;
+
+  TruncatedSeq* _cost_per_byte_ms_during_cm_seq;
+  TruncatedSeq* _cost_per_scan_only_region_ms_during_cm_seq;
+
+  TruncatedSeq* _young_gc_eff_seq;
+
+  TruncatedSeq* _max_conc_overhead_seq;
+
+  size_t _recorded_young_regions;
+  size_t _recorded_scan_only_regions;
+  size_t _recorded_non_young_regions;
+  size_t _recorded_region_num;
+
+  size_t _free_regions_at_end_of_collection;
+  size_t _scan_only_regions_at_end_of_collection;
+
+  size_t _recorded_rs_lengths;
+  size_t _max_rs_lengths;
+
+  size_t _recorded_marked_bytes;
+  size_t _recorded_young_bytes;
+
+  size_t _predicted_pending_cards;
+  size_t _predicted_cards_scanned;
+  size_t _predicted_rs_lengths;
+  size_t _predicted_bytes_to_copy;
+
+  double _predicted_survival_ratio;
+  double _predicted_rs_update_time_ms;
+  double _predicted_rs_scan_time_ms;
+  double _predicted_scan_only_scan_time_ms;
+  double _predicted_object_copy_time_ms;
+  double _predicted_constant_other_time_ms;
+  double _predicted_young_other_time_ms;
+  double _predicted_non_young_other_time_ms;
+  double _predicted_pause_time_ms;
+
+  double _vtime_diff_ms;
+
+  double _recorded_young_free_cset_time_ms;
+  double _recorded_non_young_free_cset_time_ms;
+
+  double _sigma;
+  double _expensive_region_limit_ms;
+
+  size_t _rs_lengths_prediction;
+
+  size_t _known_garbage_bytes;
+  double _known_garbage_ratio;
+
+  double sigma() {
+    return _sigma;
+  }
+
+  // A function that prevents us from putting too much stock in small sample
+  // sets.  Returns 1.0 for 5 or more samples; with fewer samples the factor
+  // grows linearly (by sigma() / 2 per missing sample) to reflect the extra
+  // uncertainty.
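+  // For example, if sigma() happened to be 0.5, a single sample would give a
+  // factor of 1.0 + 0.5 * (5 - 1) / 2.0 = 2.0, three samples 1.5, and five or
+  // more samples 1.0 (the value 0.5 here is just for illustration).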
+  double confidence_factor(int samples) {
+    if (samples > 4) return 1.0;
+    else return  1.0 + sigma() * ((double)(5 - samples))/2.0;
+  }
+
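+  // A prediction biased low: the decaying average minus sigma() decaying
+  // standard deviations (cf. get_new_prediction() below, which is biased high).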
+  double get_new_neg_prediction(TruncatedSeq* seq) {
+    return seq->davg() - sigma() * seq->dsd();
+  }
+
+#ifndef PRODUCT
+  bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group);
+#endif // PRODUCT
+
+protected:
+  double _pause_time_target_ms;
+  double _recorded_young_cset_choice_time_ms;
+  double _recorded_non_young_cset_choice_time_ms;
+  bool   _within_target;
+  size_t _pending_cards;
+  size_t _max_pending_cards;
+
+public:
+
+  void set_region_short_lived(HeapRegion* hr) {
+    hr->install_surv_rate_group(_short_lived_surv_rate_group);
+  }
+
+  void set_region_survivors(HeapRegion* hr) {
+    hr->install_surv_rate_group(_survivor_surv_rate_group);
+  }
+
+#ifndef PRODUCT
+  bool verify_young_ages();
+#endif // PRODUCT
+
+  void tag_scan_only(size_t short_lived_scan_only_length);
+
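+  // A pessimistic (biased-high) prediction: the decaying average plus sigma()
+  // decaying standard deviations, but never less than the decaying average
+  // scaled by the small-sample confidence factor above.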
+  double get_new_prediction(TruncatedSeq* seq) {
+    return MAX2(seq->davg() + sigma() * seq->dsd(),
+                seq->davg() * confidence_factor(seq->num()));
+  }
+
+  size_t young_cset_length() {
+    return _young_cset_length;
+  }
+
+  void record_max_rs_lengths(size_t rs_lengths) {
+    _max_rs_lengths = rs_lengths;
+  }
+
+  size_t predict_pending_card_diff() {
+    double prediction = get_new_neg_prediction(_pending_card_diff_seq);
+    if (prediction < 0.00001)
+      return 0;
+    else
+      return (size_t) prediction;
+  }
+
+  size_t predict_pending_cards() {
+    size_t max_pending_card_num = _g1->max_pending_card_num();
+    size_t diff = predict_pending_card_diff();
+    size_t prediction;
+    if (diff > max_pending_card_num)
+      prediction = max_pending_card_num;
+    else
+      prediction = max_pending_card_num - diff;
+
+    return prediction;
+  }
+
+  size_t predict_rs_length_diff() {
+    return (size_t) get_new_prediction(_rs_length_diff_seq);
+  }
+
+  double predict_alloc_rate_ms() {
+    return get_new_prediction(_alloc_rate_ms_seq);
+  }
+
+  double predict_cost_per_card_ms() {
+    return get_new_prediction(_cost_per_card_ms_seq);
+  }
+
+  double predict_rs_update_time_ms(size_t pending_cards) {
+    return (double) pending_cards * predict_cost_per_card_ms();
+  }
+
+  double predict_fully_young_cards_per_entry_ratio() {
+    return get_new_prediction(_fully_young_cards_per_entry_ratio_seq);
+  }
+
+  double predict_partially_young_cards_per_entry_ratio() {
+    if (_partially_young_cards_per_entry_ratio_seq->num() < 2)
+      return predict_fully_young_cards_per_entry_ratio();
+    else
+      return get_new_prediction(_partially_young_cards_per_entry_ratio_seq);
+  }
+
+  size_t predict_young_card_num(size_t rs_length) {
+    return (size_t) ((double) rs_length *
+                     predict_fully_young_cards_per_entry_ratio());
+  }
+
+  size_t predict_non_young_card_num(size_t rs_length) {
+    return (size_t) ((double) rs_length *
+                     predict_partially_young_cards_per_entry_ratio());
+  }
+
+  double predict_rs_scan_time_ms(size_t card_num) {
+    if (full_young_gcs())
+      return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq);
+    else
+      return predict_partially_young_rs_scan_time_ms(card_num);
+  }
+
+  double predict_partially_young_rs_scan_time_ms(size_t card_num) {
+    if (_partially_young_cost_per_entry_ms_seq->num() < 3)
+      return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq);
+    else
+      return (double) card_num *
+        get_new_prediction(_partially_young_cost_per_entry_ms_seq);
+  }
+
+  double predict_scan_only_time_ms_during_cm(size_t scan_only_region_num) {
+    if (_cost_per_scan_only_region_ms_during_cm_seq->num() < 3)
+      return 1.5 * (double) scan_only_region_num *
+        get_new_prediction(_cost_per_scan_only_region_ms_seq);
+    else
+      return (double) scan_only_region_num *
+        get_new_prediction(_cost_per_scan_only_region_ms_during_cm_seq);
+  }
+
+  double predict_scan_only_time_ms(size_t scan_only_region_num) {
+    if (_in_marking_window_im)
+      return predict_scan_only_time_ms_during_cm(scan_only_region_num);
+    else
+      return (double) scan_only_region_num *
+        get_new_prediction(_cost_per_scan_only_region_ms_seq);
+  }
+
+  double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) {
+    if (_cost_per_byte_ms_during_cm_seq->num() < 3)
+      return 1.1 * (double) bytes_to_copy *
+        get_new_prediction(_cost_per_byte_ms_seq);
+    else
+      return (double) bytes_to_copy *
+        get_new_prediction(_cost_per_byte_ms_during_cm_seq);
+  }
+
+  double predict_object_copy_time_ms(size_t bytes_to_copy) {
+    if (_in_marking_window && !_in_marking_window_im)
+      return predict_object_copy_time_ms_during_cm(bytes_to_copy);
+    else
+      return (double) bytes_to_copy *
+        get_new_prediction(_cost_per_byte_ms_seq);
+  }
+
+  double predict_constant_other_time_ms() {
+    return get_new_prediction(_constant_other_time_ms_seq);
+  }
+
+  double predict_young_other_time_ms(size_t young_num) {
+    return
+      (double) young_num *
+      get_new_prediction(_young_other_cost_per_region_ms_seq);
+  }
+
+  double predict_non_young_other_time_ms(size_t non_young_num) {
+    return
+      (double) non_young_num *
+      get_new_prediction(_non_young_other_cost_per_region_ms_seq);
+  }
+
+  void check_if_region_is_too_expensive(double predicted_time_ms);
+
+  double predict_young_collection_elapsed_time_ms(size_t adjustment);
+  double predict_base_elapsed_time_ms(size_t pending_cards);
+  double predict_base_elapsed_time_ms(size_t pending_cards,
+                                      size_t scanned_cards);
+  size_t predict_bytes_to_copy(HeapRegion* hr);
+  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
+
+  // for use by: calculate_optimal_so_length(length)
+  void predict_gc_eff(size_t young_region_num,
+                      size_t so_length,
+                      double base_time_ms,
+                      double *gc_eff,
+                      double *pause_time_ms);
+
+  // for use by: calculate_young_list_target_config(rs_length)
+  bool predict_gc_eff(size_t young_region_num,
+                      size_t so_length,
+                      double base_time_with_so_ms,
+                      size_t init_free_regions,
+                      double target_pause_time_ms,
+                      double* gc_eff);
+
+  void start_recording_regions();
+  void record_cset_region(HeapRegion* hr, bool young);
+  void record_scan_only_regions(size_t scan_only_length);
+  void end_recording_regions();
+
+  void record_vtime_diff_ms(double vtime_diff_ms) {
+    _vtime_diff_ms = vtime_diff_ms;
+  }
+
+  void record_young_free_cset_time_ms(double time_ms) {
+    _recorded_young_free_cset_time_ms = time_ms;
+  }
+
+  void record_non_young_free_cset_time_ms(double time_ms) {
+    _recorded_non_young_free_cset_time_ms = time_ms;
+  }
+
+  double predict_young_gc_eff() {
+    return get_new_neg_prediction(_young_gc_eff_seq);
+  }
+
+  // </NEW PREDICTION>
+
+public:
+  void cset_regions_freed() {
+    bool propagate = _last_young_gc_full && !_in_marking_window;
+    _short_lived_surv_rate_group->all_surviving_words_recorded(propagate);
+    _survivor_surv_rate_group->all_surviving_words_recorded(propagate);
+    // also call it on any more surv rate groups
+  }
+
+  void set_known_garbage_bytes(size_t known_garbage_bytes) {
+    _known_garbage_bytes = known_garbage_bytes;
+    size_t heap_bytes = _g1->capacity();
+    _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes;
+  }
+
+  void decrease_known_garbage_bytes(size_t known_garbage_bytes) {
+    guarantee( _known_garbage_bytes >= known_garbage_bytes, "invariant" );
+
+    _known_garbage_bytes -= known_garbage_bytes;
+    size_t heap_bytes = _g1->capacity();
+    _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes;
+  }
+
+  G1MMUTracker* mmu_tracker() {
+    return _mmu_tracker;
+  }
+
+  double predict_init_time_ms() {
+    return get_new_prediction(_concurrent_mark_init_times_ms);
+  }
+
+  double predict_remark_time_ms() {
+    return get_new_prediction(_concurrent_mark_remark_times_ms);
+  }
+
+  double predict_cleanup_time_ms() {
+    return get_new_prediction(_concurrent_mark_cleanup_times_ms);
+  }
+
+  // Returns an estimate of the survival rate of a region at the given
+  // young-generation age ("age").
+  double predict_yg_surv_rate(int age) {
+    TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age);
+    if (seq->num() == 0)
+      gclog_or_tty->print("BARF! age is %d", age);
+    guarantee( seq->num() > 0, "invariant" );
+    double pred = get_new_prediction(seq);
+    if (pred > 1.0)
+      pred = 1.0;
+    return pred;
+  }
+
+  double accum_yg_surv_rate_pred(int age) {
+    return _short_lived_surv_rate_group->accum_surv_rate_pred(age);
+  }
+
+protected:
+  void print_stats (int level, const char* str, double value);
+  void print_stats (int level, const char* str, int value);
+  void print_par_stats (int level, const char* str, double* data) {
+    print_par_stats(level, str, data, true);
+  }
+  void print_par_stats (int level, const char* str, double* data, bool summary);
+  void print_par_buffers (int level, const char* str, double* data, bool summary);
+
+  void check_other_times(int level,
+                         NumberSeq* other_times_ms,
+                         NumberSeq* calc_other_times_ms) const;
+
+  void print_summary (PauseSummary* stats) const;
+  void print_abandoned_summary(PauseSummary* non_pop_summary,
+                               PauseSummary* pop_summary) const;
+
+  void print_summary (int level, const char* str, NumberSeq* seq) const;
+  void print_summary_sd (int level, const char* str, NumberSeq* seq) const;
+
+  double avg_value (double* data);
+  double max_value (double* data);
+  double sum_of_values (double* data);
+  double max_sum (double* data1, double* data2);
+
+  int _last_satb_drain_processed_buffers;
+  int _last_update_rs_processed_buffers;
+  double _last_pause_time_ms;
+
+  size_t _bytes_in_to_space_before_gc;
+  size_t _bytes_in_to_space_after_gc;
+  size_t bytes_in_to_space_during_gc() {
+    return
+      _bytes_in_to_space_after_gc - _bytes_in_to_space_before_gc;
+  }
+  size_t _bytes_in_collection_set_before_gc;
+  // Used to count used bytes in CS.
+  friend class CountCSClosure;
+
+  // Statistics kept per GC stoppage, pause or full.
+  TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec;
+
+  // We track markings.
+  int _num_markings;
+  double _mark_thread_startup_sec;       // Time at startup of marking thread
+
+  // Add a new GC of the given duration and end time to the record.
+  void update_recent_gc_times(double end_time_sec, double elapsed_ms);
+
+  // The head of the list (via "next_in_collection_set()") representing the
+  // current collection set.
+  HeapRegion* _collection_set;
+  size_t _collection_set_size;
+  size_t _collection_set_bytes_used_before;
+
+  // Info about marking.
+  int _n_marks; // Sticky at 2, so we know when we've done at least 2.
+
+  // The number of collection pauses at the end of the last mark.
+  size_t _n_pauses_at_mark_end;
+
+  // ==== This section is for stats related to starting Conc Refinement on time.
+  size_t _conc_refine_enabled;
+  size_t _conc_refine_zero_traversals;
+  size_t _conc_refine_max_traversals;
+  // In # of heap regions.
+  size_t _conc_refine_current_delta;
+
+  // At the beginning of a collection pause, update the variables above,
+  // especially the "delta".
+  void update_conc_refine_data();
+  // ====
+
+  // Stash a pointer to the g1 heap.
+  G1CollectedHeap* _g1;
+
+  // The average time in ms per collection pause, averaged over recent pauses.
+  double recent_avg_time_for_pauses_ms();
+
+  // The average time in ms for processing CollectedHeap strong roots, per
+  // collection pause, averaged over recent pauses.
+  double recent_avg_time_for_CH_strong_ms();
+
+  // The average time in ms for processing the G1 remembered set, per
+  // pause, averaged over recent pauses.
+  double recent_avg_time_for_G1_strong_ms();
+
+  // The average time in ms for "evacuating followers", per pause, averaged
+  // over recent pauses.
+  double recent_avg_time_for_evac_ms();
+
+  // The number of "recent" GCs recorded in the number sequences
+  int number_of_recent_gcs();
+
+  // The average survival ratio, computed as the total number of bytes
+  // surviving divided by the total number of bytes before collection, over
+  // the last several recent pauses.
+  double recent_avg_survival_fraction();
+  // The survival fraction of the most recent pause; if there have been no
+  // pauses, returns 1.0.
+  double last_survival_fraction();
+
+  // Returns a "conservative" estimate of the recent survival rate, i.e.,
+  // one that may be higher than "recent_avg_survival_fraction".
+  // This is conservative in several ways:
+  //   If there have been few pauses, it will assume a potential high
+  //     variance, and err on the side of caution.
+  //   It puts a lower bound (currently 0.1) on the value it will return.
+  //   To try to detect phase changes, if the most recent pause ("latest") has a
+  //     higher-than-average ("avg") survival rate, it returns that rate.
+  // "work" version is a utility function; young is restricted to young regions.
+  double conservative_avg_survival_fraction_work(double avg,
+                                                 double latest);
+
+  // The arguments are the two sequences that keep track of the number of bytes
+  //   surviving and the total number of bytes before collection, resp.,
+  //   over the last several recent pauses.
+  // Returns the survival rate for the category in the most recent pause.
+  // If there have been no pauses, returns 1.0.
+  double last_survival_fraction_work(TruncatedSeq* surviving,
+                                     TruncatedSeq* before);
+
+  // The arguments are the two sequences that keep track of the number of bytes
+  //   surviving and the total number of bytes before collection, resp.,
+  //   over the last several recent pauses.
+  // Returns the average survival ratio over the last several recent pauses.
+  // If there have been no pauses, returns 1.0.
+  double recent_avg_survival_fraction_work(TruncatedSeq* surviving,
+                                           TruncatedSeq* before);
+
+  double conservative_avg_survival_fraction() {
+    double avg = recent_avg_survival_fraction();
+    double latest = last_survival_fraction();
+    return conservative_avg_survival_fraction_work(avg, latest);
+  }
+
+  // The ratio of gc time to elapsed time, computed over recent pauses.
+  double _recent_avg_pause_time_ratio;
+
+  double recent_avg_pause_time_ratio() {
+    return _recent_avg_pause_time_ratio;
+  }
+
+  // Number of pauses between concurrent marking.
+  size_t _pauses_btwn_concurrent_mark;
+
+  size_t _n_marks_since_last_pause;
+
+  // True iff CM has been initiated.
+  bool _conc_mark_initiated;
+
+  // True iff CM should be initiated
+  bool _should_initiate_conc_mark;
+  bool _should_revert_to_full_young_gcs;
+  bool _last_full_young_gc;
+
+  // This set of variables tracks the collector efficiency, in order to
+  // determine whether we should initiate a new marking.
+  double _cur_mark_stop_world_time_ms;
+  double _mark_init_start_sec;
+  double _mark_remark_start_sec;
+  double _mark_cleanup_start_sec;
+  double _mark_closure_time_ms;
+
+  void   calculate_young_list_min_length();
+  void   calculate_young_list_target_config();
+  void   calculate_young_list_target_config(size_t rs_lengths);
+  size_t calculate_optimal_so_length(size_t young_list_length);
+
+public:
+
+  G1CollectorPolicy();
+
+  virtual G1CollectorPolicy* as_g1_policy() { return this; }
+
+  virtual CollectorPolicy::Name kind() {
+    return CollectorPolicy::G1CollectorPolicyKind;
+  }
+
+  void check_prediction_validity();
+
+  size_t bytes_in_collection_set() {
+    return _bytes_in_collection_set_before_gc;
+  }
+
+  size_t bytes_in_to_space() {
+    return bytes_in_to_space_during_gc();
+  }
+
+  unsigned calc_gc_alloc_time_stamp() {
+    return _all_pause_times_ms->num() + 1;
+  }
+
+protected:
+
+  // Count the number of bytes used in the CS.
+  void count_CS_bytes_used();
+
+  // Together these do the base cleanup-recording work.  Subclasses might
+  // want to put something between them.
+  void record_concurrent_mark_cleanup_end_work1(size_t freed_bytes,
+                                                size_t max_live_bytes);
+  void record_concurrent_mark_cleanup_end_work2();
+
+public:
+
+  virtual void init();
+
+  virtual HeapWord* mem_allocate_work(size_t size,
+                                      bool is_tlab,
+                                      bool* gc_overhead_limit_was_exceeded);
+
+  // This method controls how a collector handles one or more
+  // of its generations being fully allocated.
+  virtual HeapWord* satisfy_failed_allocation(size_t size,
+                                              bool is_tlab);
+
+  BarrierSet::Name barrier_set_name() { return BarrierSet::G1SATBCTLogging; }
+
+  GenRemSet::Name  rem_set_name()     { return GenRemSet::CardTable; }
+
+  // The number of collection pauses so far.
+  long n_pauses() const { return _n_pauses; }
+
+  // Update the heuristic info to record a collection pause of the given
+  // start time, where the given number of bytes were used at the start.
+  // This may involve changing the desired size of a collection set.
+
+  virtual void record_stop_world_start();
+
+  virtual void record_collection_pause_start(double start_time_sec,
+                                             size_t start_used);
+
+  virtual void record_popular_pause_preamble_start();
+  virtual void record_popular_pause_preamble_end();
+
+  // Must currently be called while the world is stopped.
+  virtual void record_concurrent_mark_init_start();
+  virtual void record_concurrent_mark_init_end();
+  void record_concurrent_mark_init_end_pre(double
+                                           mark_init_elapsed_time_ms);
+
+  void record_mark_closure_time(double mark_closure_time_ms);
+
+  virtual void record_concurrent_mark_remark_start();
+  virtual void record_concurrent_mark_remark_end();
+
+  virtual void record_concurrent_mark_cleanup_start();
+  virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                                  size_t max_live_bytes);
+  virtual void record_concurrent_mark_cleanup_completed();
+
+  virtual void record_concurrent_pause();
+  virtual void record_concurrent_pause_end();
+
+  virtual void record_collection_pause_end_CH_strong_roots();
+  virtual void record_collection_pause_end_G1_strong_roots();
+
+  virtual void record_collection_pause_end(bool popular, bool abandoned);
+
+  // Record the fact that a full collection occurred.
+  virtual void record_full_collection_start();
+  virtual void record_full_collection_end();
+
+  void record_ext_root_scan_time(int worker_i, double ms) {
+    _par_last_ext_root_scan_times_ms[worker_i] = ms;
+  }
+
+  void record_mark_stack_scan_time(int worker_i, double ms) {
+    _par_last_mark_stack_scan_times_ms[worker_i] = ms;
+  }
+
+  void record_scan_only_time(int worker_i, double ms, int n) {
+    _par_last_scan_only_times_ms[worker_i] = ms;
+    _par_last_scan_only_regions_scanned[worker_i] = (double) n;
+  }
+
+  void record_satb_drain_time(double ms) {
+    _cur_satb_drain_time_ms = ms;
+    _satb_drain_time_set    = true;
+  }
+
+  void record_satb_drain_processed_buffers (int processed_buffers) {
+    _last_satb_drain_processed_buffers = processed_buffers;
+  }
+
+  void record_mod_union_time(double ms) {
+    _all_mod_union_times_ms->add(ms);
+  }
+
+  void record_update_rs_start_time(int thread, double ms) {
+    _par_last_update_rs_start_times_ms[thread] = ms;
+  }
+
+  void record_update_rs_time(int thread, double ms) {
+    _par_last_update_rs_times_ms[thread] = ms;
+  }
+
+  void record_update_rs_processed_buffers (int thread,
+                                           double processed_buffers) {
+    _par_last_update_rs_processed_buffers[thread] = processed_buffers;
+  }
+
+  void record_scan_rs_start_time(int thread, double ms) {
+    _par_last_scan_rs_start_times_ms[thread] = ms;
+  }
+
+  void record_scan_rs_time(int thread, double ms) {
+    _par_last_scan_rs_times_ms[thread] = ms;
+  }
+
+  void record_scan_new_refs_time(int thread, double ms) {
+    _par_last_scan_new_refs_times_ms[thread] = ms;
+  }
+
+  double get_scan_new_refs_time(int thread) {
+    return _par_last_scan_new_refs_times_ms[thread];
+  }
+
+  void reset_obj_copy_time(int thread) {
+    _par_last_obj_copy_times_ms[thread] = 0.0;
+  }
+
+  void reset_obj_copy_time() {
+    reset_obj_copy_time(0);
+  }
+
+  void record_obj_copy_time(int thread, double ms) {
+    _par_last_obj_copy_times_ms[thread] += ms;
+  }
+
+  void record_obj_copy_time(double ms) {
+    record_obj_copy_time(0, ms);
+  }
+
+  void record_termination_time(int thread, double ms) {
+    _par_last_termination_times_ms[thread] = ms;
+  }
+
+  void record_termination_time(double ms) {
+    record_termination_time(0, ms);
+  }
+
+  void record_pause_time(double ms) {
+    _last_pause_time_ms = ms;
+  }
+
+  void record_clear_ct_time(double ms) {
+    _cur_clear_ct_time_ms = ms;
+  }
+
+  void record_par_time(double ms) {
+    _cur_collection_par_time_ms = ms;
+  }
+
+  void record_aux_start_time(int i) {
+    guarantee(i < _aux_num, "should be within range");
+    _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0;
+  }
+
+  void record_aux_end_time(int i) {
+    guarantee(i < _aux_num, "should be within range");
+    double ms = os::elapsedTime() * 1000.0 - _cur_aux_start_times_ms[i];
+    _cur_aux_times_set[i] = true;
+    _cur_aux_times_ms[i] += ms;
+  }
+
+  void record_pop_compute_rc_start();
+  void record_pop_compute_rc_end();
+
+  void record_pop_evac_start();
+  void record_pop_evac_end();
+
+  // Record the fact that "bytes" bytes were allocated in a region.
+  void record_before_bytes(size_t bytes);
+  void record_after_bytes(size_t bytes);
+
+  // Returns "true" if this is a good time to do a collection pause.
+  // The "word_size" argument, if non-zero, indicates the size of an
+  // allocation request that is prompting this query.
+  virtual bool should_do_collection_pause(size_t word_size) = 0;
+
+  // Choose a new collection set.  Marks the chosen regions as being
+  // "in_collection_set", and links them together.  The head and number of
+  // the collection set are available via access methods.
+  // If "pop_region" is non-NULL, it is a popular region that has already
+  // been added to the collection set.
+  virtual void choose_collection_set(HeapRegion* pop_region = NULL) = 0;
+
+  void clear_collection_set() { _collection_set = NULL; }
+
+  // The head of the list (via "next_in_collection_set()") representing the
+  // current collection set.
+  HeapRegion* collection_set() { return _collection_set; }
+
+  // Sets the collection set to the given single region.
+  virtual void set_single_region_collection_set(HeapRegion* hr);
+
+  // The number of elements in the current collection set.
+  size_t collection_set_size() { return _collection_set_size; }
+
+  // Add "hr" to the CS.
+  void add_to_collection_set(HeapRegion* hr);
+
+  bool should_initiate_conc_mark()      { return _should_initiate_conc_mark; }
+  void set_should_initiate_conc_mark()  { _should_initiate_conc_mark = true; }
+  void unset_should_initiate_conc_mark(){ _should_initiate_conc_mark = false; }
+
+  void checkpoint_conc_overhead();
+
+  // If an expansion would be appropriate, because recent GC overhead had
+  // exceeded the desired limit, return an amount to expand by.
+  virtual size_t expansion_amount();
+
+  // note start of mark thread
+  void note_start_of_mark_thread();
+
+  // The marked bytes of the region "r" have changed; reclassify its
+  // desirability for marking.  Also asserts that "r" is eligible for a CS.
+  virtual void note_change_in_marked_bytes(HeapRegion* r) = 0;
+
+#ifndef PRODUCT
+  // Check any appropriate marked bytes info, asserting false if
+  // something's wrong, else returning "true".
+  virtual bool assertMarkedBytesDataOK() = 0;
+#endif
+
+  // Print tracing information.
+  void print_tracing_info() const;
+
+  // Print stats on young survival ratio
+  void print_yg_surv_rate_info() const;
+
+  void finished_recalculating_age_indexes() {
+    _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+    // do that for any other surv rate groups
+  }
+
+  bool should_add_next_region_to_young_list();
+
+  bool in_young_gc_mode() {
+    return _in_young_gc_mode;
+  }
+  void set_in_young_gc_mode(bool in_young_gc_mode) {
+    _in_young_gc_mode = in_young_gc_mode;
+  }
+
+  bool full_young_gcs() {
+    return _full_young_gcs;
+  }
+  void set_full_young_gcs(bool full_young_gcs) {
+    _full_young_gcs = full_young_gcs;
+  }
+
+  bool adaptive_young_list_length() {
+    return _adaptive_young_list_length;
+  }
+  void set_adaptive_young_list_length(bool adaptive_young_list_length) {
+    _adaptive_young_list_length = adaptive_young_list_length;
+  }
+
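+  // The factor below grows quadratically with the known-garbage ratio:
+  // 9 * ratio^2 + 1, i.e. 1.0 when no garbage is known and approaching 10.0
+  // as the ratio approaches 1.0.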
+  inline double get_gc_eff_factor() {
+    double ratio = _known_garbage_ratio;
+
+    double square = ratio * ratio;
+    // square = square * square;
+    double ret = square * 9.0 + 1.0;
+#if 0
+    gclog_or_tty->print_cr("ratio = %1.2lf, ret = %1.2lf", ratio, ret);
+#endif // 0
+    guarantee(0.0 <= ret && ret < 10.0, "invariant!");
+    return ret;
+  }
+
+  //
+  // Survivor regions policy.
+  //
+protected:
+
+  // Current tenuring threshold, set to 0 if the collector reaches the
+  // maximum number of survivor regions.
+  int _tenuring_threshold;
+
+public:
+
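+  // Objects below the current tenuring threshold that come from a young
+  // region are copied to survivor space; everything else is tenured.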
+  inline GCAllocPurpose
+    evacuation_destination(HeapRegion* src_region, int age, size_t word_sz) {
+      if (age < _tenuring_threshold && src_region->is_young()) {
+        return GCAllocForSurvived;
+      } else {
+        return GCAllocForTenured;
+      }
+  }
+
+  inline bool track_object_age(GCAllocPurpose purpose) {
+    return purpose == GCAllocForSurvived;
+  }
+
+  inline GCAllocPurpose alternative_purpose(int purpose) {
+    return GCAllocForTenured;
+  }
+
+  uint max_regions(int purpose);
+
+  // The limit on regions for a particular purpose is reached.
+  void note_alloc_region_limit_reached(int purpose) {
+    if (purpose == GCAllocForSurvived) {
+      _tenuring_threshold = 0;
+    }
+  }
+
+  void note_start_adding_survivor_regions() {
+    _survivor_surv_rate_group->start_adding_regions();
+  }
+
+  void note_stop_adding_survivor_regions() {
+    _survivor_surv_rate_group->stop_adding_regions();
+  }
+};
+
+// This encapsulates a particular strategy for a g1 Collector.
+//
+//      Start a concurrent mark when our heap size is n bytes
+//            greater than our heap size was at the last concurrent
+//            mark, where n is a function of the CMSTriggerRatio
+//            and the MinHeapFreeRatio.
+//
+//      Start a g1 collection pause when we have allocated the
+//            average number of bytes currently being freed in
+//            a collection, but only if that amounts to at least
+//            one full region.
+//
+//      Resize the heap based on the desired
+//      allocation space, where the desired allocation space is
+//      a function of survival rate and the desired future to-space size.
+//
+//      Choose collection set by first picking all older regions
+//      which have a survival rate which beats our projected young
+//      survival rate.  Then fill out the number of needed regions
+//      with young regions.
+
+class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy {
+  CollectionSetChooser* _collectionSetChooser;
+  // If the estimate is less than desirable, resize if possible.
+  void expand_if_possible(size_t numRegions);
+
+  virtual void choose_collection_set(HeapRegion* pop_region = NULL);
+  virtual void record_collection_pause_start(double start_time_sec,
+                                             size_t start_used);
+  virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                                  size_t max_live_bytes);
+  virtual void record_full_collection_end();
+
+public:
+  G1CollectorPolicy_BestRegionsFirst() {
+    _collectionSetChooser = new CollectionSetChooser();
+  }
+  void record_collection_pause_end(bool popular, bool abandoned);
+  bool should_do_collection_pause(size_t word_size);
+  virtual void set_single_region_collection_set(HeapRegion* hr);
+  // This is not needed any more, after the CSet choosing code was
+  // changed to use the pause prediction work. But let's leave the
+  // hook in just in case.
+  void note_change_in_marked_bytes(HeapRegion* r) { }
+#ifndef PRODUCT
+  bool assertMarkedBytesDataOK();
+#endif
+};
+
+// This should move to some place more general...
+
+// If we have "n" measurements, and we've kept track of their "sum" and the
+// "sum_of_squares" of the measurements, this returns the variance of the
+// sequence.
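+// This is the population variance: expanding sum((x_i - avg)^2) gives
+// sum_of_squares - 2 * avg * sum + n * avg^2, which the function divides by n
+// (equivalently, sum_of_squares / n - avg^2).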
+inline double variance(int n, double sum_of_squares, double sum) {
+  double n_d = (double)n;
+  double avg = sum/n_d;
+  return (sum_of_squares - 2.0 * avg * sum + n_d * avg * avg) / n_d;
+}
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1MMUTracker.cpp.incl"
+
+#define _DISABLE_MMU                             0
+
+// We can't rely on exact double comparisons, so we compare with a small margin of error.
+#define SMALL_MARGIN 0.0000001
+#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN )
+#define is_double_leq(_val1, _val2) is_double_leq_0((_val1) - (_val2))
+#define is_double_geq(_val1, _val2) is_double_leq_0((_val2) - (_val1))
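+// So is_double_leq(a, b) holds when a < b + SMALL_MARGIN, and
+// is_double_geq(a, b) holds when a > b - SMALL_MARGIN.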
+
+/***** ALL TIMES ARE IN SECS!!!!!!! *****/
+
+G1MMUTracker::G1MMUTracker(double time_slice, double max_gc_time) :
+  _time_slice(time_slice),
+  _max_gc_time(max_gc_time),
+  _conc_overhead_time_sec(0.0) { }
+
+void
+G1MMUTracker::update_conc_overhead(double conc_overhead) {
+  double conc_overhead_time_sec = _time_slice * conc_overhead;
+  if (conc_overhead_time_sec > 0.9 * _max_gc_time) {
+    // We are screwed, as we only seem to have <10% of the soft
+    // real-time goal available for pauses. Let's admit defeat and
+    // allow something more generous as a pause target.
+    conc_overhead_time_sec = 0.75 * _max_gc_time;
+  }
+
+  _conc_overhead_time_sec = conc_overhead_time_sec;
+}
+
+G1MMUTrackerQueue::G1MMUTrackerQueue(double time_slice, double max_gc_time) :
+  G1MMUTracker(time_slice, max_gc_time),
+  _head_index(0),
+  _tail_index(trim_index(_head_index+1)),
+  _no_entries(0) { }
+
+void G1MMUTrackerQueue::remove_expired_entries(double current_time) {
+  double limit = current_time - _time_slice;
+  while (_no_entries > 0) {
+    if (is_double_geq(limit, _array[_tail_index].end_time())) {
+      _tail_index = trim_index(_tail_index + 1);
+      --_no_entries;
+    } else
+      return;
+  }
+  guarantee(_no_entries == 0, "should have no entries in the array");
+}
+
+double G1MMUTrackerQueue::calculate_gc_time(double current_time) {
+  double gc_time = 0.0;
+  double limit = current_time - _time_slice;
+  for (int i = 0; i < _no_entries; ++i) {
+    int index = trim_index(_tail_index + i);
+    G1MMUTrackerQueueElem *elem = &_array[index];
+    if (elem->end_time() > limit) {
+      if (elem->start_time() > limit)
+        gc_time += elem->duration();
+      else
+        gc_time += elem->end_time() - limit;
+    }
+  }
+  return gc_time;
+}
+
+void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) {
+  double longest_allowed = longest_pause_internal(start);
+  if (longest_allowed < 0.0)
+    longest_allowed = 0.0;
+  double duration = end - start;
+
+  remove_expired_entries(end);
+  if (_no_entries == QueueLength) {
+    // OK, right now when we fill up we bomb out.
+    // There are a few ways of dealing with this "gracefully":
+    //   increase the array size (:-)
+    //   remove the oldest entry (this might allow more GC time for
+    //     the time slice than what's allowed)
+    //   consolidate the two entries with the minimum gap between them
+    //     (this might allow less GC time than what's allowed)
+    guarantee(0, "array full, currently we can't recover");
+  }
+  _head_index = trim_index(_head_index + 1);
+  ++_no_entries;
+  _array[_head_index] = G1MMUTrackerQueueElem(start, end);
+}
+
+// Basically, the _internal variant does not remove expired entries;
+// it is for trying things out in the future and for a couple
+// of other places (e.g. debugging).
+
+double G1MMUTrackerQueue::longest_pause(double current_time) {
+  if (_DISABLE_MMU)
+    return _max_gc_time;
+
+  MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
+  remove_expired_entries(current_time);
+
+  return longest_pause_internal(current_time);
+}
+
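+// Returns the longest pause (in seconds) that could start at current_time
+// without pushing the total GC time in the time slice that would contain
+// it above _max_gc_time, or -1.0 if no positive pause length fits.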
+double G1MMUTrackerQueue::longest_pause_internal(double current_time) {
+  double target_time = _max_gc_time;
+
+  while( 1 ) {
+    double gc_time =
+      calculate_gc_time(current_time + target_time) + _conc_overhead_time_sec;
+    double diff = target_time + gc_time - _max_gc_time;
+    if (!is_double_leq_0(diff)) {
+      target_time -= diff;
+      if (is_double_leq_0(target_time)) {
+        target_time = -1.0;
+        break;
+      }
+    } else {
+      break;
+    }
+  }
+
+  return target_time;
+}
+
+// Basically, the _internal variant does not remove expired entries;
+// it is for trying things out in the future and for a couple
+// of other places (e.g. debugging).
+
+double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) {
+  if (_DISABLE_MMU)
+    return 0.0;
+
+  MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
+  remove_expired_entries(current_time);
+
+  return when_internal(current_time, pause_time);
+}
+
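+// Returns how many seconds from current_time a pause of the given length
+// could start without violating the MMU goal; 0.0 means it can start
+// immediately.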
+double G1MMUTrackerQueue::when_internal(double current_time,
+                                        double pause_time) {
+  // if the pause is over the maximum, just assume that it's the maximum
+  double adjusted_pause_time =
+    (pause_time > max_gc_time()) ? max_gc_time() : pause_time;
+  double earliest_end = current_time + adjusted_pause_time;
+  double limit = earliest_end - _time_slice;
+  double gc_time = calculate_gc_time(earliest_end);
+  double diff = gc_time + adjusted_pause_time - max_gc_time();
+  if (is_double_leq_0(diff))
+    return 0.0;
+
+  int index = _tail_index;
+  while ( 1 ) {
+    G1MMUTrackerQueueElem *elem = &_array[index];
+    if (elem->end_time() > limit) {
+      if (elem->start_time() > limit)
+        diff -= elem->duration();
+      else
+        diff -= elem->end_time() - limit;
+      if (is_double_leq_0(diff))
+        return  elem->end_time() + diff + _time_slice - adjusted_pause_time - current_time;
+    }
+    index = trim_index(index+1);
+    guarantee(index != trim_index(_head_index + 1), "should not go past head");
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Keeps track of the GC work and decides when it is OK to do GC work
+// and for how long so that the MMU invariants are maintained.
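+//
+// Concretely, the invariant is that within any sliding time window of
+// length _time_slice the total GC (pause) time does not exceed
+// _max_gc_time; equivalently, the minimum mutator utilisation (MMU)
+// over such a window stays at or above 1.0 - _max_gc_time / _time_slice.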
+
+/***** ALL TIMES ARE IN SECS!!!!!!! *****/
+
+// this is the "interface"
+class G1MMUTracker {
+protected:
+  double          _time_slice;
+  double          _max_gc_time; // this is per time slice
+
+  double          _conc_overhead_time_sec;
+
+public:
+  G1MMUTracker(double time_slice, double max_gc_time);
+
+  void update_conc_overhead(double conc_overhead);
+
+  virtual void add_pause(double start, double end, bool gc_thread) = 0;
+  virtual double longest_pause(double current_time) = 0;
+  virtual double when_sec(double current_time, double pause_time) = 0;
+
+  double max_gc_time() {
+    return _max_gc_time - _conc_overhead_time_sec;
+  }
+
+  inline bool now_max_gc(double current_time) {
+    return when_sec(current_time, max_gc_time()) < 0.00001;
+  }
+
+  inline double when_max_gc_sec(double current_time) {
+    return when_sec(current_time, max_gc_time());
+  }
+
+  inline jlong when_max_gc_ms(double current_time) {
+    double when = when_max_gc_sec(current_time);
+    return (jlong) (when * 1000.0);
+  }
+
+  inline jlong when_ms(double current_time, double pause_time) {
+    double when = when_sec(current_time, pause_time);
+    return (jlong) (when * 1000.0);
+  }
+};
+
+class G1MMUTrackerQueueElem {
+private:
+  double _start_time;
+  double _end_time;
+
+public:
+  inline double start_time() { return _start_time; }
+  inline double end_time()   { return _end_time; }
+  inline double duration()   { return _end_time - _start_time; }
+
+  G1MMUTrackerQueueElem() {
+    _start_time = 0.0;
+    _end_time   = 0.0;
+  }
+
+  G1MMUTrackerQueueElem(double start_time, double end_time) {
+    _start_time = start_time;
+    _end_time   = end_time;
+  }
+};
+
+// this is an implementation of the MMUTracker using a (fixed-size) queue
+// that keeps track of all the recent pause times
+class G1MMUTrackerQueue: public G1MMUTracker {
+private:
+  enum PrivateConstants {
+    QueueLength = 64
+  };
+
+  // The array keeps track of all the pauses that fall within a time
+  // slice (the last time slice during which pauses took place).
+  // The data structure implemented is a circular queue.
+  // Head "points" to the most recent addition, tail to the oldest one.
+  // The array is of fixed size and I don't think we'll need more than
+  // two or three entries with the current behaviour of G1 pauses.
+  // If the array is full, an easy fix is to look for the pauses with
+// the shortest gap between them and consolidate them.
+
+  G1MMUTrackerQueueElem _array[QueueLength];
+  int                   _head_index;
+  int                   _tail_index;
+  int                   _no_entries;
+
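+  // Maps an index into [0, QueueLength), e.g. trim_index(QueueLength) == 0
+  // and trim_index(QueueLength - 1) == QueueLength - 1.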
+  inline int trim_index(int index) {
+    return (index + QueueLength) % QueueLength;
+  }
+
+  void remove_expired_entries(double current_time);
+  double calculate_gc_time(double current_time);
+
+  double longest_pause_internal(double current_time);
+  double when_internal(double current_time, double pause_time);
+
+public:
+  G1MMUTrackerQueue(double time_slice, double max_gc_time);
+
+  virtual void add_pause(double start, double end, bool gc_thread);
+
+  virtual double longest_pause(double current_time);
+  virtual double when_sec(double current_time, double pause_time);
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,385 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1MarkSweep.cpp.incl"
+
+class HeapRegion;
+
+void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp,
+                                      bool clear_all_softrefs) {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
+
+  // hook up weak ref data so it can be used during Mark-Sweep
+  assert(GenMarkSweep::ref_processor() == NULL, "no stomping");
+  GenMarkSweep::_ref_processor = rp;
+  assert(rp != NULL, "should be non-NULL");
+
+  // When collecting the permanent generation methodOops may be moving,
+  // so we either have to flush all bcp data or convert it into bci.
+  CodeCache::gc_prologue();
+  Threads::gc_prologue();
+
+  // Increment the invocation count for the permanent generation, since it is
+  // implicitly collected whenever we do a full mark sweep collection.
+  SharedHeap* sh = SharedHeap::heap();
+  sh->perm_gen()->stat_record()->invocations++;
+
+  bool marked_for_unloading = false;
+
+  allocate_stacks();
+
+  // We should save the marks of the currently locked biased monitors.
+  // The marking doesn't preserve the marks of biased objects.
+  BiasedLocking::preserve_marks();
+
+  mark_sweep_phase1(marked_for_unloading, clear_all_softrefs);
+
+  if (G1VerifyConcMark) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    g1h->checkConcurrentMark();
+  }
+
+  mark_sweep_phase2();
+
+  // Don't add any more derived pointers during phase3
+  COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
+
+  mark_sweep_phase3();
+
+  mark_sweep_phase4();
+
+  GenMarkSweep::restore_marks();
+  BiasedLocking::restore_marks();
+  GenMarkSweep::deallocate_stacks();
+
+  // We must invalidate the perm-gen rs, so that it gets rebuilt.
+  GenRemSet* rs = sh->rem_set();
+  rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/);
+
+  // "free at last gc" is calculated from these.
+  // CHF: cheating for now!!!
+  //  Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity());
+  //  Universe::set_heap_used_at_last_gc(Universe::heap()->used());
+
+  Threads::gc_epilogue();
+  CodeCache::gc_epilogue();
+
+  // refs processing: clean slate
+  GenMarkSweep::_ref_processor = NULL;
+}
+
+
+void G1MarkSweep::allocate_stacks() {
+  GenMarkSweep::_preserved_count_max = 0;
+  GenMarkSweep::_preserved_marks = NULL;
+  GenMarkSweep::_preserved_count = 0;
+  GenMarkSweep::_preserved_mark_stack = NULL;
+  GenMarkSweep::_preserved_oop_stack = NULL;
+
+  GenMarkSweep::_marking_stack =
+    new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
+
+  size_t size = SystemDictionary::number_of_classes() * 2;
+  GenMarkSweep::_revisit_klass_stack =
+    new (ResourceObj::C_HEAP) GrowableArray<Klass*>((int)size, true);
+}
+
+void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
+                                    bool clear_all_softrefs) {
+  // Recursively traverse all live objects and mark them
+  EventMark m("1 mark object");
+  TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace(" 1");
+
+  SharedHeap* sh = SharedHeap::heap();
+
+  sh->process_strong_roots(true,  // Collecting permanent generation.
+                           SharedHeap::SO_SystemClasses,
+                           &GenMarkSweep::follow_root_closure,
+                           &GenMarkSweep::follow_root_closure);
+
+  // Process reference objects found during marking
+  ReferencePolicy *soft_ref_policy;
+  if (clear_all_softrefs) {
+    soft_ref_policy = new AlwaysClearPolicy();
+  } else {
+#ifdef COMPILER2
+    soft_ref_policy = new LRUMaxHeapPolicy();
+#else
+    soft_ref_policy = new LRUCurrentHeapPolicy();
+#endif
+  }
+  assert(soft_ref_policy != NULL,"No soft reference policy");
+  GenMarkSweep::ref_processor()->process_discovered_references(
+                                   soft_ref_policy,
+                                   &GenMarkSweep::is_alive,
+                                   &GenMarkSweep::keep_alive,
+                                   &GenMarkSweep::follow_stack_closure,
+                                   NULL);
+
+  // Follow system dictionary roots and unload classes
+  bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive);
+  assert(GenMarkSweep::_marking_stack->is_empty(),
+         "stack should be empty by now");
+
+  // Follow code cache roots (has to be done after system dictionary,
+  // assumes all live klasses are marked)
+  CodeCache::do_unloading(&GenMarkSweep::is_alive,
+                          &GenMarkSweep::keep_alive,
+                          purged_class);
+  GenMarkSweep::follow_stack();
+
+  // Update subklass/sibling/implementor links of live klasses
+  GenMarkSweep::follow_weak_klass_links();
+  assert(GenMarkSweep::_marking_stack->is_empty(),
+         "stack should be empty by now");
+
+  // Visit symbol and interned string tables and delete unmarked oops
+  SymbolTable::unlink(&GenMarkSweep::is_alive);
+  StringTable::unlink(&GenMarkSweep::is_alive);
+
+  assert(GenMarkSweep::_marking_stack->is_empty(),
+         "stack should be empty by now");
+}
+
+class G1PrepareCompactClosure: public HeapRegionClosure {
+  ModRefBarrierSet* _mrbs;
+  CompactPoint _cp;
+  bool _popular_only;
+
+  void free_humongous_region(HeapRegion* hr) {
+    HeapWord* bot = hr->bottom();
+    HeapWord* end = hr->end();
+    assert(hr->startsHumongous(),
+           "Only the start of a humongous region should be freed.");
+    G1CollectedHeap::heap()->free_region(hr);
+    hr->prepare_for_compaction(&_cp);
+    // Also clear the part of the card table that will be unused after
+    // compaction.
+    _mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
+  }
+
+public:
+  G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) :
+    _cp(NULL, cs, cs->initialize_threshold()),
+    _mrbs(G1CollectedHeap::heap()->mr_bs()),
+    _popular_only(popular_only)
+  {}
+  bool doHeapRegion(HeapRegion* hr) {
+    if (_popular_only && !hr->popular())
+      return true; // terminate early
+    else if (!_popular_only && hr->popular())
+      return false; // skip this one.
+
+    if (hr->isHumongous()) {
+      if (hr->startsHumongous()) {
+        oop obj = oop(hr->bottom());
+        if (obj->is_gc_marked()) {
+          obj->forward_to(obj);
+        } else  {
+          free_humongous_region(hr);
+        }
+      } else {
+        assert(hr->continuesHumongous(), "Invalid humongous.");
+      }
+    } else {
+      hr->prepare_for_compaction(&_cp);
+      // Also clear the part of the card table that will be unused after
+      // compaction.
+      _mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
+    }
+    return false;
+  }
+};
+// Stolen verbatim from g1CollectedHeap.cpp
+class FindFirstRegionClosure: public HeapRegionClosure {
+  HeapRegion* _a_region;
+  bool _find_popular;
+public:
+  FindFirstRegionClosure(bool find_popular) :
+    _a_region(NULL), _find_popular(find_popular) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->popular() == _find_popular) {
+      _a_region = r;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  HeapRegion* result() { return _a_region; }
+};
+
+void G1MarkSweep::mark_sweep_phase2() {
+  // Now all live objects are marked, compute the new object addresses.
+
+  // It is imperative that we traverse perm_gen LAST. If dead space is
+  // allowed, a range of dead objects may get overwritten by a dead int
+  // array. If perm_gen is not traversed last, a klassOop may get
+  // overwritten. This is fine since it is dead, but if the class has dead
+  // instances we have to skip them, and in order to find their size we
+  // need the klassOop!
+  //
+  // It is not required that we traverse spaces in the same order in
+  // phase2, phase3 and phase4, but the ValidateMarkSweep live oops
+  // tracking expects us to do so. See comment under phase4.
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  Generation* pg = g1h->perm_gen();
+
+  EventMark m("2 compute new addresses");
+  TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace("2");
+
+  // First we compact the popular regions.
+  if (G1NumPopularRegions > 0) {
+    CompactibleSpace* sp = g1h->first_compactible_space();
+    FindFirstRegionClosure cl(true /*find_popular*/);
+    g1h->heap_region_iterate(&cl);
+    HeapRegion *r = cl.result();
+    assert(r->popular(), "should have found a popular region.");
+    assert(r == sp, "first popular heap region should "
+                    "== first compactible space");
+    G1PrepareCompactClosure blk(sp, true/*popular_only*/);
+    g1h->heap_region_iterate(&blk);
+  }
+
+  // Now we do the regular regions.
+  FindFirstRegionClosure cl(false /*find_popular*/);
+  g1h->heap_region_iterate(&cl);
+  HeapRegion *r = cl.result();
+  assert(!r->popular(), "should have found a non-popular region.");
+  CompactibleSpace* sp = r;
+  if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
+    sp = r->next_compaction_space();
+  }
+
+  G1PrepareCompactClosure blk(sp, false/*popular_only*/);
+  g1h->heap_region_iterate(&blk);
+
+  CompactPoint perm_cp(pg, NULL, NULL);
+  pg->prepare_for_compaction(&perm_cp);
+}
+
+class G1AdjustPointersClosure: public HeapRegionClosure {
+ public:
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->isHumongous()) {
+      if (r->startsHumongous()) {
+        // We must adjust the pointers on the single H object.
+        oop obj = oop(r->bottom());
+        debug_only(GenMarkSweep::track_interior_pointers(obj));
+        // point all the oops to the new location
+        obj->adjust_pointers();
+        debug_only(GenMarkSweep::check_interior_pointers());
+      }
+    } else {
+      // This really ought to be "as_CompactibleSpace"...
+      r->adjust_pointers();
+    }
+    return false;
+  }
+};
+
+void G1MarkSweep::mark_sweep_phase3() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  Generation* pg = g1h->perm_gen();
+
+  // Adjust the pointers to reflect the new locations
+  EventMark m("3 adjust pointers");
+  TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace("3");
+
+  SharedHeap* sh = SharedHeap::heap();
+
+  sh->process_strong_roots(true,  // Collecting permanent generation.
+                           SharedHeap::SO_AllClasses,
+                           &GenMarkSweep::adjust_root_pointer_closure,
+                           &GenMarkSweep::adjust_pointer_closure);
+
+  g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure);
+
+  // Now adjust pointers in remaining weak roots.  (All of which should
+  // have been cleared if they pointed to non-surviving objects.)
+  g1h->g1_process_weak_roots(&GenMarkSweep::adjust_root_pointer_closure,
+                             &GenMarkSweep::adjust_pointer_closure);
+
+  GenMarkSweep::adjust_marks();
+
+  G1AdjustPointersClosure blk;
+  g1h->heap_region_iterate(&blk);
+  pg->adjust_pointers();
+}
+
+class G1SpaceCompactClosure: public HeapRegionClosure {
+public:
+  G1SpaceCompactClosure() {}
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->isHumongous()) {
+      if (hr->startsHumongous()) {
+        oop obj = oop(hr->bottom());
+        if (obj->is_gc_marked()) {
+          obj->init_mark();
+        } else {
+          assert(hr->is_empty(), "Should have been cleared in phase 2.");
+        }
+        hr->reset_during_compaction();
+      }
+    } else {
+      hr->compact();
+    }
+    return false;
+  }
+};
+
+void G1MarkSweep::mark_sweep_phase4() {
+  // All pointers are now adjusted, move objects accordingly
+
+  // It is imperative that we traverse perm_gen first in phase4. All
+  // classes must be allocated earlier than their instances, and traversing
+  // perm_gen first makes sure that all klassOops have moved to their new
+  // location before any instance does a dispatch through its klass!
+
+  // The ValidateMarkSweep live oops tracking expects us to traverse spaces
+  // in the same order in phase2, phase3 and phase4. We don't quite do that
+  // here (perm_gen first rather than last), so we tell the validate code
+  // to use a higher index (saved from phase2) when verifying perm_gen.
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  Generation* pg = g1h->perm_gen();
+
+  EventMark m("4 compact heap");
+  TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace("4");
+
+  pg->compact();
+
+  G1SpaceCompactClosure blk;
+  g1h->heap_region_iterate(&blk);
+
+}
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class ReferenceProcessor;
+
+// G1MarkSweep takes care of global mark-compact garbage collection for a
+// G1CollectedHeap using a four-phase pointer forwarding algorithm.  All
+// generations are assumed to support marking; those that can, also support
+// compaction.
+//
+// Class unloading will only occur when a full gc is invoked.
+
+
+class G1MarkSweep : AllStatic {
+  friend class VM_G1MarkSweep;
+  friend class Scavenge;
+
+ public:
+
+  static void invoke_at_safepoint(ReferenceProcessor* rp,
+                                  bool clear_all_softrefs);
+
+ private:
+
+  // Mark live objects
+  static void mark_sweep_phase1(bool& marked_for_unloading,
+                                bool clear_all_softrefs);
+  // Calculate new addresses
+  static void mark_sweep_phase2();
+  // Update pointers
+  static void mark_sweep_phase3();
+  // Move objects to new positions
+  static void mark_sweep_phase4();
+
+  static void allocate_stacks();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class HeapRegion;
+class G1CollectedHeap;
+class G1RemSet;
+class HRInto_G1RemSet;
+class G1RemSet;
+class ConcurrentMark;
+class DirtyCardToOopClosure;
+class CMBitMap;
+class CMMarkStack;
+class G1ParScanThreadState;
+
+// A class that scans oops in a given heap region (much as OopsInGenClosure
+// scans oops in a generation.)
+class OopsInHeapRegionClosure: public OopsInGenClosure {
+protected:
+  HeapRegion* _from;
+public:
+  virtual void set_region(HeapRegion* from) { _from = from; }
+};
+
+
+class G1ScanAndBalanceClosure : public OopClosure {
+  G1CollectedHeap* _g1;
+  static int _nq;
+public:
+  G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { }
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+};
+
+class G1ParClosureSuper : public OopsInHeapRegionClosure {
+protected:
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem;
+  ConcurrentMark* _cm;
+  G1ParScanThreadState* _par_scan_state;
+public:
+  G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
+  bool apply_to_weak_ref_discovered_field() { return true; }
+};
+
+class G1ParScanClosure : public G1ParClosureSuper {
+public:
+  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ParClosureSuper(g1, par_scan_state) { }
+  void do_oop_nv(oop* p);   // should be made inline
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)          { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
+};
+
+#define G1_PARTIAL_ARRAY_MASK 1
+
+class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
+  G1ParScanClosure _scanner;
+  template <class T> void process_array_chunk(oop obj, int start, int end);
+public:
+  G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { }
+  void do_oop_nv(oop* p);
+  void do_oop_nv(narrowOop* p)      { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+
+class G1ParCopyHelper : public G1ParClosureSuper {
+  G1ParScanClosure *_scanner;
+protected:
+  void mark_forwardee(oop* p);
+  oop copy_to_survivor_space(oop obj);
+public:
+  G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
+                  G1ParScanClosure *scanner) :
+    G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
+};
+
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+class G1ParCopyClosure : public G1ParCopyHelper {
+  G1ParScanClosure _scanner;
+  void do_oop_work(oop* p);
+  void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); }
+public:
+  G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
+  inline void do_oop_nv(oop* p) {
+    do_oop_work(p);
+    if (do_mark_forwardee)
+      mark_forwardee(p);
+  }
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
+
+
+class FilterIntoCSClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  OopClosure* _oc;
+  DirtyCardToOopClosure* _dcto_cl;
+public:
+  FilterIntoCSClosure(  DirtyCardToOopClosure* dcto_cl,
+                        G1CollectedHeap* g1, OopClosure* oc) :
+    _dcto_cl(dcto_cl), _g1(g1), _oc(oc)
+  {}
+  inline void do_oop_nv(oop* p);
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+};
+
+class FilterInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1;
+  OopsInHeapRegionClosure* _oc;
+public:
+  FilterInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
+                                     OopsInHeapRegionClosure* oc) :
+    _g1(g1), _oc(oc)
+  {}
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  void set_region(HeapRegion* from) {
+    _oc->set_region(from);
+  }
+};
+
+class FilterAndMarkInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  OopsInHeapRegionClosure* _oc;
+public:
+  FilterAndMarkInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
+                                            OopsInHeapRegionClosure* oc,
+                                            ConcurrentMark* cm)
+  : _g1(g1), _oc(oc), _cm(cm) { }
+
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  void set_region(HeapRegion* from) {
+    _oc->set_region(from);
+  }
+};
+
+class FilterOutOfRegionClosure: public OopClosure {
+  HeapWord* _r_bottom;
+  HeapWord* _r_end;
+  OopClosure* _oc;
+  int _out_of_region;
+public:
+  FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  int out_of_region() { return _out_of_region; }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * This really ought to be an inline function, but apparently the C++
+ * compiler sometimes sees fit to ignore inline declarations.  Sigh.
+ */
+
+// This must be a preprocessor flag (rather than a runtime check) because
+// the counting it controls is in a perf-critical inner loop.
+#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
+
+inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL && _g1->obj_in_cs(obj)) {
+    _oc->do_oop(p);
+#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
+    _dcto_cl->incr_count();
+#endif
+  }
+}
+
+inline void FilterIntoCSClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
+
+inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  HeapWord* obj_hw = (HeapWord*)obj;
+  if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) {
+    _oc->do_oop(p);
+#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
+    _out_of_region++;
+#endif
+  }
+}
+
+inline void FilterOutOfRegionClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL && _g1->obj_in_cs(obj))
+    _oc->do_oop(p);
+}
+
+inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+
+inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL) {
+    HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj);
+    if (hr != NULL) {
+      if (hr->in_collection_set())
+        _oc->do_oop(p);
+      else if (!hr->is_young())
+        _cm->grayRoot(obj);
+    }
+  }
+}
+
+inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) {
+  RefToScanQueue* q;
+  if (ParallelGCThreads > 0) {
+    // Deal the work out equally.
+    _nq = (_nq + 1) % ParallelGCThreads;
+    q = _g1->task_queue(_nq);
+  } else {
+    q = _g1->task_queue(0);
+  }
+  bool nooverflow = q->push(p);
+  guarantee(nooverflow, "Overflow during popularity region processing");
+}
+
+inline void G1ScanAndBalanceClosure::do_oop(oop* p) {
+  do_oop_nv(p);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,1003 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1RemSet.cpp.incl"
+
+#define CARD_REPEAT_HISTO 0
+
+#if CARD_REPEAT_HISTO
+static size_t ct_freq_sz;
+static jbyte* ct_freq = NULL;
+
+void init_ct_freq_table(size_t heap_sz_bytes) {
+  if (ct_freq == NULL) {
+    ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
+    ct_freq = new jbyte[ct_freq_sz];
+    for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
+  }
+}
+
+void ct_freq_note_card(size_t index) {
+  assert(0 <= index && index < ct_freq_sz, "Bounds error.");
+  if (ct_freq[index] < 100) { ct_freq[index]++; }
+}
+
+static IntHistogram card_repeat_count(10, 10);
+
+void ct_freq_update_histo_and_reset() {
+  for (size_t j = 0; j < ct_freq_sz; j++) {
+    card_repeat_count.add_entry(ct_freq[j]);
+    ct_freq[j] = 0;
+  }
+
+}
+#endif
+
+
+class IntoCSOopClosure: public OopsInHeapRegionClosure {
+  OopsInHeapRegionClosure* _blk;
+  G1CollectedHeap* _g1;
+public:
+  IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
+    _g1(g1), _blk(blk) {}
+  void set_region(HeapRegion* from) {
+    _blk->set_region(from);
+  }
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    oop obj = *p;
+    if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
+  }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool idempotent() { return true; }
+};
+
+class IntoCSRegionClosure: public HeapRegionClosure {
+  IntoCSOopClosure _blk;
+  G1CollectedHeap* _g1;
+public:
+  IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
+    _g1(g1), _blk(g1, blk) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set()) {
+      _blk.set_region(r);
+      if (r->isHumongous()) {
+        if (r->startsHumongous()) {
+          oop obj = oop(r->bottom());
+          obj->oop_iterate(&_blk);
+        }
+      } else {
+        r->oop_before_save_marks_iterate(&_blk);
+      }
+    }
+    return false;
+  }
+};
+
+void
+StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
+                                            int worker_i) {
+  IntoCSRegionClosure rc(_g1, oc);
+  _g1->heap_region_iterate(&rc);
+}
+
+class UpdateRSOopClosure: public OopClosure {
+  HeapRegion* _from;
+  HRInto_G1RemSet* _rs;
+  int _worker_i;
+public:
+  UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
+    _from(NULL), _rs(rs), _worker_i(worker_i) {
+    guarantee(_rs != NULL, "Requires an HRInto_G1RemSet");
+  }
+
+  void set_from(HeapRegion* from) {
+    assert(from != NULL, "from region must be non-NULL");
+    _from = from;
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    assert(_from != NULL, "from region must be non-NULL");
+    _rs->par_write_ref(_from, p, _worker_i);
+  }
+  // Override: this closure is idempotent.
+  //  bool idempotent() { return true; }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+};
+
+class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
+  G1CollectedHeap*    _g1h;
+  ModRefBarrierSet*   _mr_bs;
+  UpdateRSOopClosure  _cl;
+  int _worker_i;
+public:
+  UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
+    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
+    _mr_bs(g1->mr_bs()),
+    _worker_i(worker_i),
+    _g1h(g1)
+    {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set() && !r->continuesHumongous()) {
+      _cl.set_from(r);
+      r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind);
+      _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true);
+    }
+    return false;
+  }
+};
+
+class VerifyRSCleanCardOopClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+public:
+  VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    oop obj = *p;
+    HeapRegion* to = _g1->heap_region_containing(obj);
+    guarantee(to == NULL || !to->in_collection_set(),
+              "Missed a rem set member.");
+  }
+};
+
+HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
+  : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
+    _cg1r(g1->concurrent_g1_refine()),
+    _par_traversal_in_progress(false), _new_refs(NULL),
+    _cards_scanned(NULL), _total_cards_scanned(0)
+{
+  _seq_task = new SubTasksDone(NumSeqTasks);
+  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, ParallelGCThreads);
+}
+
+HRInto_G1RemSet::~HRInto_G1RemSet() {
+  delete _seq_task;
+}
+
+void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
+  if (_g1->is_in_g1_reserved(mr.start())) {
+    _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
+    if (_start_first == NULL) _start_first = mr.start();
+  }
+}
+
+class ScanRSClosure : public HeapRegionClosure {
+  size_t _cards_done, _cards;
+  G1CollectedHeap* _g1h;
+  OopsInHeapRegionClosure* _oc;
+  G1BlockOffsetSharedArray* _bot_shared;
+  CardTableModRefBS *_ct_bs;
+  int _worker_i;
+  bool _try_claimed;
+public:
+  ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
+    _oc(oc),
+    _cards(0),
+    _cards_done(0),
+    _worker_i(worker_i),
+    _try_claimed(false)
+  {
+    _g1h = G1CollectedHeap::heap();
+    _bot_shared = _g1h->bot_shared();
+    _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
+  }
+
+  void set_try_claimed() { _try_claimed = true; }
+
+  void scanCard(size_t index, HeapRegion *r) {
+    _cards_done++;
+    DirtyCardToOopClosure* cl =
+      r->new_dcto_closure(_oc,
+                         CardTableModRefBS::Precise,
+                         HeapRegionDCTOC::IntoCSFilterKind);
+
+    // Set the "from" region in the closure.
+    _oc->set_region(r);
+    HeapWord* card_start = _bot_shared->address_for_index(index);
+    HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
+    Space *sp = SharedHeap::heap()->space_containing(card_start);
+    MemRegion sm_region;
+    if (ParallelGCThreads > 0) {
+      // first find the used area
+      sm_region = sp->used_region_at_save_marks();
+    } else {
+      // The closure is not idempotent.  We shouldn't look at objects
+      // allocated during the GC.
+      sm_region = sp->used_region_at_save_marks();
+    }
+    MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
+    if (!mr.is_empty()) {
+      cl->do_MemRegion(mr);
+    }
+  }
+
+  void printCard(HeapRegion* card_region, size_t card_index,
+                 HeapWord* card_start) {
+    gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
+                           "RS names card %p: "
+                           "[" PTR_FORMAT ", " PTR_FORMAT ")",
+                           _worker_i,
+                           card_region->bottom(), card_region->end(),
+                           card_index,
+                           card_start, card_start + G1BlockOffsetSharedArray::N_words);
+  }
+
+  bool doHeapRegion(HeapRegion* r) {
+    assert(r->in_collection_set(), "should only be called on elements of CS.");
+    HeapRegionRemSet* hrrs = r->rem_set();
+    if (hrrs->iter_is_complete()) return false; // All done.
+    if (!_try_claimed && !hrrs->claim_iter()) return false;
+    // If we didn't return above, then
+    //   _try_claimed || r->claim_iter()
+    // is true: either we're supposed to work on claimed-but-not-complete
+    // regions, or we successfully claimed the region.
+    HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
+    hrrs->init_iterator(iter);
+    size_t card_index;
+    while (iter->has_next(card_index)) {
+      HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
+
+#if 0
+      gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
+                          card_start, card_start + CardTableModRefBS::card_size_in_words);
+#endif
+
+      HeapRegion* card_region = _g1h->heap_region_containing(card_start);
+      assert(card_region != NULL, "Yielding cards not in the heap?");
+      _cards++;
+
+      if (!card_region->in_collection_set()) {
+        // If the card is dirty, then we will scan it during updateRS.
+        if (!_ct_bs->is_card_claimed(card_index) &&
+            !_ct_bs->is_card_dirty(card_index)) {
+          assert(_ct_bs->is_card_clean(card_index) ||
+                 _ct_bs->is_card_claimed(card_index),
+                 "Card is either dirty, clean, or claimed");
+          if (_ct_bs->claim_card(card_index))
+            scanCard(card_index, card_region);
+        }
+      }
+    }
+    hrrs->set_iter_complete();
+    return false;
+  }
+  // Set all cards back to clean.
+  void cleanup() {_g1h->cleanUpCardTable();}
+  size_t cards_done() { return _cards_done;}
+  size_t cards_looked_up() { return _cards;}
+};
+
+// We want the parallel threads to start their scanning at
+// different collection set regions to avoid contention.
+// If we have:
+//          n collection set regions
+//          p threads
+// Then thread t will start at region t * floor (n/p)
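+//
+// For example, with n = 8 collection set regions and p = 4 worker threads,
+// floor(n/p) = 2, so threads 0, 1, 2 and 3 start at regions 0, 2, 4 and 6
+// respectively.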
+
+HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) {
+  HeapRegion* result = _g1p->collection_set();
+  if (ParallelGCThreads > 0) {
+    size_t cs_size = _g1p->collection_set_size();
+    int n_workers = _g1->workers()->total_workers();
+    size_t cs_spans = cs_size / n_workers;
+    size_t ind      = cs_spans * worker_i;
+    for (size_t i = 0; i < ind; i++)
+      result = result->next_in_collection_set();
+  }
+  return result;
+}
+
+void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
+  double rs_time_start = os::elapsedTime();
+  HeapRegion *startRegion = calculateStartRegion(worker_i);
+
+  BufferingOopsInHeapRegionClosure boc(oc);
+  ScanRSClosure scanRScl(&boc, worker_i);
+  _g1->collection_set_iterate_from(startRegion, &scanRScl);
+  scanRScl.set_try_claimed();
+  _g1->collection_set_iterate_from(startRegion, &scanRScl);
+
+  boc.done();
+  double closure_app_time_sec = boc.closure_app_seconds();
+  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
+    closure_app_time_sec;
+  double closure_app_time_ms = closure_app_time_sec * 1000.0;
+
+  assert( _cards_scanned != NULL, "invariant" );
+  _cards_scanned[worker_i] = scanRScl.cards_done();
+
+  _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
+  _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
+  if (ParallelGCThreads > 0) {
+    // In this case, we called scanNewRefsRS and recorded the corresponding
+    // time.
+    double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
+    if (scan_new_refs_time_ms > 0.0) {
+      closure_app_time_ms += scan_new_refs_time_ms;
+    }
+  }
+  _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
+}
+
+void HRInto_G1RemSet::updateRS(int worker_i) {
+  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+
+  double start = os::elapsedTime();
+  _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
+
+  if (G1RSBarrierUseQueue && !cg1r->do_traversal()) {
+    // Apply the appropriate closure to all remaining log entries.
+    _g1->iterate_dirty_card_closure(false, worker_i);
+    // Now there should be no dirty cards.
+    if (G1RSLogCheckCardTable) {
+      CountNonCleanMemRegionClosure cl(_g1);
+      _ct_bs->mod_card_iterate(&cl);
+      // XXX This isn't true any more: keeping cards of young regions
+      // marked dirty broke it.  Need some reasonable fix.
+      guarantee(cl.n() == 0, "Card table should be clean.");
+    }
+  } else {
+    UpdateRSOutOfRegionClosure update_rs(_g1, worker_i);
+    _g1->heap_region_iterate(&update_rs);
+    // We did a traversal; no further one is necessary.
+    if (G1RSBarrierUseQueue) {
+      assert(cg1r->do_traversal(), "Or we shouldn't have gotten here.");
+      cg1r->set_pya_cancel();
+    }
+    if (_cg1r->use_cache()) {
+      _cg1r->clear_and_record_card_counts();
+      _cg1r->clear_hot_cache();
+    }
+  }
+  _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
+}
+
+#ifndef PRODUCT
+class PrintRSClosure : public HeapRegionClosure {
+  int _count;
+public:
+  PrintRSClosure() : _count(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+    _count += (int) hrrs->occupied();
+    if (hrrs->occupied() == 0) {
+      gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") "
+                          "has no remset entries\n",
+                          r->bottom(), r->end());
+    } else {
+      gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n",
+                          r->bottom(), r->end());
+      r->print();
+      hrrs->print();
+      gclog_or_tty->print("\nDone printing rem set\n");
+    }
+    return false;
+  }
+  int occupied() {return _count;}
+};
+#endif
+
+class CountRSSizeClosure: public HeapRegionClosure {
+  size_t _n;
+  size_t _tot;
+  size_t _max;
+  HeapRegion* _max_r;
+  enum {
+    N = 20,
+    MIN = 6
+  };
+  int _histo[N];
+public:
+  CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
+    for (int i = 0; i < N; i++) _histo[i] = 0;
+  }
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      size_t occ = r->rem_set()->occupied();
+      _n++;
+      _tot += occ;
+      if (occ > _max) {
+        _max = occ;
+        _max_r = r;
+      }
+      // Fit it into a histo bin.
+      int s = 1 << MIN;
+      int i = 0;
+      while (occ > (size_t) s && i < (N-1)) {
+        s = s << 1;
+        i++;
+      }
+      _histo[i]++;
+    }
+    return false;
+  }
+  size_t n() { return _n; }
+  size_t tot() { return _tot; }
+  size_t mx() { return _max; }
+  HeapRegion* mxr() { return _max_r; }
+  void print_histo() {
+    int mx = N;
+    while (mx >= 0) {
+      if (_histo[mx-1] > 0) break;
+      mx--;
+    }
+    gclog_or_tty->print_cr("Number of regions with given RS sizes:");
+    gclog_or_tty->print_cr("           <= %8d   %8d", 1 << MIN, _histo[0]);
+    for (int i = 1; i < mx-1; i++) {
+      gclog_or_tty->print_cr("  %8d  - %8d   %8d",
+                    (1 << (MIN + i - 1)) + 1,
+                    1 << (MIN + i),
+                    _histo[i]);
+    }
+    gclog_or_tty->print_cr("            > %8d   %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
+  }
+};
+
+void
+HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc,
+                                             int worker_i) {
+  double scan_new_refs_start_sec = os::elapsedTime();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
+  while (_new_refs[worker_i]->is_nonempty()) {
+    oop* p = _new_refs[worker_i]->pop();
+    oop obj = *p;
+    // *p was in the collection set when p was pushed on "_new_refs", but
+    // another thread may have processed this location from an RS, so it
+    // might not point into the CS any longer.  If so, it's obviously been
+    // processed, and we don't need to do anything further.
+    if (g1h->obj_in_cs(obj)) {
+      HeapRegion* r = g1h->heap_region_containing(p);
+
+      DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
+      assert(ParallelGCThreads > 1
+             || to->rem_set()->contains_reference(p),
+             "Invariant: pushed after being added."
+             "(Not reliable in parallel code.)");
+      oc->set_region(r);
+      // If "p" has already been processed concurrently, this is
+      // idempotent.
+      oc->do_oop(p);
+    }
+  }
+  _g1p->record_scan_new_refs_time(worker_i,
+                                  (os::elapsedTime() - scan_new_refs_start_sec)
+                                  * 1000.0);
+}
+
+void HRInto_G1RemSet::set_par_traversal(bool b) {
+  _par_traversal_in_progress = b;
+  HeapRegionRemSet::set_par_traversal(b);
+}
+
+void HRInto_G1RemSet::cleanupHRRS() {
+  HeapRegionRemSet::cleanup();
+}
+
+void
+HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
+                                             int worker_i) {
+#if CARD_REPEAT_HISTO
+  ct_freq_update_histo_and_reset();
+#endif
+  if (worker_i == 0) {
+    _cg1r->clear_and_record_card_counts();
+  }
+
+  // Make this into a command-line flag...
+  if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
+    CountRSSizeClosure count_cl;
+    _g1->heap_region_iterate(&count_cl);
+    gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
+                  "max region is " PTR_FORMAT,
+                  count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
+                  count_cl.mx(), count_cl.mxr());
+    count_cl.print_histo();
+  }
+
+  if (ParallelGCThreads > 0) {
+    // This is a temporary change to serialize the update and scanning
+    // of remembered sets. There are some race conditions when this is
+    // done in parallel and they are causing failures. When we resolve
+    // said race conditions, we'll revert back to parallel remembered
+    // set updating and scanning. See CRs 6677707 and 6677708.
+    if (worker_i == 0) {
+      updateRS(worker_i);
+      scanNewRefsRS(oc, worker_i);
+      scanRS(oc, worker_i);
+    }
+  } else {
+    assert(worker_i == 0, "invariant");
+
+    updateRS(0);
+    scanRS(oc, 0);
+  }
+}
+
+void HRInto_G1RemSet::
+prepare_for_oops_into_collection_set_do() {
+#if G1_REM_SET_LOGGING
+  PrintRSClosure cl;
+  _g1->collection_set_iterate(&cl);
+#endif
+  cleanupHRRS();
+  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+  _g1->set_refine_cte_cl_concurrency(false);
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  dcqs.concatenate_logs();
+
+  assert(!_par_traversal_in_progress, "Invariant between iterations.");
+  if (ParallelGCThreads > 0) {
+    set_par_traversal(true);
+    int n_workers = _g1->workers()->total_workers();
+    _seq_task->set_par_threads(n_workers);
+    for (uint i = 0; i < ParallelGCThreads; i++)
+      _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192,true);
+
+    if (cg1r->do_traversal()) {
+      updateRS(0);
+      // Have to do this again after updaters
+      cleanupHRRS();
+    }
+  }
+  guarantee( _cards_scanned == NULL, "invariant" );
+  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
+  _total_cards_scanned = 0;
+}
+
+
+class cleanUpIteratorsClosure : public HeapRegionClosure {
+  bool doHeapRegion(HeapRegion *r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+    hrrs->init_for_par_iteration();
+    return false;
+  }
+};
+
+void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
+  guarantee( _cards_scanned != NULL, "invariant" );
+  _total_cards_scanned = 0;
+  for (uint i = 0; i < n_workers(); ++i)
+    _total_cards_scanned += _cards_scanned[i];
+  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
+  _cards_scanned = NULL;
+  // Cleanup after copy
+#if G1_REM_SET_LOGGING
+  PrintRSClosure cl;
+  _g1->heap_region_iterate(&cl);
+#endif
+  _g1->set_refine_cte_cl_concurrency(true);
+  cleanUpIteratorsClosure iterClosure;
+  _g1->collection_set_iterate(&iterClosure);
+  // Set all cards back to clean.
+  _g1->cleanUpCardTable();
+  if (ParallelGCThreads > 0) {
+    ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+    if (cg1r->do_traversal()) {
+      cg1r->cg1rThread()->set_do_traversal(false);
+    }
+    for (uint i = 0; i < ParallelGCThreads; i++) {
+      delete _new_refs[i];
+    }
+    set_par_traversal(false);
+  }
+  assert(!_par_traversal_in_progress, "Invariant between iterations.");
+}
+
+class UpdateRSObjectClosure: public ObjectClosure {
+  UpdateRSOopClosure* _update_rs_oop_cl;
+public:
+  UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) :
+    _update_rs_oop_cl(update_rs_oop_cl) {}
+  void do_object(oop obj) {
+    obj->oop_iterate(_update_rs_oop_cl);
+  }
+
+};
+
+class ScrubRSClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+  CardTableModRefBS* _ctbs;
+public:
+  ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
+    _g1h(G1CollectedHeap::heap()),
+    _region_bm(region_bm), _card_bm(card_bm),
+    _ctbs(NULL)
+  {
+    ModRefBarrierSet* bs = _g1h->mr_bs();
+    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
+    _ctbs = (CardTableModRefBS*)bs;
+  }
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
+    }
+    return false;
+  }
+};
+
+void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
+  ScrubRSClosure scrub_cl(region_bm, card_bm);
+  _g1->heap_region_iterate(&scrub_cl);
+}
+
+void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
+                                int worker_num, int claim_val) {
+  ScrubRSClosure scrub_cl(region_bm, card_bm);
+  _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
+}
+
+
+class ConcRefineRegionClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _ctbs;
+  ConcurrentGCThread* _cgc_thrd;
+  ConcurrentG1Refine* _cg1r;
+  unsigned _cards_processed;
+  UpdateRSOopClosure _update_rs_oop_cl;
+public:
+  ConcRefineRegionClosure(CardTableModRefBS* ctbs,
+                          ConcurrentG1Refine* cg1r,
+                          HRInto_G1RemSet* g1rs) :
+    _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()),
+    _update_rs_oop_cl(g1rs), _cards_processed(0),
+    _g1h(G1CollectedHeap::heap())
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set() &&
+        !r->continuesHumongous() &&
+        !r->is_young()) {
+      _update_rs_oop_cl.set_from(r);
+      UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
+
+      // For each run of dirty card in the region:
+      //   1) Clear the cards.
+      //   2) Process the range corresponding to the run, adding any
+      //      necessary RS entries.
+      // 1 must precede 2, so that a concurrent modification redirties the
+      // card.  If a processing attempt does not succeed, because it runs
+      // into an unparseable region, we will do binary search to find the
+      // beginning of the next parseable region.
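+      // (If the order were reversed, i.e. process first and clear
+      //  afterwards, a reference stored by a mutator after we had scanned
+      //  its location but before we cleared the card would end up on a
+      //  clean card and never be revisited.  Clearing first means such a
+      //  store finds, or makes, the card dirty again, so a later pass
+      //  will see it.)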
+      HeapWord* startAddr = r->bottom();
+      HeapWord* endAddr = r->used_region().end();
+      HeapWord* lastAddr;
+      HeapWord* nextAddr;
+
+      for (nextAddr = lastAddr = startAddr;
+           nextAddr < endAddr;
+           nextAddr = lastAddr) {
+        MemRegion dirtyRegion;
+
+        // Get and clear dirty region from card table
+        MemRegion next_mr(nextAddr, endAddr);
+        dirtyRegion =
+          _ctbs->dirty_card_range_after_reset(
+                           next_mr,
+                           true, CardTableModRefBS::clean_card_val());
+        assert(dirtyRegion.start() >= nextAddr,
+               "returned region inconsistent?");
+
+        if (!dirtyRegion.is_empty()) {
+          HeapWord* stop_point =
+            r->object_iterate_mem_careful(dirtyRegion,
+                                          &update_rs_obj_cl);
+          if (stop_point == NULL) {
+            lastAddr = dirtyRegion.end();
+            _cards_processed +=
+              (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words);
+          } else {
+            // We're going to skip one or more cards that we can't parse.
+            HeapWord* next_parseable_card =
+              r->next_block_start_careful(stop_point);
+            // Round this up to a card boundary.
+            next_parseable_card =
+              _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card));
+            // Now we invalidate the intervening cards so we'll see them
+            // again.
+            MemRegion remaining_dirty =
+              MemRegion(stop_point, dirtyRegion.end());
+            MemRegion skipped =
+              MemRegion(stop_point, next_parseable_card);
+            _ctbs->invalidate(skipped.intersection(remaining_dirty));
+
+            // Now start up again where we can parse.
+            lastAddr = next_parseable_card;
+
+            // Count how many we did completely.
+            _cards_processed +=
+              (stop_point - dirtyRegion.start()) /
+              CardTableModRefBS::card_size_in_words;
+          }
+          // Allow interruption at regular intervals.
+          // (Might need to make them more regular, if we get big
+          // dirty regions.)
+          if (_cgc_thrd != NULL) {
+            if (_cgc_thrd->should_yield()) {
+              _cgc_thrd->yield();
+              switch (_cg1r->get_pya()) {
+              case PYA_continue:
+                // This may have changed: re-read.
+                endAddr = r->used_region().end();
+                continue;
+              case PYA_restart: case PYA_cancel:
+                return true;
+              }
+            }
+          }
+        } else {
+          break;
+        }
+      }
+    }
+    // A good yield opportunity.
+    if (_cgc_thrd != NULL) {
+      if (_cgc_thrd->should_yield()) {
+        _cgc_thrd->yield();
+        switch (_cg1r->get_pya()) {
+        case PYA_restart: case PYA_cancel:
+          return true;
+        default:
+          break;
+        }
+
+      }
+    }
+    return false;
+  }
+
+  unsigned cards_processed() { return _cards_processed; }
+};
+
+
+void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) {
+  ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this);
+  _g1->heap_region_iterate(&cr_cl);
+  _conc_refine_traversals++;
+  _conc_refine_cards += cr_cl.cards_processed();
+}
+
+static IntHistogram out_of_histo(50, 50);
+
+
+
+void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
+  // If the card is no longer dirty, nothing to do.
+  if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
+
+  // Construct the region representing the card.
+  HeapWord* start = _ct_bs->addr_for(card_ptr);
+  // And find the region containing it.
+  HeapRegion* r = _g1->heap_region_containing(start);
+  if (r == NULL) {
+    guarantee(_g1->is_in_permanent(start), "Or else where?");
+    return;  // Not in the G1 heap (might be in perm, for example.)
+  }
+  // Why do we have to check here whether a card is on a young region,
+  // given that we dirty young regions and, as a result, the
+  // post-barrier is supposed to filter them out and never to enqueue
+  // them? When we allocate a new region as the "allocation region" we
+  // actually dirty its cards after we release the lock, since card
+  // dirtying while holding the lock was a performance bottleneck. As a
+  // result, it is possible for other threads to allocate objects in the
+  // region (after they acquire the lock) before all the cards on the
+  // region are dirtied. This is unlikely, but it can happen, so the
+  // extra check below filters out those cards.
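+  // For example: thread A takes the lock, makes R the allocation region,
+  // releases the lock, and only then starts dirtying R's cards; thread B
+  // then allocates an object in R and performs a reference store whose
+  // post-barrier enqueues the card, because the card is not yet dirty.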
+  if (r->is_young()) {
+    return;
+  }
+  // While we are processing RSet buffers during the collection, we
+  // actually don't want to scan any cards on the collection set,
+  // since we don't want to update remembered sets with entries that
+  // point into the collection set, given that live objects from the
+  // collection set are about to move and such entries will be stale
+  // very soon. This change also deals with a reliability issue which
+  // involves scanning a card in the collection set and coming across
+  // an array that was being chunked and looking malformed. Note,
+  // however, that if evacuation fails, we have to scan any objects
+  // that were not moved and create any missing entries.
+  if (r->in_collection_set()) {
+    return;
+  }
+
+  // Should we defer it?
+  if (_cg1r->use_cache()) {
+    card_ptr = _cg1r->cache_insert(card_ptr);
+    // If it was not an eviction, nothing to do.
+    if (card_ptr == NULL) return;
+
+    // OK, we have to reset the card start, region, etc.
+    start = _ct_bs->addr_for(card_ptr);
+    r = _g1->heap_region_containing(start);
+    if (r == NULL) {
+      guarantee(_g1->is_in_permanent(start), "Or else where?");
+      return;  // Not in the G1 heap (might be in perm, for example.)
+    }
+    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
+  }
+
+  HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
+  MemRegion dirtyRegion(start, end);
+
+#if CARD_REPEAT_HISTO
+  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
+  ct_freq_note_card(_ct_bs->index_for(start));
+#endif
+
+  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
+  update_rs_oop_cl.set_from(r);
+  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
+
+  // Undirty the card.
+  *card_ptr = CardTableModRefBS::clean_card_val();
+  // We must complete this write before we do any of the reads below.
+  OrderAccess::storeload();
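+  // Without the store-load barrier, the clean-card store could become
+  // visible only after the reads of the fields scanned below; a mutator
+  // store to one of those fields could then still see the card as dirty,
+  // skip re-enqueueing it, and the new reference could go unscanned.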
+  // And process it, being careful of unallocated portions of TLAB's.
+  HeapWord* stop_point =
+    r->oops_on_card_seq_iterate_careful(dirtyRegion,
+                                        &filter_then_update_rs_oop_cl);
+  // If stop_point is non-null, then we encountered an unallocated region
+  // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
+  // card and re-enqueue: if we put off the card until a GC pause, then the
+  // unallocated portion will be filled in.  Alternatively, we might try
+  // the full complexity of the technique used in "regular" precleaning.
+  if (stop_point != NULL) {
+    // The card might have gotten re-dirtied and re-enqueued while we
+    // worked.  (In fact, it's pretty likely.)
+    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+      *card_ptr = CardTableModRefBS::dirty_card_val();
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      DirtyCardQueue* sdcq =
+        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
+      sdcq->enqueue(card_ptr);
+    }
+  } else {
+    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
+    _conc_refine_cards++;
+  }
+}
+
+class HRRSStatsIter: public HeapRegionClosure {
+  size_t _occupied;
+  size_t _total_mem_sz;
+  size_t _max_mem_sz;
+  HeapRegion* _max_mem_sz_region;
+public:
+  HRRSStatsIter() :
+    _occupied(0),
+    _total_mem_sz(0),
+    _max_mem_sz(0),
+    _max_mem_sz_region(NULL)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    size_t mem_sz = r->rem_set()->mem_size();
+    if (mem_sz > _max_mem_sz) {
+      _max_mem_sz = mem_sz;
+      _max_mem_sz_region = r;
+    }
+    _total_mem_sz += mem_sz;
+    size_t occ = r->rem_set()->occupied();
+    _occupied += occ;
+    return false;
+  }
+  size_t total_mem_sz() { return _total_mem_sz; }
+  size_t max_mem_sz() { return _max_mem_sz; }
+  size_t occupied() { return _occupied; }
+  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
+};
+
+void HRInto_G1RemSet::print_summary_info() {
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  ConcurrentG1RefineThread* cg1r_thrd =
+    g1->concurrent_g1_refine()->cg1rThread();
+
+#if CARD_REPEAT_HISTO
+  gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
+  gclog_or_tty->print_cr("  # of repeats --> # of cards with that number.");
+  card_repeat_count.print_on(gclog_or_tty);
+#endif
+
+  if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
+    gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
+    gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
+    out_of_histo.print_on(gclog_or_tty);
+  }
+  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in "
+                "%5.2fs.",
+                _conc_refine_cards, cg1r_thrd->vtime_accum());
+
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  jint tot_processed_buffers =
+    dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
+  gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
+  gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS thread.",
+                dcqs.processed_buffers_rs_thread(),
+                100.0*(float)dcqs.processed_buffers_rs_thread()/
+                (float)tot_processed_buffers);
+  gclog_or_tty->print_cr("     %8d (%5.1f%%) by mutator threads.",
+                dcqs.processed_buffers_mut(),
+                100.0*(float)dcqs.processed_buffers_mut()/
+                (float)tot_processed_buffers);
+  gclog_or_tty->print_cr("   Did %d concurrent refinement traversals.",
+                _conc_refine_traversals);
+  if (!G1RSBarrierUseQueue) {
+    gclog_or_tty->print_cr("   Scanned %8.2f cards/traversal.",
+                  _conc_refine_traversals > 0 ?
+                  (float)_conc_refine_cards/(float)_conc_refine_traversals :
+                  0);
+  }
+  gclog_or_tty->print_cr("");
+  if (G1UseHRIntoRS) {
+    HRRSStatsIter blk;
+    g1->heap_region_iterate(&blk);
+    gclog_or_tty->print_cr("  Total heap region rem set sizes = " SIZE_FORMAT "K."
+                           "  Max = " SIZE_FORMAT "K.",
+                           blk.total_mem_sz()/K, blk.max_mem_sz()/K);
+    gclog_or_tty->print_cr("  Static structures = " SIZE_FORMAT "K,"
+                           " free_lists = " SIZE_FORMAT "K.",
+                           HeapRegionRemSet::static_mem_size()/K,
+                           HeapRegionRemSet::fl_mem_size()/K);
+    gclog_or_tty->print_cr("    %d occupied cards represented.",
+                           blk.occupied());
+    gclog_or_tty->print_cr("    Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
+                           " %s, cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
+                           blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
+                           (blk.max_mem_sz_region()->popular() ? "POP" : ""),
+                           (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
+                           (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
+    gclog_or_tty->print_cr("    Did %d coarsenings.",
+                  HeapRegionRemSet::n_coarsenings());
+
+  }
+}
+
+void HRInto_G1RemSet::prepare_for_verify() {
+  if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) {
+    cleanupHRRS();
+    _g1->set_refine_cte_cl_concurrency(false);
+    if (SafepointSynchronize::is_at_safepoint()) {
+      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+      dcqs.concatenate_logs();
+    }
+    bool cg1r_use_cache = _cg1r->use_cache();
+    _cg1r->set_use_cache(false);
+    updateRS(0);
+    _cg1r->set_use_cache(cg1r_use_cache);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A G1RemSet provides ways of iterating over pointers into a selected
+// collection set.
+
+class G1CollectedHeap;
+class CardTableModRefBarrierSet;
+class HRInto_G1RemSet;
+class ConcurrentG1Refine;
+
+class G1RemSet {
+protected:
+  G1CollectedHeap* _g1;
+
+  unsigned _conc_refine_traversals;
+  unsigned _conc_refine_cards;
+
+  size_t n_workers();
+
+public:
+  G1RemSet(G1CollectedHeap* g1) :
+    _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
+  {}
+
+  // Invoke "blk->do_oop" on all pointers into the CS in object in regions
+  // outside the CS (having invoked "blk->set_region" to set the "from"
+  // region correctly beforehand.) The "worker_i" param is for the
+  // parallel case where the number of the worker thread calling this
+  // function can be helpful in partitioning the work to be done. It
+  // should be the same as the "i" passed to the calling thread's
+  // work(i) function. In the sequential case this param will be ignored.
+  virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                           int worker_i) = 0;
+
+  // Prepare for and cleanup after an oops_into_collection_set_do
+  // call.  Must call each of these once before and after (in sequential
+  // code) any threads call oops_into_collection_set_do.  (This offers an
+  // opportunity for sequential setup and teardown of structures needed by a
+  // parallel iteration over the CS's RS.)
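+  // In outline, a typical use is (names "rs" and "cl" are illustrative):
+  //   rs->prepare_for_oops_into_collection_set_do();
+  //   ... each worker thread i calls rs->oops_into_collection_set_do(&cl, i) ...
+  //   rs->cleanup_after_oops_into_collection_set_do();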
+  virtual void prepare_for_oops_into_collection_set_do() = 0;
+  virtual void cleanup_after_oops_into_collection_set_do() = 0;
+
+  // If "this" is of the given subtype, return "this", else "NULL".
+  virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from")
+  // has changed to its new value.
+  virtual void write_ref(HeapRegion* from, oop* p) = 0;
+  virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
+
+  // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
+  // or card, respectively, such that a region or card with a corresponding
+  // 0 bit contains no part of any live object.  Eliminates any remembered
+  // set entries that correspond to dead heap ranges.
+  virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
+  // Like the above, but assumes it is called in parallel: "worker_num" is the
+  // parallel thread id of the current thread, and "claim_val" is the
+  // value that should be used to claim heap regions.
+  virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                         int worker_num, int claim_val) = 0;
+
+  // Do any "refinement" activity that might be appropriate to the given
+  // G1RemSet.  If "refinement" has iterative "passes", do one pass.
+  // If "t" is non-NULL, it is the thread performing the refinement.
+  // Default implementation does nothing.
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
+
+  // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
+  // join and leave around parts that must be atomic wrt GC.  (NULL means
+  // being done at a safepoint.)
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
+
+  unsigned conc_refine_cards() { return _conc_refine_cards; }
+
+  // Print any relevant summary info.
+  virtual void print_summary_info() {}
+
+  // Prepare remembered set for verification.
+  virtual void prepare_for_verify() {};
+};
+
+
+// The simplest possible G1RemSet: iterates over all objects in non-CS
+// regions, searching for pointers into the CS.
+class StupidG1RemSet: public G1RemSet {
+public:
+  StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do() {}
+  void cleanup_after_oops_into_collection_set_do() {}
+
+  // Nothing is necessary in the version below.
+  void write_ref(HeapRegion* from, oop* p) {}
+  void par_write_ref(HeapRegion* from, oop* p, int tid) {}
+
+  void scrub(BitMap* region_bm, BitMap* card_bm) {}
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val) {}
+
+};
+
+// A G1RemSet in which each heap region has a rem set that records the
+// external heap references into it.  Uses a mod ref bs to track updates,
+// so that they can be used to update the individual region remsets.
+
+class HRInto_G1RemSet: public G1RemSet {
+protected:
+  enum SomePrivateConstants {
+    UpdateRStoMergeSync  = 0,
+    MergeRStoDoDirtySync = 1,
+    DoDirtySync          = 2,
+    LastSync             = 3,
+
+    SeqTask              = 0,
+    NumSeqTasks          = 1
+  };
+
+  CardTableModRefBS*             _ct_bs;
+  SubTasksDone*                  _seq_task;
+  G1CollectorPolicy* _g1p;
+
+  ConcurrentG1Refine* _cg1r;
+
+  size_t*             _cards_scanned;
+  size_t              _total_cards_scanned;
+
+  // _par_traversal_in_progress is "true" iff a parallel traversal is in
+  // progress.  If so, then cards added to remembered sets should also have
+  // their references into the collection set summarized in "_new_refs".
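+  // (During an evacuation pause par_write_ref(), in g1RemSet.inline.hpp,
+  // pushes such locations onto _new_refs[tid]; scanNewRefsRS() drains
+  // them afterwards.)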
+  bool _par_traversal_in_progress;
+  void set_par_traversal(bool b);
+  GrowableArray<oop*>** _new_refs;
+
+public:
+  // This is called to reset dual hash tables after the gc pause
+  // is finished and the initial hash table is no longer being
+  // scanned.
+  void cleanupHRRS();
+
+  HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
+  ~HRInto_G1RemSet();
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do();
+  void cleanup_after_oops_into_collection_set_do();
+  void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void updateRS(int worker_i);
+  HeapRegion* calculateStartRegion(int i);
+
+  HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
+
+  CardTableModRefBS* ct_bs() { return _ct_bs; }
+  size_t cardsScanned() { return _total_cards_scanned; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from",
+  // which is required to be non-NULL) has changed to a new non-NULL value.
+  inline void write_ref(HeapRegion* from, oop* p);
+  // The "_nv" version is the same; it exists just so that it is not virtual.
+  inline void write_ref_nv(HeapRegion* from, oop* p);
+
+  inline bool self_forwarded(oop obj);
+  inline void par_write_ref(HeapRegion* from, oop* p, int tid);
+
+  void scrub(BitMap* region_bm, BitMap* card_bm);
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val);
+
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
+
+  virtual void print_summary_info();
+  virtual void prepare_for_verify();
+};
+
+#define G1_REM_SET_LOGGING 0
+
+class CountNonCleanMemRegionClosure: public MemRegionClosure {
+  G1CollectedHeap* _g1;
+  int _n;
+  HeapWord* _start_first;
+public:
+  CountNonCleanMemRegionClosure(G1CollectedHeap* g1) :
+    _g1(g1), _n(0), _start_first(NULL)
+  {}
+  void do_MemRegion(MemRegion mr);
+  int n() { return _n; };
+  HeapWord* start_first() { return _start_first; }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline size_t G1RemSet::n_workers() {
+  if (_g1->workers() != NULL) {
+    return _g1->workers()->total_workers();
+  } else {
+    return 1;
+  }
+}
+
+inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {
+  oop obj = *p;
+  assert(from != NULL && from->is_in_reserved(p),
+         "p is not in a from");
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  if (from != to && to != NULL) {
+    if (!to->popular() && !from->is_survivor()) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
+                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
+                             p, obj,
+                             to->bottom(), to->end());
+#endif
+      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+      if (to->rem_set()->add_reference(p)) {
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+  }
+}
+
+inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {
+  write_ref_nv(from, p);
+}
+
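+// An object is "self-forwarded" when evacuation failed for it, i.e. its
+// forwarding pointer was installed to point back at the object itself.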
+inline bool HRInto_G1RemSet::self_forwarded(oop obj) {
+  bool result = (obj->is_forwarded() && (obj->forwardee() == obj));
+  return result;
+}
+
+inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {
+  oop obj = *p;
+#ifdef ASSERT
+  // can't do because of races
+  // assert(obj == NULL || obj->is_oop(), "expected an oop");
+
+  // Do the safe subset of is_oop
+  if (obj != NULL) {
+#ifdef CHECK_UNHANDLED_OOPS
+    oopDesc* o = obj.obj();
+#else
+    oopDesc* o = obj;
+#endif // CHECK_UNHANDLED_OOPS
+    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
+  }
+#endif // ASSERT
+  assert(from == NULL || from->is_in_reserved(p),
+         "p is not in from");
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  // The test below could be optimized by applying a bit op to "to" and "from".
+  if (to != NULL && from != NULL && from != to) {
+    if (!to->popular() && !from->is_survivor()) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
+                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
+                             p, obj,
+                             to->bottom(), to->end());
+#endif
+      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+      if (to->rem_set()->add_reference(p, tid)) {
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+    // Self-forwarded oops are filtered out here: pushing self-forwarding
+    // pointers onto our _new_refs list would lead to a tricky infinite loop.
+    if (_par_traversal_in_progress &&
+        to->in_collection_set() && !self_forwarded(obj)) {
+      _new_refs[tid]->push(p);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1SATBCardTableModRefBS.cpp.incl"
+
+G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap,
+                                                 int max_covered_regions) :
+    CardTableModRefBSForCTRS(whole_heap, max_covered_regions)
+{
+  _kind = G1SATBCT;
+}
+
+
+void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  Thread* thr = Thread::current();
+  if (thr->is_Java_thread()) {
+    JavaThread* jt = (JavaThread*)thr;
+    jt->satb_mark_queue().enqueue(pre_val);
+  } else {
+    MutexLocker x(Shared_SATB_Q_lock);
+    JavaThread::satb_mark_queue_set().shared_satb_queue()->enqueue(pre_val);
+  }
+}
+
+// When we know the current java thread:
+void
+G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field,
+                                                    oop newVal,
+                                                    JavaThread* jt) {
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
+  oop preVal = *(oop*)field;
+  if (preVal != NULL) {
+    jt->satb_mark_queue().enqueue(preVal);
+  }
+}
+
+void
+G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) {
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
+  oop* elem_ptr = (oop*)mr.start();
+  while ((HeapWord*)elem_ptr < mr.end()) {
+    oop elem = *elem_ptr;
+    if (elem != NULL) enqueue(elem);
+    elem_ptr++;
+  }
+}
+
+
+
+G1SATBCardTableLoggingModRefBS::
+G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
+                               int max_covered_regions) :
+  G1SATBCardTableModRefBS(whole_heap, max_covered_regions),
+  _dcqs(JavaThread::dirty_card_queue_set())
+{
+  _kind = G1SATBCTLogging;
+}
+
+void
+G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
+                                                     oop new_val) {
+  jbyte* byte = byte_for(field);
+  if (*byte != dirty_card) {
+    *byte = dirty_card;
+    Thread* thr = Thread::current();
+    if (thr->is_Java_thread()) {
+      JavaThread* jt = (JavaThread*)thr;
+      jt->dirty_card_queue().enqueue(byte);
+    } else {
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      _dcqs.shared_dirty_card_queue()->enqueue(byte);
+    }
+  }
+}
+
+void
+G1SATBCardTableLoggingModRefBS::write_ref_field_static(void* field,
+                                                       oop new_val) {
+  uintptr_t field_uint = (uintptr_t)field;
+  uintptr_t new_val_uint = (uintptr_t)new_val;
+  uintptr_t comb = field_uint ^ new_val_uint;
+  comb = comb >> HeapRegion::LogOfHRGrainBytes;
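+  // comb == 0 iff "field" and "new_val" agree in all bits above the
+  // region-size bits, i.e. the store stays within a single heap region
+  // and cannot create a cross-region reference.  For example, assuming a
+  // 1M region (LogOfHRGrainBytes == 20), 0x2345678 and 0x2360000 differ
+  // only below bit 20 (same region, filtered out), while 0x2345678 and
+  // 0x2465000 differ at or above bit 20 (different regions, logged).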
+  if (comb == 0) return;
+  if (new_val == NULL) return;
+  // Otherwise, log it.
+  G1SATBCardTableLoggingModRefBS* g1_bs =
+    (G1SATBCardTableLoggingModRefBS*)Universe::heap()->barrier_set();
+  g1_bs->write_ref_field_work(field, new_val);
+}
+
+void
+G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) {
+  jbyte* byte = byte_for(mr.start());
+  jbyte* last_byte = byte_for(mr.last());
+  Thread* thr = Thread::current();
+  if (whole_heap) {
+    while (byte <= last_byte) {
+      *byte = dirty_card;
+      byte++;
+    }
+  } else {
+    // Enqueue if necessary.
+    if (thr->is_Java_thread()) {
+      JavaThread* jt = (JavaThread*)thr;
+      while (byte <= last_byte) {
+        if (*byte != dirty_card) {
+          *byte = dirty_card;
+          jt->dirty_card_queue().enqueue(byte);
+        }
+        byte++;
+      }
+    } else {
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      while (byte <= last_byte) {
+        if (*byte != dirty_card) {
+          *byte = dirty_card;
+          _dcqs.shared_dirty_card_queue()->enqueue(byte);
+        }
+        byte++;
+      }
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef SERIALGC
+
+class DirtyCardQueueSet;
+
+// This barrier is specialized to use a logging barrier to support
+// snapshot-at-the-beginning marking.
+
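+// In outline, the pre-barrier for a store "*field = new_val" is:
+//   if (JavaThread::satb_mark_queue_set().active()) {
+//     pre_val = *field;
+//     if (pre_val != NULL) enqueue(pre_val);
+//   }
+// (see write_ref_field_pre_static below).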
+class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
+private:
+  // Add "pre_val" to a set of objects that may have been disconnected from the
+  // pre-marking object graph.
+  static void enqueue(oop pre_val);
+
+public:
+  G1SATBCardTableModRefBS(MemRegion whole_heap,
+                          int max_covered_regions);
+
+  bool is_a(BarrierSet::Name bsn) {
+    return bsn == BarrierSet::G1SATBCT || CardTableModRefBS::is_a(bsn);
+  }
+
+  virtual bool has_write_ref_pre_barrier() { return true; }
+
+  // Note that this does not need to access any BarrierSet data
+  // structures, so it can be called from a static context.
+  static void write_ref_field_pre_static(void* field, oop newVal) {
+    assert(!UseCompressedOops, "Else needs to be templatized");
+    oop preVal = *((oop*)field);
+    if (preVal != NULL) {
+      enqueue(preVal);
+    }
+  }
+
+  // When we know the current java thread:
+  static void write_ref_field_pre_static(void* field, oop newVal,
+                                         JavaThread* jt);
+
+  // We export this to make it available in cases where the static
+  // type of the barrier set is known.  Note that it is non-virtual.
+  inline void inline_write_ref_field_pre(void* field, oop newVal) {
+    write_ref_field_pre_static(field, newVal);
+  }
+
+  // This is the more general virtual version.
+  void write_ref_field_pre_work(void* field, oop new_val) {
+    inline_write_ref_field_pre(field, new_val);
+  }
+
+  virtual void write_ref_array_pre(MemRegion mr);
+
+};
+
+// Adds card-table logging to the post-barrier.
+// Usual invariant: all dirty cards are logged in the DirtyCardQueueSet.
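+// In outline, the post-barrier for a store to "field" is:
+//   byte = byte_for(field);
+//   if (*byte != dirty_card) { *byte = dirty_card; enqueue byte; }
+// where the card address is enqueued on the thread's (or the shared)
+// dirty card queue; see write_ref_field_work in g1SATBCardTableModRefBS.cpp.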
+class G1SATBCardTableLoggingModRefBS: public G1SATBCardTableModRefBS {
+ private:
+  DirtyCardQueueSet& _dcqs;
+ public:
+  G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
+                                 int max_covered_regions);
+
+  bool is_a(BarrierSet::Name bsn) {
+    return bsn == BarrierSet::G1SATBCTLogging ||
+      G1SATBCardTableModRefBS::is_a(bsn);
+  }
+
+  void write_ref_field_work(void* field, oop new_val);
+
+  // Can be called from static contexts.
+  static void write_ref_field_static(void* field, oop new_val);
+
+  // NB: if you do a whole-heap invalidation, the "usual invariant" defined
+  // above no longer applies.
+  void invalidate(MemRegion mr, bool whole_heap = false);
+
+  void write_region_work(MemRegion mr)    { invalidate(mr); }
+  void write_ref_array_work(MemRegion mr) { invalidate(mr); }
+
+
+};
+
+
+#endif // SERIALGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1_globals.cpp.incl"
+
+G1_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \
+         MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG,     \
+         MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
+         MATERIALIZE_NOTPRODUCT_FLAG,  \
+         MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Defines all global flags used by the garbage-first collector.
+//
+
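+// Each entry below supplies a kind (product, develop, ...), a type, a
+// name, a default value and a doc string; g1_globals.cpp materializes
+// the corresponding flag variables by expanding G1_FLAGS with the
+// MATERIALIZE_*_FLAG macros.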
+#define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \
+                                                                            \
+  product(intx, ParallelGCG1AllocBufferSize, 4*K,                           \
+          "Size of parallel G1 allocation buffers in to-space.")            \
+                                                                            \
+  product(intx, G1TimeSliceMS, 500,                                         \
+          "Time slice for MMU specification")                               \
+                                                                            \
+  product(intx, G1MaxPauseTimeMS, 200,                                      \
+          "Max GC time per MMU time slice")                                 \
+                                                                            \
+  product(intx, G1ConfidencePerc, 50,                                       \
+          "Confidence level for MMU/pause predictions")                     \
+                                                                            \
+  product(intx, G1MarkingOverheadPerc, 0,                                   \
+          "Overhead of concurrent marking")                                 \
+                                                                            \
+  product(bool, G1AccountConcurrentOverhead, false,                         \
+          "Whether soft real-time compliance in G1 will take into account"  \
+          "concurrent overhead")                                            \
+                                                                            \
+  product(intx, G1YoungGenSize, 0,                                          \
+          "Size of the G1 young generation, 0 is the adaptive policy")      \
+                                                                            \
+  product(bool, G1Gen, true,                                                \
+          "If true, it will enable the generational G1")                    \
+                                                                            \
+  develop(intx, G1GCPct, 10,                                                \
+          "The desired percent time spent on GC")                           \
+                                                                            \
+  product(intx, G1PolicyVerbose, 0,                                         \
+          "The verbosity level on G1 policy decisions")                     \
+                                                                            \
+  develop(bool, G1UseHRIntoRS, true,                                        \
+          "Determines whether the 'advanced' HR Into rem set is used.")     \
+                                                                            \
+  product(bool, G1VerifyRemSet, false,                                      \
+          "If true, verify the rem set functioning at each GC")             \
+                                                                            \
+  product(bool, G1VerifyConcMark, false,                                    \
+          "If true, verify the conc marking code at full GC time")          \
+                                                                            \
+  develop(intx, G1MarkingVerboseLevel, 0,                                   \
+          "Level (0-4) of verboseness of the marking code")                 \
+                                                                            \
+  develop(bool, G1VerifyConcMarkPrintReachable, true,                       \
+          "If conc mark verification fails, print reachable objects")       \
+                                                                            \
+  develop(bool, G1TraceMarkStackOverflow, false,                            \
+          "If true, extra debugging code for CM restart for ovflw.")        \
+                                                                            \
+  product(bool, G1VerifyMarkingInEvac, false,                               \
+          "If true, verify marking info during evacuation")                 \
+                                                                            \
+  develop(intx, G1PausesBtwnConcMark, -1,                                   \
+          "If positive, fixed number of pauses between conc markings")      \
+                                                                            \
+  product(intx, G1EfficiencyPctCausesMark, 80,                              \
+          "The cum gc efficiency since mark fall-off that causes "          \
+          "new marking")                                                    \
+                                                                            \
+  product(bool, TraceConcurrentMark, false,                                 \
+          "Trace concurrent mark")                                          \
+                                                                            \
+  product(bool, SummarizeG1ConcMark, false,                                 \
+          "Summarize concurrent mark info")                                 \
+                                                                            \
+  product(bool, SummarizeG1RSStats, false,                                  \
+          "Summarize remembered set processing info")                       \
+                                                                            \
+  product(bool, SummarizeG1ZFStats, false,                                  \
+          "Summarize zero-filling info")                                    \
+                                                                            \
+  product(bool, TraceG1Refine, false,                                       \
+          "Trace G1 concurrent refinement")                                 \
+                                                                            \
+  develop(bool, G1ConcMark, true,                                           \
+          "If true, run concurrent marking for G1")                         \
+                                                                            \
+  product(intx, G1CMStackSize, 2 * 1024 * 1024,                             \
+          "Size of the mark stack for concurrent marking.")                 \
+                                                                            \
+  product(intx, G1CMRegionStackSize, 1024 * 1024,                           \
+          "Size of the region stack for concurrent marking.")               \
+                                                                            \
+  develop(bool, G1ConcRefine, true,                                         \
+          "If true, run concurrent rem set refinement for G1")              \
+                                                                            \
+  develop(intx, G1ConcRefineTargTraversals, 4,                              \
+          "Number of concurrent refinement we try to achieve")              \
+                                                                            \
+  develop(intx, G1ConcRefineInitialDelta, 4,                                \
+          "Number of heap regions of alloc ahead of starting collection "   \
+          "pause to start concurrent refinement (initially)")               \
+                                                                            \
+  product(bool, G1SmoothConcRefine, true,                                   \
+          "Attempts to smooth out the overhead of concurrent refinement")   \
+                                                                            \
+  develop(bool, G1ConcZeroFill, true,                                       \
+          "If true, run concurrent zero-filling thread")                    \
+                                                                            \
+  develop(intx, G1ConcZFMaxRegions, 1,                                      \
+          "Stop zero-filling when # of zf'd regions reaches")               \
+                                                                            \
+  product(intx, G1SteadyStateUsed, 90,                                      \
+          "If non-0, try to maintain 'used' at this pct (of max)")          \
+                                                                            \
+  product(intx, G1SteadyStateUsedDelta, 30,                                 \
+          "If G1SteadyStateUsed is non-0, then do pause this number of "    \
+          "of percentage points earlier if no marking is in progress.")     \
+                                                                            \
+  develop(bool, G1SATBBarrierPrintNullPreVals, false,                       \
+          "If true, count frac of ptr writes with null pre-vals.")          \
+                                                                            \
+  product(intx, G1SATBLogBufferSize, 1*K,                                   \
+          "Number of entries in an SATB log buffer.")                       \
+                                                                            \
+  product(intx, G1SATBProcessCompletedThreshold, 20,                        \
+          "Number of completed buffers that triggers log processing.")      \
+                                                                            \
+  develop(intx, G1ExtraRegionSurvRate, 33,                                  \
+          "If the young survival rate is S, and there's room left in "      \
+          "to-space, we will allow regions whose survival rate is up to "   \
+          "S + (1 - S)*X, where X is this parameter (as a fraction.)")      \
+                                                                            \
+  develop(intx, G1InitYoungSurvRatio, 50,                                   \
+          "Expected Survival Rate for newly allocated bytes")               \
+                                                                            \
+  develop(bool, G1SATBPrintStubs, false,                                    \
+          "If true, print generated stubs for the SATB barrier")            \
+                                                                            \
+  product(intx, G1ExpandByPctOfAvail, 20,                                   \
+          "When expanding, % of uncommitted space to claim.")               \
+                                                                            \
+  develop(bool, G1RSBarrierRegionFilter, true,                              \
+          "If true, generate region filtering code in RS barrier")          \
+                                                                            \
+  develop(bool, G1RSBarrierNullFilter, true,                                \
+          "If true, generate null-pointer filtering code in RS barrier")    \
+                                                                            \
+  develop(bool, G1PrintCTFilterStats, false,                                \
+          "If true, print stats on RS filtering effectiveness")             \
+                                                                            \
+  develop(bool, G1RSBarrierUseQueue, true,                                  \
+          "If true, use queueing RS barrier")                               \
+                                                                            \
+  develop(bool, G1RSLogCheckCardTable, false,                               \
+          "If true, verify that no dirty cards remain after RS log "        \
+          "processing.")                                                    \
+                                                                            \
+  product(intx, G1MinPausesBetweenMarks, 2,                                 \
+          "Number of inefficient pauses necessary to trigger marking.")     \
+                                                                            \
+  product(intx, G1InefficientPausePct, 80,                                  \
+          "Threshold of an 'inefficient' pauses (as % of cum efficiency.")  \
+                                                                            \
+  product(intx, G1RSPopLimit, 32768,                                        \
+          "Limit that defines popularity.  Should go away! XXX")            \
+                                                                            \
+  develop(bool, G1RSCountHisto, false,                                      \
+          "If true, print a histogram of RS occupancies after each pause")  \
+                                                                            \
+  product(intx, G1ObjPopLimit, 256,                                         \
+          "Limit that defines popularity for an object.")                   \
+                                                                            \
+  product(bool, G1TraceFileOverwrite, false,                                \
+          "Allow the trace file to be overwritten")                         \
+                                                                            \
+  develop(intx, G1PrintRegionLivenessInfo, 0,                               \
+          "When > 0, print the occupancies of the <n> best and worst"       \
+          "regions.")                                                       \
+                                                                            \
+  develop(bool, G1TracePopularity, false,                                   \
+          "When true, provide detailed tracing of popularity.")             \
+                                                                            \
+  product(bool, G1SummarizePopularity, false,                               \
+          "When true, provide end-of-run-summarization of popularity.")     \
+                                                                            \
+  product(intx, G1NumPopularRegions, 1,                                     \
+          "Number of regions reserved to hold popular objects.  "           \
+          "Should go away later.")                                          \
+                                                                            \
+  develop(bool, G1PrintParCleanupStats, false,                              \
+          "When true, print extra stats about parallel cleanup.")           \
+                                                                            \
+  product(bool, G1DoAgeCohortChecks, false,                                 \
+          "When true, check well-formedness of age cohort structures.")     \
+                                                                            \
+  develop(bool, G1DisablePreBarrier, false,                                 \
+          "Disable generation of pre-barrier (i.e., marking barrier).")    \
+                                                                            \
+  develop(bool, G1DisablePostBarrier, false,                                \
+          "Disable generation of post-barrier (i.e., RS barrier).")        \
+                                                                            \
+  product(intx, G1DirtyCardQueueMax, 30,                                    \
+          "Maximum number of completed RS buffers before mutator threads "  \
+          "start processing them.")                                         \
+                                                                            \
+  develop(intx, G1ConcRSLogCacheSize, 10,                                   \
+          "Log base 2 of the length of conc RS hot-card cache.")            \
+                                                                            \
+  product(bool, G1ConcRSCountTraversals, false,                             \
+          "If true, gather data about the number of times CR traverses "    \
+          "cards.")                                                        \
+                                                                            \
+  product(intx, G1ConcRSHotCardLimit, 4,                                    \
+          "The threshold that defines (>=) a hot card.")                    \
+                                                                            \
+  develop(bool, G1PrintOopAppls, false,                                     \
+          "When true, print applications of closures to external locs.")    \
+                                                                            \
+  product(intx, G1LogRSRegionEntries, 7,                                    \
+          "Log_2 of max number of regions for which we keep bitmaps.")      \
+                                                                            \
+  develop(bool, G1RecordHRRSOops, false,                                    \
+          "When true, record recent calls to rem set operations.")          \
+                                                                            \
+  develop(bool, G1RecordHRRSEvents, false,                                  \
+          "When true, record recent calls to rem set operations.")          \
+                                                                            \
+  develop(intx, G1MaxVerifyFailures, -1,                                    \
+          "The maximum number of verification failures to print.  "        \
+          "-1 means print all.")                                            \
+                                                                            \
+  develop(bool, G1ScrubRemSets, true,                                       \
+          "When true, do RS scrubbing after cleanup.")                      \
+                                                                            \
+  develop(bool, G1RSScrubVerbose, false,                                    \
+          "When true, do RS scrubbing with verbose output.")                \
+                                                                            \
+  develop(bool, G1YoungSurvRateVerbose, false,                              \
+          "Print out the survival rate of young regions according to age.") \
+                                                                            \
+  develop(intx, G1YoungSurvRateNumRegionsSummary, 0,                        \
+          "The number of regions for which we'll print a survival rate "    \
+          "summary.")                                                       \
+                                                                            \
+  product(bool, G1UseScanOnlyPrefix, false,                                 \
+          "It determines whether the system will calculate an optimum "     \
+          "scan-only set.")                                                 \
+                                                                            \
+  product(intx, G1MinReservePerc, 10,                                       \
+          "The minimum reserve, as a percentage of the heap, to maintain "  \
+          "in order to minimize the probability of promotion failure.")     \
+                                                                            \
+  product(bool, G1TraceRegions, false,                                      \
+          "If set, G1 will print information on which regions are being "   \
+          "allocated and which are reclaimed.")                             \
+                                                                            \
+  develop(bool, G1HRRSUseSparseTable, true,                                 \
+          "When true, use sparse table to save space.")                     \
+                                                                            \
+  develop(bool, G1HRRSFlushLogBuffersOnVerify, false,                       \
+          "Forces flushing of log buffers before verification.")            \
+                                                                            \
+  product(intx, G1MaxSurvivorRegions, 0,                                    \
+          "The maximum number of survivor regions")
+
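+// Declare the G1 flags defined above using the standard DECLARE_* flag macros.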
+G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The following OopClosure types get specialized versions of
+// "oop_oop_iterate" that invoke the closures' do_oop methods
+// non-virtually, using a mechanism defined in this file.  Extend these
+// macros in the obvious way to add specializations for new closures.
+
+// Forward declarations.
+enum G1Barrier {
+  G1BarrierNone, G1BarrierRS, G1BarrierEvac
+};
+
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+class G1ParCopyClosure;
+class G1ParScanClosure;
+
+typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
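+// (That is, a G1ParCopyClosure with do_gen_barrier = false,
+// barrier = G1BarrierEvac, and do_mark_forwardee = false.)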
+
+class FilterIntoCSClosure;
+class FilterOutOfRegionClosure;
+class FilterInHeapRegionAndIntoCSClosure;
+class FilterAndMarkInHeapRegionAndIntoCSClosure;
+class G1ScanAndBalanceClosure;
+
+#ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
+#error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
+#endif
+
+#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
+      f(G1ParScanHeapEvacClosure,_nv)                   \
+      f(G1ParScanClosure,_nv)                           \
+      f(FilterIntoCSClosure,_nv)                        \
+      f(FilterOutOfRegionClosure,_nv)                   \
+      f(FilterInHeapRegionAndIntoCSClosure,_nv)         \
+      f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv)  \
+      f(G1ScanAndBalanceClosure,_nv)
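+
+// A macro passed as "f" is applied to each (closure type, suffix) pair above;
+// the "_nv" suffix is used to name the non-virtual oop_oop_iterate
+// specializations described at the top of this file.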
+
+#ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
+#error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
+#endif
+
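+// Intentionally left empty: G1 adds no since-save-marks specializations here.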
+#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,873 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_heapRegion.cpp.incl"
+
+HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1,
+                                 HeapRegion* hr, OopClosure* cl,
+                                 CardTableModRefBS::PrecisionStyle precision,
+                                 FilterKind fk) :
+  ContiguousSpaceDCTOC(hr, cl, precision, NULL),
+  _hr(hr), _fk(fk), _g1(g1)
+{}
+
+FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
+                                                   OopClosure* oc) :
+  _r_bottom(r->bottom()), _r_end(r->end()),
+  _oc(oc), _out_of_region(0)
+{}
+
+class VerifyLiveClosure: public OopClosure {
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _bs;
+  oop _containing_obj;
+  bool _failures;
+  int _n_failures;
+public:
+  VerifyLiveClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h), _bs(NULL), _containing_obj(NULL),
+    _failures(false), _n_failures(0)
+  {
+    BarrierSet* bs = _g1h->barrier_set();
+    if (bs->is_a(BarrierSet::CardTableModRef))
+      _bs = (CardTableModRefBS*)bs;
+  }
+
+  void set_containing_obj(oop obj) {
+    _containing_obj = obj;
+  }
+
+  bool failures() { return _failures; }
+  int n_failures() { return _n_failures; }
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    assert(_containing_obj != NULL, "Precondition");
+    assert(!_g1h->is_obj_dead(_containing_obj), "Precondition");
+    oop obj = *p;
+    if (obj != NULL) {
+      bool failed = false;
+      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) {
+        if (!_failures) {
+          gclog_or_tty->print_cr("");
+          gclog_or_tty->print_cr("----------");
+        }
+        if (!_g1h->is_in_closed_subset(obj)) {
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                        " of live obj "PTR_FORMAT
+                        " points to obj "PTR_FORMAT
+                        " not in the heap.",
+                        p, (void*) _containing_obj, (void*) obj);
+        } else {
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                        " of live obj "PTR_FORMAT
+                        " points to dead obj "PTR_FORMAT".",
+                        p, (void*) _containing_obj, (void*) obj);
+        }
+        gclog_or_tty->print_cr("Live obj:");
+        _containing_obj->print_on(gclog_or_tty);
+        gclog_or_tty->print_cr("Bad referent:");
+        obj->print_on(gclog_or_tty);
+        gclog_or_tty->print_cr("----------");
+        _failures = true;
+        failed = true;
+        _n_failures++;
+      }
+
+      if (!_g1h->full_collection()) {
+        HeapRegion* from = _g1h->heap_region_containing(p);
+        HeapRegion* to   = _g1h->heap_region_containing(*p);
+        if (from != NULL && to != NULL &&
+            from != to &&
+            !to->popular() &&
+            !to->isHumongous()) {
+          jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
+          jbyte cv_field = *_bs->byte_for_const(p);
+          const jbyte dirty = CardTableModRefBS::dirty_card_val();
+
+          bool is_bad = !(from->is_young()
+                          || to->rem_set()->contains_reference(p)
+                          || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
+                              (_containing_obj->is_objArray() ?
+                                  cv_field == dirty
+                               : cv_obj == dirty || cv_field == dirty));
+          if (is_bad) {
+            if (!_failures) {
+              gclog_or_tty->print_cr("");
+              gclog_or_tty->print_cr("----------");
+            }
+            gclog_or_tty->print_cr("Missing rem set entry:");
+            gclog_or_tty->print_cr("Field "PTR_FORMAT
+                          " of obj "PTR_FORMAT
+                          ", in region %d ["PTR_FORMAT
+                          ", "PTR_FORMAT"),",
+                          p, (void*) _containing_obj,
+                          from->hrs_index(),
+                          from->bottom(),
+                          from->end());
+            _containing_obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("points to obj "PTR_FORMAT
+                          " in region %d ["PTR_FORMAT
+                          ", "PTR_FORMAT").",
+                          (void*) obj, to->hrs_index(),
+                          to->bottom(), to->end());
+            obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
+                          cv_obj, cv_field);
+            gclog_or_tty->print_cr("----------");
+            _failures = true;
+            if (!failed) _n_failures++;
+          }
+        }
+      }
+    }
+  }
+};
+
+template<class ClosureType>
+HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
+                               HeapRegion* hr,
+                               HeapWord* cur, HeapWord* top) {
+  oop cur_oop = oop(cur);
+  int oop_size = cur_oop->size();
+  HeapWord* next_obj = cur + oop_size;
+  while (next_obj < top) {
+    // Keep filtering the remembered set.
+    if (!g1h->is_obj_dead(cur_oop, hr)) {
+      // Bottom lies entirely below top, so we can call the
+      // non-memRegion version of oop_iterate below.
+#ifndef PRODUCT
+      if (G1VerifyMarkingInEvac) {
+        VerifyLiveClosure vl_cl(g1h);
+        cur_oop->oop_iterate(&vl_cl);
+      }
+#endif
+      cur_oop->oop_iterate(cl);
+    }
+    cur = next_obj;
+    cur_oop = oop(cur);
+    oop_size = cur_oop->size();
+    next_obj = cur + oop_size;
+  }
+  return cur;
+}
+
+void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr,
+                                              HeapWord* bottom,
+                                              HeapWord* top,
+                                              OopClosure* cl) {
+  G1CollectedHeap* g1h = _g1;
+
+  int oop_size;
+
+  OopClosure* cl2 = cl;
+  FilterIntoCSClosure intoCSFilt(this, g1h, cl);
+  FilterOutOfRegionClosure outOfRegionFilt(_hr, cl);
+  switch (_fk) {
+  case IntoCSFilterKind:      cl2 = &intoCSFilt; break;
+  case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
+  }
+
+  // Start filtering what we add to the remembered set. If the object is
+  // not considered dead, either because it is marked (in the mark bitmap)
+  // or it was allocated after marking finished, then we add it. Otherwise
+  // we can safely ignore the object.
+  if (!g1h->is_obj_dead(oop(bottom), _hr)) {
+#ifndef PRODUCT
+    if (G1VerifyMarkingInEvac) {
+      VerifyLiveClosure vl_cl(g1h);
+      oop(bottom)->oop_iterate(&vl_cl, mr);
+    }
+#endif
+    oop_size = oop(bottom)->oop_iterate(cl2, mr);
+  } else {
+    oop_size = oop(bottom)->size();
+  }
+
+  bottom += oop_size;
+
+  if (bottom < top) {
+    // We replicate the loop below for several kinds of possible filters.
+    switch (_fk) {
+    case NoFilterKind:
+      bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top);
+      break;
+    case IntoCSFilterKind: {
+      FilterIntoCSClosure filt(this, g1h, cl);
+      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
+      break;
+    }
+    case OutOfRegionFilterKind: {
+      FilterOutOfRegionClosure filt(_hr, cl);
+      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+    }
+
+    // Last object. Need to do dead-obj filtering here too.
+    if (!g1h->is_obj_dead(oop(bottom), _hr)) {
+#ifndef PRODUCT
+      if (G1VerifyMarkingInEvac) {
+        VerifyLiveClosure vl_cl(g1h);
+        oop(bottom)->oop_iterate(&vl_cl, mr);
+      }
+#endif
+      oop(bottom)->oop_iterate(cl2, mr);
+    }
+  }
+}
+
+void HeapRegion::reset_after_compaction() {
+  G1OffsetTableContigSpace::reset_after_compaction();
+  // After a compaction the mark bitmap is invalid, so we must
+  // treat all objects as being inside the unmarked area.
+  zero_marked_bytes();
+  init_top_at_mark_start();
+}
+
+DirtyCardToOopClosure*
+HeapRegion::new_dcto_closure(OopClosure* cl,
+                             CardTableModRefBS::PrecisionStyle precision,
+                             HeapRegionDCTOC::FilterKind fk) {
+  return new HeapRegionDCTOC(G1CollectedHeap::heap(),
+                             this, cl, precision, fk);
+}
+
+void HeapRegion::hr_clear(bool par, bool clear_space) {
+  _humongous_type = NotHumongous;
+  _humongous_start_region = NULL;
+  _in_collection_set = false;
+  _is_gc_alloc_region = false;
+
+  // Age stuff (if parallel, this will be done separately, since it needs
+  // to be sequential).
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  set_young_index_in_cset(-1);
+  uninstall_surv_rate_group();
+  set_young_type(NotYoung);
+
+  // In case it had been the start of a humongous sequence, reset its end.
+  set_end(_orig_end);
+
+  if (!par) {
+    // If this is parallel, this will be done later.
+    HeapRegionRemSet* hrrs = rem_set();
+    if (hrrs != NULL) hrrs->clear();
+    _claimed = InitialClaimValue;
+  }
+  zero_marked_bytes();
+  set_sort_index(-1);
+  if ((uintptr_t)bottom() >= (uintptr_t)g1h->popular_object_boundary())
+    set_popular(false);
+
+  _offsets.resize(HeapRegion::GrainWords);
+  init_top_at_mark_start();
+  if (clear_space) clear(SpaceDecorator::Mangle);
+}
+
+// <PREDICTION>
+void HeapRegion::calc_gc_efficiency() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  _gc_efficiency = (double) garbage_bytes() /
+                            g1h->predict_region_elapsed_time_ms(this, false);
+}
+// </PREDICTION>
+
+void HeapRegion::set_startsHumongous() {
+  _humongous_type = StartsHumongous;
+  _humongous_start_region = this;
+  assert(end() == _orig_end, "Should be normal before alloc.");
+}
+
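+// Claim this region for parallel processing. A single Atomic::cmpxchg
+// attempts to install "claimValue"; the call returns true only if this
+// thread's cmpxchg succeeded, and false if the region already carried
+// claimValue or another thread won the race.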
+bool HeapRegion::claimHeapRegion(jint claimValue) {
+  jint current = _claimed;
+  if (current != claimValue) {
+    jint res = Atomic::cmpxchg(claimValue, &_claimed, current);
+    if (res == current) {
+      return true;
+    }
+  }
+  return false;
+}
+
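+// Binary search between "addr" and end() for a block boundary at or after
+// "addr" (note the bias toward "high" explained in the loop below).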
+HeapWord* HeapRegion::next_block_start_careful(HeapWord* addr) {
+  HeapWord* low = addr;
+  HeapWord* high = end();
+  while (low < high) {
+    size_t diff = pointer_delta(high, low);
+    // Must add one below to bias toward the high amount.  Otherwise, if
+    // "high" were at the desired value, and "low" were one less, we
+    // would not converge on "high".  This is not symmetric, because
+    // we set "high" to a block start, which might be the right one,
+    // which we don't do for "low".
+    HeapWord* middle = low + (diff+1)/2;
+    if (middle == high) return high;
+    HeapWord* mid_bs = block_start_careful(middle);
+    if (mid_bs < addr) {
+      low = middle;
+    } else {
+      high = mid_bs;
+    }
+  }
+  assert(low == high && low >= addr, "Didn't work.");
+  return low;
+}
+
+void HeapRegion::set_next_on_unclean_list(HeapRegion* r) {
+  assert(r == NULL || r->is_on_unclean_list(), "Malformed unclean list.");
+  _next_in_special_set = r;
+}
+
+void HeapRegion::set_on_unclean_list(bool b) {
+  _is_on_unclean_list = b;
+}
+
+void HeapRegion::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
+  G1OffsetTableContigSpace::initialize(mr, false, mangle_space);
+  hr_clear(false/*par*/, clear_space);
+}
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+HeapRegion::
+HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray,
+                     MemRegion mr, bool is_zeroed)
+  : G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed),
+    _next_fk(HeapRegionDCTOC::NoFilterKind),
+    _hrs_index(-1),
+    _humongous_type(NotHumongous), _humongous_start_region(NULL),
+    _in_collection_set(false), _is_gc_alloc_region(false),
+    _is_on_free_list(false), _is_on_unclean_list(false),
+    _next_in_special_set(NULL), _orig_end(NULL),
+    _claimed(InitialClaimValue), _evacuation_failed(false),
+    _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1),
+    _popularity(NotPopular),
+    _young_type(NotYoung), _next_young_region(NULL),
+    _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1),
+    _rem_set(NULL), _zfs(NotZeroFilled)
+{
+  _orig_end = mr.end();
+  // Note that initialize() will set the start of the unmarked area of the
+  // region.
+  this->initialize(mr, !is_zeroed, SpaceDecorator::Mangle);
+  set_top(bottom());
+  set_saved_mark();
+
+  _rem_set = new HeapRegionRemSet(sharedOffsetArray, this);
+
+  assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
+  // In case the region is allocated during a pause, note the top.
+  // We haven't done any counting on a brand new region.
+  _top_at_conc_mark_count = bottom();
+}
+
+class NextCompactionHeapRegionClosure: public HeapRegionClosure {
+  const HeapRegion* _target;
+  bool _target_seen;
+  HeapRegion* _last;
+  CompactibleSpace* _res;
+public:
+  NextCompactionHeapRegionClosure(const HeapRegion* target) :
+    _target(target), _target_seen(false), _res(NULL) {}
+  bool doHeapRegion(HeapRegion* cur) {
+    if (_target_seen) {
+      if (!cur->isHumongous()) {
+        _res = cur;
+        return true;
+      }
+    } else if (cur == _target) {
+      _target_seen = true;
+    }
+    return false;
+  }
+  CompactibleSpace* result() { return _res; }
+};
+
+CompactibleSpace* HeapRegion::next_compaction_space() const {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  // cast away const-ness
+  HeapRegion* r = (HeapRegion*) this;
+  NextCompactionHeapRegionClosure blk(r);
+  g1h->heap_region_iterate_from(r, &blk);
+  return blk.result();
+}
+
+void HeapRegion::set_continuesHumongous(HeapRegion* start) {
+  // The order is important here.
+  start->add_continuingHumongousRegion(this);
+  _humongous_type = ContinuesHumongous;
+  _humongous_start_region = start;
+}
+
+void HeapRegion::add_continuingHumongousRegion(HeapRegion* cont) {
+  // Must join the blocks of the current H region seq with the block of the
+  // added region.
+  offsets()->join_blocks(bottom(), cont->bottom());
+  arrayOop obj = (arrayOop)(bottom());
+  obj->set_length((int) (obj->length() + cont->capacity()/jintSize));
+  set_end(cont->end());
+  set_top(cont->end());
+}
+
+void HeapRegion::save_marks() {
+  set_saved_mark();
+}
+
+void HeapRegion::oops_in_mr_iterate(MemRegion mr, OopClosure* cl) {
+  HeapWord* p = mr.start();
+  HeapWord* e = mr.end();
+  oop obj;
+  while (p < e) {
+    obj = oop(p);
+    p += obj->oop_iterate(cl);
+  }
+  assert(p == e, "bad memregion: doesn't end on obj boundary");
+}
+
+#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \
+void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \
+  ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl);              \
+}
+SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN)
+
+
+void HeapRegion::oop_before_save_marks_iterate(OopClosure* cl) {
+  oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
+}
+
+#ifdef DEBUG
+HeapWord* HeapRegion::allocate(size_t size) {
+  jint state = zero_fill_state();
+  assert(!G1CollectedHeap::heap()->allocs_are_zero_filled() ||
+         zero_fill_is_allocated(),
+         "When ZF is on, only alloc in ZF'd regions");
+  return G1OffsetTableContigSpace::allocate(size);
+}
+#endif
+
+void HeapRegion::set_zero_fill_state_work(ZeroFillState zfs) {
+  assert(top() == bottom() || zfs == Allocated,
+         "Region must be empty, or we must be setting it to allocated.");
+  assert(ZF_mon->owned_by_self() ||
+         Universe::heap()->is_gc_active(),
+         "Must hold the lock or be a full GC to modify.");
+  _zfs = zfs;
+}
+
+void HeapRegion::set_zero_fill_complete() {
+  set_zero_fill_state_work(ZeroFilled);
+  if (ZF_mon->owned_by_self()) {
+    ZF_mon->notify_all();
+  }
+}
+
+
+void HeapRegion::ensure_zero_filled() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  ensure_zero_filled_locked();
+}
+
+void HeapRegion::ensure_zero_filled_locked() {
+  assert(ZF_mon->owned_by_self(), "Precondition");
+  bool should_ignore_zf = SafepointSynchronize::is_at_safepoint();
+  assert(should_ignore_zf || Heap_lock->is_locked(),
+         "Either we're in a GC or we're allocating a region.");
+  switch (zero_fill_state()) {
+  case HeapRegion::NotZeroFilled:
+    set_zero_fill_in_progress(Thread::current());
+    {
+      ZF_mon->unlock();
+      Copy::fill_to_words(bottom(), capacity()/HeapWordSize);
+      ZF_mon->lock_without_safepoint_check();
+    }
+    // A trap.
+    guarantee(zero_fill_state() == HeapRegion::ZeroFilling
+              && zero_filler() == Thread::current(),
+              "AHA!  Tell Dave D if you see this...");
+    set_zero_fill_complete();
+    // gclog_or_tty->print_cr("Did sync ZF.");
+    ConcurrentZFThread::note_sync_zfs();
+    break;
+  case HeapRegion::ZeroFilling:
+    if (should_ignore_zf) {
+      // We can "break" the lock and take over the work.
+      Copy::fill_to_words(bottom(), capacity()/HeapWordSize);
+      set_zero_fill_complete();
+      ConcurrentZFThread::note_sync_zfs();
+      break;
+    } else {
+      ConcurrentZFThread::wait_for_ZF_completed(this);
+    }
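+    // Falls through: once the concurrent zero-filler has completed, the
+    // region is ZeroFilled and there is nothing more to do.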
+  case HeapRegion::ZeroFilled:
+    // Nothing to do.
+    break;
+  case HeapRegion::Allocated:
+    guarantee(false, "Should not call on allocated regions.");
+  }
+  assert(zero_fill_state() == HeapRegion::ZeroFilled, "Post");
+}
+
+HeapWord*
+HeapRegion::object_iterate_mem_careful(MemRegion mr,
+                                                 ObjectClosure* cl) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  // We used to use "block_start_careful" here.  But we're actually happy
+  // to update the BOT while we do this...
+  HeapWord* cur = block_start(mr.start());
+  mr = mr.intersection(used_region());
+  if (mr.is_empty()) return NULL;
+  // Otherwise, find the obj that extends onto mr.start().
+
+  assert(cur <= mr.start()
+         && (oop(cur)->klass() == NULL ||
+             cur + oop(cur)->size() > mr.start()),
+         "postcondition of block_start");
+  oop obj;
+  while (cur < mr.end()) {
+    obj = oop(cur);
+    if (obj->klass() == NULL) {
+      // Ran into an unparseable point.
+      return cur;
+    } else if (!g1h->is_obj_dead(obj)) {
+      cl->do_object(obj);
+    }
+    if (cl->abort()) return cur;
+    // The check above must occur before the operation below, since an
+    // abort might invalidate the "size" operation.
+    cur += obj->size();
+  }
+  return NULL;
+}
+
+HeapWord*
+HeapRegion::
+oops_on_card_seq_iterate_careful(MemRegion mr,
+                                     FilterOutOfRegionClosure* cl) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If we're within a stop-world GC, then we might look at a card in a
+  // GC alloc region that extends onto a GC LAB, which may not be
+  // parseable.  Stop such at the "saved_mark" of the region.
+  if (G1CollectedHeap::heap()->is_gc_active()) {
+    mr = mr.intersection(used_region_at_save_marks());
+  } else {
+    mr = mr.intersection(used_region());
+  }
+  if (mr.is_empty()) return NULL;
+  // Otherwise, find the obj that extends onto mr.start().
+
+  // We used to use "block_start_careful" here.  But we're actually happy
+  // to update the BOT while we do this...
+  HeapWord* cur = block_start(mr.start());
+  assert(cur <= mr.start(), "Postcondition");
+
+  while (cur <= mr.start()) {
+    if (oop(cur)->klass() == NULL) {
+      // Ran into an unparseable point.
+      return cur;
+    }
+    // Otherwise...
+    int sz = oop(cur)->size();
+    if (cur + sz > mr.start()) break;
+    // Otherwise, go on.
+    cur = cur + sz;
+  }
+  oop obj;
+  obj = oop(cur);
+  // If we finish this loop...
+  assert(cur <= mr.start()
+         && obj->klass() != NULL
+         && cur + obj->size() > mr.start(),
+         "Loop postcondition");
+  if (!g1h->is_obj_dead(obj)) {
+    obj->oop_iterate(cl, mr);
+  }
+
+  HeapWord* next;
+  while (cur < mr.end()) {
+    obj = oop(cur);
+    if (obj->klass() == NULL) {
+      // Ran into an unparseable point.
+      return cur;
+    }
+    // Otherwise:
+    next = (cur + obj->size());
+    if (!g1h->is_obj_dead(obj)) {
+      if (next < mr.end()) {
+        obj->oop_iterate(cl);
+      } else {
+        // this obj spans the boundary.  If it's an array, stop at the
+        // boundary.
+        if (obj->is_objArray()) {
+          obj->oop_iterate(cl, mr);
+        } else {
+          obj->oop_iterate(cl);
+        }
+      }
+    }
+    cur = next;
+  }
+  return NULL;
+}
+
+void HeapRegion::print() const { print_on(gclog_or_tty); }
+void HeapRegion::print_on(outputStream* st) const {
+  if (isHumongous()) {
+    if (startsHumongous())
+      st->print(" HS");
+    else
+      st->print(" HC");
+  } else {
+    st->print("   ");
+  }
+  if (in_collection_set())
+    st->print(" CS");
+  else if (is_gc_alloc_region())
+    st->print(" A ");
+  else
+    st->print("   ");
+  if (is_young())
+    st->print(is_scan_only() ? " SO" : (is_survivor() ? " SU" : " Y "));
+  else
+    st->print("   ");
+  if (is_empty())
+    st->print(" F");
+  else
+    st->print("  ");
+  st->print(" %d", _gc_time_stamp);
+  G1OffsetTableContigSpace::print_on(st);
+}
+
+#define OBJ_SAMPLE_INTERVAL 0
+#define BLOCK_SAMPLE_INTERVAL 100
+
+// This really ought to be commoned up into OffsetTableContigSpace somehow.
+// We would need a mechanism to make that code skip dead objects.
+
+void HeapRegion::verify(bool allow_dirty) const {
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  HeapWord* p = bottom();
+  HeapWord* prev_p = NULL;
+  int objs = 0;
+  int blocks = 0;
+  VerifyLiveClosure vl_cl(g1);
+  while (p < top()) {
+    size_t size = oop(p)->size();
+    if (blocks == BLOCK_SAMPLE_INTERVAL) {
+      guarantee(p == block_start_const(p + (size/2)),
+                "check offset computation");
+      blocks = 0;
+    } else {
+      blocks++;
+    }
+    if (objs == OBJ_SAMPLE_INTERVAL) {
+      oop obj = oop(p);
+      if (!g1->is_obj_dead(obj, this)) {
+        obj->verify();
+        vl_cl.set_containing_obj(obj);
+        obj->oop_iterate(&vl_cl);
+        if (G1MaxVerifyFailures >= 0
+            && vl_cl.n_failures() >= G1MaxVerifyFailures) break;
+      }
+      objs = 0;
+    } else {
+      objs++;
+    }
+    prev_p = p;
+    p += size;
+  }
+  HeapWord* rend = end();
+  HeapWord* rtop = top();
+  if (rtop < rend) {
+    guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop,
+              "check offset computation");
+  }
+  if (vl_cl.failures()) {
+    gclog_or_tty->print_cr("Heap:");
+    G1CollectedHeap::heap()->print();
+    gclog_or_tty->print_cr("");
+  }
+  if (G1VerifyConcMark &&
+      G1VerifyConcMarkPrintReachable &&
+      vl_cl.failures()) {
+    g1->concurrent_mark()->print_prev_bitmap_reachable();
+  }
+  guarantee(!vl_cl.failures(), "should not have had any failures");
+  guarantee(p == top(), "end of last object must match end of space");
+}
+
+// G1OffsetTableContigSpace code; copied from space.cpp.  Hope this can go
+// away eventually.
+
+void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
+  // false ==> we'll do the clearing if there's clearing to be done.
+  ContiguousSpace::initialize(mr, false, mangle_space);
+  _offsets.zero_bottom_entry();
+  _offsets.initialize_threshold();
+  if (clear_space) clear(mangle_space);
+}
+
+void G1OffsetTableContigSpace::clear(bool mangle_space) {
+  ContiguousSpace::clear(mangle_space);
+  _offsets.zero_bottom_entry();
+  _offsets.initialize_threshold();
+}
+
+void G1OffsetTableContigSpace::set_bottom(HeapWord* new_bottom) {
+  Space::set_bottom(new_bottom);
+  _offsets.set_bottom(new_bottom);
+}
+
+void G1OffsetTableContigSpace::set_end(HeapWord* new_end) {
+  Space::set_end(new_end);
+  _offsets.resize(new_end - bottom());
+}
+
+void G1OffsetTableContigSpace::print() const {
+  print_short();
+  gclog_or_tty->print_cr(" [" INTPTR_FORMAT ", " INTPTR_FORMAT ", "
+                INTPTR_FORMAT ", " INTPTR_FORMAT ")",
+                bottom(), top(), _offsets.threshold(), end());
+}
+
+HeapWord* G1OffsetTableContigSpace::initialize_threshold() {
+  return _offsets.initialize_threshold();
+}
+
+HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start,
+                                                    HeapWord* end) {
+  _offsets.alloc_block(start, end);
+  return _offsets.threshold();
+}
+
+HeapWord* G1OffsetTableContigSpace::saved_mark_word() const {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" );
+  if (_gc_time_stamp < g1h->get_gc_time_stamp())
+    return top();
+  else
+    return ContiguousSpace::saved_mark_word();
+}
+
+void G1OffsetTableContigSpace::set_saved_mark() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();
+
+  if (_gc_time_stamp < curr_gc_time_stamp) {
+    // The order of these is important, as another thread might be
+    // about to start scanning this region. If it does so after
+    // set_saved_mark and before _gc_time_stamp = ..., then the latter
+    // will be false, and it will pick up top() as the high water mark
+    // of region. If it does so after _gc_time_stamp = ..., then it
+    // will pick up the right saved_mark_word() as the high water mark
+    // of the region. Either way, the behaviour will be correct.
+    ContiguousSpace::set_saved_mark();
+    _gc_time_stamp = curr_gc_time_stamp;
+    OrderAccess::fence();
+  }
+}
+
+G1OffsetTableContigSpace::
+G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
+                         MemRegion mr, bool is_zeroed) :
+  _offsets(sharedOffsetArray, mr),
+  _par_alloc_lock(Mutex::leaf, "OffsetTableContigSpace par alloc lock", true),
+  _gc_time_stamp(0)
+{
+  _offsets.set_space(this);
+  initialize(mr, !is_zeroed, SpaceDecorator::Mangle);
+}
+
+size_t RegionList::length() {
+  size_t len = 0;
+  HeapRegion* cur = hd();
+  DEBUG_ONLY(HeapRegion* last = NULL);
+  while (cur != NULL) {
+    len++;
+    DEBUG_ONLY(last = cur);
+    cur = get_next(cur);
+  }
+  assert(last == tl(), "Invariant");
+  return len;
+}
+
+void RegionList::insert_before_head(HeapRegion* r) {
+  assert(well_formed(), "Inv");
+  set_next(r, hd());
+  _hd = r;
+  _sz++;
+  if (tl() == NULL) _tl = r;
+  assert(well_formed(), "Inv");
+}
+
+void RegionList::prepend_list(RegionList* new_list) {
+  assert(well_formed(), "Precondition");
+  assert(new_list->well_formed(), "Precondition");
+  HeapRegion* new_tl = new_list->tl();
+  if (new_tl != NULL) {
+    set_next(new_tl, hd());
+    _hd = new_list->hd();
+    _sz += new_list->sz();
+    if (tl() == NULL) _tl = new_list->tl();
+  } else {
+    assert(new_list->hd() == NULL && new_list->sz() == 0, "Inv");
+  }
+  assert(well_formed(), "Inv");
+}
+
+void RegionList::delete_after(HeapRegion* r) {
+  assert(well_formed(), "Precondition");
+  HeapRegion* next = get_next(r);
+  assert(r != NULL, "Precondition");
+  HeapRegion* next_tl = get_next(next);
+  set_next(r, next_tl);
+  dec_sz();
+  if (next == tl()) {
+    assert(next_tl == NULL, "Inv");
+    _tl = r;
+  }
+  assert(well_formed(), "Inv");
+}
+
+HeapRegion* RegionList::pop() {
+  assert(well_formed(), "Inv");
+  HeapRegion* res = hd();
+  if (res != NULL) {
+    _hd = get_next(res);
+    _sz--;
+    set_next(res, NULL);
+    if (sz() == 0) _tl = NULL;
+  }
+  assert(well_formed(), "Inv");
+  return res;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,937 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef SERIALGC
+
+// A HeapRegion is the smallest piece of a G1CollectedHeap that
+// can be collected independently.
+
+// NOTE: Although a HeapRegion is a Space, its
+// Space::initDirtyCardClosure method must not be called.
+// The problem is that the existence of this method breaks
+// the independence of barrier sets from remembered sets.
+// The solution is to remove this method from the definition
+// of a Space.
+
+class CompactibleSpace;
+class ContiguousSpace;
+class HeapRegionRemSet;
+class HeapRegionRemSetIterator;
+class HeapRegion;
+
+// A dirty card to oop closure for heap regions. It
+// knows how to get the G1 heap and how to use the bitmap
+// in the concurrent marker used by G1 to filter remembered
+// sets.
+
+class HeapRegionDCTOC : public ContiguousSpaceDCTOC {
+public:
+  // Specification of possible DirtyCardToOopClosure filtering.
+  enum FilterKind {
+    NoFilterKind,
+    IntoCSFilterKind,
+    OutOfRegionFilterKind
+  };
+
+protected:
+  HeapRegion* _hr;
+  FilterKind _fk;
+  G1CollectedHeap* _g1;
+
+  void walk_mem_region_with_cl(MemRegion mr,
+                               HeapWord* bottom, HeapWord* top,
+                               OopClosure* cl);
+
+  // We don't specialize this for FilteringClosure; filtering is handled by
+  // the "FilterKind" mechanism.  But we provide this to avoid a compiler
+  // warning.
+  void walk_mem_region_with_cl(MemRegion mr,
+                               HeapWord* bottom, HeapWord* top,
+                               FilteringClosure* cl) {
+    HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top,
+                                             (OopClosure*)cl);
+  }
+
+  // Get the actual top of the area on which the closure will
+  // operate, given where the top is assumed to be (the end of the
+  // memory region passed to do_MemRegion) and where the object
+  // at the top is assumed to start. For example, an object may
+  // start at the top but actually extend past the assumed top,
+  // in which case the top becomes the end of the object.
+  HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) {
+    return ContiguousSpaceDCTOC::get_actual_top(top, top_obj);
+  }
+
+  // Walk the given memory region from bottom to (actual) top
+  // looking for objects and applying the oop closure (_cl) to
+  // them. The base implementation of this treats the area as
+  // blocks, where a block may or may not be an object. Sub-
+  // classes should override this to provide more accurate
+  // or possibly more efficient walking.
+  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) {
+    Filtering_DCTOC::walk_mem_region(mr, bottom, top);
+  }
+
+public:
+  HeapRegionDCTOC(G1CollectedHeap* g1,
+                  HeapRegion* hr, OopClosure* cl,
+                  CardTableModRefBS::PrecisionStyle precision,
+                  FilterKind fk);
+};
+
+
+// The complicating factor is that BlockOffsetTable diverged
+// significantly, and we need functionality that is only in the G1 version.
+// So I copied that code, which led to an alternate G1 version of
+// OffsetTableContigSpace.  If the two versions of BlockOffsetTable could
+// be reconciled, then G1OffsetTableContigSpace could go away.
+
+// The idea behind time stamps is the following. Doing a save_marks on
+// all regions at every GC pause is time consuming (if I remember
+// well, 10ms or so). So, we would like to do that only for regions
+// that are GC alloc regions. To achieve this, we use time
+// stamps. For every evacuation pause, G1CollectedHeap generates a
+// unique time stamp (essentially a counter that gets
+// incremented). Every time we want to call save_marks on a region,
+// we set the saved_mark_word to top and also copy the current GC
+// time stamp to the time stamp field of the space. Reading the
+// saved_mark_word involves checking the time stamp of the
+// region. If it is the same as the current GC time stamp, then we
+// can safely read the saved_mark_word field, as it is valid. If the
+// time stamp of the region is not the same as the current GC time
+// stamp, then we instead read top, as the saved_mark_word field is
+// invalid. Time stamps (on the regions and also on the
+// G1CollectedHeap) are reset at every cleanup (we iterate over
+// the regions anyway) and at the end of a Full GC. The current scheme
+// that uses sequential unsigned ints will fail only if we have 4b
+// evacuation pauses between two cleanups, which is _highly_ unlikely.
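+//
+// In code, saved_mark_word() therefore behaves roughly as follows:
+//   if (_gc_time_stamp < g1h->get_gc_time_stamp()) return top();
+//   else return ContiguousSpace::saved_mark_word();
+// (see G1OffsetTableContigSpace::saved_mark_word() in heapRegion.cpp).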
+
+class G1OffsetTableContigSpace: public ContiguousSpace {
+  friend class VMStructs;
+ protected:
+  G1BlockOffsetArrayContigSpace _offsets;
+  Mutex _par_alloc_lock;
+  volatile unsigned _gc_time_stamp;
+
+ public:
+  // Constructor.  If "is_zeroed" is true, the MemRegion "mr" may be
+  // assumed to contain zeros.
+  G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
+                           MemRegion mr, bool is_zeroed = false);
+
+  void set_bottom(HeapWord* value);
+  void set_end(HeapWord* value);
+
+  virtual HeapWord* saved_mark_word() const;
+  virtual void set_saved_mark();
+  void reset_gc_time_stamp() { _gc_time_stamp = 0; }
+
+  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void clear(bool mangle_space);
+
+  HeapWord* block_start(const void* p);
+  HeapWord* block_start_const(const void* p) const;
+
+  // Add offset table update.
+  virtual HeapWord* allocate(size_t word_size);
+  HeapWord* par_allocate(size_t word_size);
+
+  // MarkSweep support phase3
+  virtual HeapWord* initialize_threshold();
+  virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end);
+
+  virtual void print() const;
+};
+
+class HeapRegion: public G1OffsetTableContigSpace {
+  friend class VMStructs;
+ private:
+
+  enum HumongousType {
+    NotHumongous = 0,
+    StartsHumongous,
+    ContinuesHumongous
+  };
+
+  // The next filter kind that should be used for a "new_dcto_cl" call with
+  // the "traditional" signature.
+  HeapRegionDCTOC::FilterKind _next_fk;
+
+  // Requires that the region "mr" be dense with objects, and begin and end
+  // with an object.
+  void oops_in_mr_iterate(MemRegion mr, OopClosure* cl);
+
+  // The remembered set for this region.
+  // (Might want to make this "inline" later, to avoid some alloc failure
+  // issues.)
+  HeapRegionRemSet* _rem_set;
+
+  G1BlockOffsetArrayContigSpace* offsets() { return &_offsets; }
+
+ protected:
+  // If this region is a member of a HeapRegionSeq, the index in that
+  // sequence, otherwise -1.
+  int  _hrs_index;
+
+  HumongousType _humongous_type;
+  // For a humongous region, the region in which it starts.
+  HeapRegion* _humongous_start_region;
+  // For the start region of a humongous sequence, its original end().
+  HeapWord* _orig_end;
+
+  // True iff the region is in current collection_set.
+  bool _in_collection_set;
+
+  // True iff the region is on the unclean list, waiting to be zero filled.
+  bool _is_on_unclean_list;
+
+  // True iff the region is on the free list, ready for allocation.
+  bool _is_on_free_list;
+
+  // Is this or has it been an allocation region in the current collection
+  // pause.
+  bool _is_gc_alloc_region;
+
+  // True iff an attempt to evacuate an object in the region failed.
+  bool _evacuation_failed;
+
+  // A heap region may be a member of one of a number of special subsets, each
+  // represented as linked lists through the field below.  Currently, these
+  // sets include:
+  //   The collection set.
+  //   The set of allocation regions used in a collection pause.
+  //   Spaces that may contain gray objects.
+  HeapRegion* _next_in_special_set;
+
+  // next region in the young "generation" region set
+  HeapRegion* _next_young_region;
+
+  // For parallel heapRegion traversal.
+  jint _claimed;
+
+  // We use concurrent marking to determine the amount of live data
+  // in each heap region.
+  size_t _prev_marked_bytes;    // Bytes known to be live via last completed marking.
+  size_t _next_marked_bytes;    // Bytes known to be live via in-progress marking.
+
+  // See "sort_index" method.  -1 means it is not in the array.
+  int _sort_index;
+
+  // Means it has (or at least had) a very large RS, and should not be
+  // considered for membership in a collection set.
+  enum PopularityState {
+    NotPopular,
+    PopularPending,
+    Popular
+  };
+  PopularityState _popularity;
+
+  // <PREDICTION>
+  double _gc_efficiency;
+  // </PREDICTION>
+
+  enum YoungType {
+    NotYoung,                   // a region is not young
+    ScanOnly,                   // a region is young and scan-only
+    Young,                      // a region is young
+    Survivor                    // a region is young and it contains
+                                // survivor
+  };
+
+  YoungType _young_type;
+  int  _young_index_in_cset;
+  SurvRateGroup* _surv_rate_group;
+  int  _age_index;
+
+  // The start of the unmarked area. The unmarked area extends from this
+  // word until the top and/or end of the region, and is the part
+  // of the region for which no marking was done, i.e. objects may
+  // have been allocated in this part since the last mark phase.
+  // "prev" is the top at the start of the last completed marking.
+  // "next" is the top at the start of the in-progress marking (if any.)
+  HeapWord* _prev_top_at_mark_start;
+  HeapWord* _next_top_at_mark_start;
+  // If a collection pause is in progress, this is the top at the start
+  // of that pause.
+
+  // We've counted the marked bytes of objects below here.
+  HeapWord* _top_at_conc_mark_count;
+
+  void init_top_at_mark_start() {
+    assert(_prev_marked_bytes == 0 &&
+           _next_marked_bytes == 0,
+           "Must be called after zero_marked_bytes.");
+    HeapWord* bot = bottom();
+    _prev_top_at_mark_start = bot;
+    _next_top_at_mark_start = bot;
+    _top_at_conc_mark_count = bot;
+  }
+
+  jint _zfs;  // A member of ZeroFillState.  Protected by ZF_lock.
+  Thread* _zero_filler; // If _zfs is ZeroFilling, the thread that (last)
+                        // made it so.
+
+  void set_young_type(YoungType new_type) {
+    //assert(_young_type != new_type, "setting the same type" );
+    // TODO: add more assertions here
+    _young_type = new_type;
+  }
+
+ public:
+  // If "is_zeroed" is "true", the region "mr" can be assumed to contain zeros.
+  HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray,
+             MemRegion mr, bool is_zeroed);
+
+  enum SomePublicConstants {
+    // HeapRegions are GrainBytes-aligned
+    // and have sizes that are multiples of GrainBytes.
+    LogOfHRGrainBytes = 20,
+    LogOfHRGrainWords = LogOfHRGrainBytes - LogHeapWordSize,
+    GrainBytes = 1 << LogOfHRGrainBytes,
+    GrainWords = 1 << LogOfHRGrainWords,
+    MaxAge = 2, NoOfAges = MaxAge+1
+  };
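+  // Note: with LogOfHRGrainBytes == 20, GrainBytes is 1 << 20 bytes, so each
+  // heap region is 1M; GrainWords is the same size expressed in heap words.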
+
+  enum ClaimValues {
+    InitialClaimValue     = 0,
+    FinalCountClaimValue  = 1,
+    NoteEndClaimValue     = 2,
+    ScrubRemSetClaimValue = 3,
+    ParVerifyClaimValue   = 4
+  };
+
+  // Concurrent refinement requires contiguous heap regions (in which TLABs
+  // might be allocated) to be zero-filled.  Each region therefore has a
+  // zero-fill-state.
+  enum ZeroFillState {
+    NotZeroFilled,
+    ZeroFilling,
+    ZeroFilled,
+    Allocated
+  };
+
+  // If this region is a member of a HeapRegionSeq, the index in that
+  // sequence, otherwise -1.
+  int hrs_index() const { return _hrs_index; }
+  void set_hrs_index(int index) { _hrs_index = index; }
+
+  // The number of bytes marked live in the region in the last marking phase.
+  size_t marked_bytes()    { return _prev_marked_bytes; }
+  // The number of bytes counted in the next marking.
+  size_t next_marked_bytes() { return _next_marked_bytes; }
+  // The number of bytes live wrt the next marking.
+  size_t next_live_bytes() {
+    return (top() - next_top_at_mark_start())
+      * HeapWordSize
+      + next_marked_bytes();
+  }
+
+  // A lower bound on the amount of garbage bytes in the region.
+  size_t garbage_bytes() {
+    size_t used_at_mark_start_bytes =
+      (prev_top_at_mark_start() - bottom()) * HeapWordSize;
+    assert(used_at_mark_start_bytes >= marked_bytes(),
+           "Can't mark more than we have.");
+    return used_at_mark_start_bytes - marked_bytes();
+  }
+
+  // An upper bound on the number of live bytes in the region.
+  size_t max_live_bytes() { return used() - garbage_bytes(); }
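+  // (Illustrative example: if 1M bytes had been allocated in the region when
+  // the last completed marking started, and 640K of those bytes were marked
+  // live, garbage_bytes() is 1M - 640K = 384K and max_live_bytes() is
+  // used() - 384K.)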
+
+  void add_to_marked_bytes(size_t incr_bytes) {
+    _next_marked_bytes = _next_marked_bytes + incr_bytes;
+    guarantee( _next_marked_bytes <= used(), "invariant" );
+  }
+
+  void zero_marked_bytes()      {
+    _prev_marked_bytes = _next_marked_bytes = 0;
+  }
+
+  bool isHumongous() const { return _humongous_type != NotHumongous; }
+  bool startsHumongous() const { return _humongous_type == StartsHumongous; }
+  bool continuesHumongous() const { return _humongous_type == ContinuesHumongous; }
+  // For a humongous region, the region in which it starts.
+  HeapRegion* humongous_start_region() const {
+    return _humongous_start_region;
+  }
+
+  // Marks the current region as the start of a humongous object, which may
+  // span several regions.
+  virtual void set_startsHumongous();
+
+  // The regions that continue a humongous sequence should be added using
+  // this method, in increasing address order.
+  void set_continuesHumongous(HeapRegion* start);
+
+  void add_continuingHumongousRegion(HeapRegion* cont);
+
+  // If the region has a remembered set, return a pointer to it.
+  HeapRegionRemSet* rem_set() const {
+    return _rem_set;
+  }
+
+  // True iff the region is in current collection_set.
+  bool in_collection_set() const {
+    return _in_collection_set;
+  }
+  void set_in_collection_set(bool b) {
+    _in_collection_set = b;
+  }
+  HeapRegion* next_in_collection_set() {
+    assert(in_collection_set(), "should only invoke on member of CS.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->in_collection_set(),
+           "Malformed CS.");
+    return _next_in_special_set;
+  }
+  void set_next_in_collection_set(HeapRegion* r) {
+    assert(in_collection_set(), "should only invoke on member of CS.");
+    assert(r == NULL || r->in_collection_set(), "Malformed CS.");
+    _next_in_special_set = r;
+  }
+
+  // True iff it is or has been an allocation region in the current
+  // collection pause.
+  bool is_gc_alloc_region() const {
+    return _is_gc_alloc_region;
+  }
+  void set_is_gc_alloc_region(bool b) {
+    _is_gc_alloc_region = b;
+  }
+  HeapRegion* next_gc_alloc_region() {
+    assert(is_gc_alloc_region(), "should only invoke on member of CS.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_gc_alloc_region(),
+           "Malformed CS.");
+    return _next_in_special_set;
+  }
+  void set_next_gc_alloc_region(HeapRegion* r) {
+    assert(is_gc_alloc_region(), "should only invoke on member of CS.");
+    assert(r == NULL || r->is_gc_alloc_region(), "Malformed CS.");
+    _next_in_special_set = r;
+  }
+
+  bool is_reserved() {
+    return popular();
+  }
+
+  bool is_on_free_list() {
+    return _is_on_free_list;
+  }
+
+  void set_on_free_list(bool b) {
+    _is_on_free_list = b;
+  }
+
+  HeapRegion* next_from_free_list() {
+    assert(is_on_free_list(),
+           "Should only invoke on free space.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_on_free_list(),
+           "Malformed Free List.");
+    return _next_in_special_set;
+  }
+
+  void set_next_on_free_list(HeapRegion* r) {
+    assert(r == NULL || r->is_on_free_list(), "Malformed free list.");
+    _next_in_special_set = r;
+  }
+
+  bool is_on_unclean_list() {
+    return _is_on_unclean_list;
+  }
+
+  void set_on_unclean_list(bool b);
+
+  HeapRegion* next_from_unclean_list() {
+    assert(is_on_unclean_list(),
+           "Should only invoke on unclean space.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_on_unclean_list(),
+           "Malformed unclean List.");
+    return _next_in_special_set;
+  }
+
+  void set_next_on_unclean_list(HeapRegion* r);
+
+  HeapRegion* get_next_young_region() { return _next_young_region; }
+  void set_next_young_region(HeapRegion* hr) {
+    _next_young_region = hr;
+  }
+
+  // Allows logical separation between objects allocated before and after
+  // the call to save_marks().
+  void save_marks();
+
+  // Reset HR stuff to default values.
+  void hr_clear(bool par, bool clear_space);
+
+  void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+
+  // Ensure that "this" is zero-filled.
+  void ensure_zero_filled();
+  // This one requires that the calling thread holds ZF_mon.
+  void ensure_zero_filled_locked();
+
+  // Get the start of the unmarked area in this region.
+  HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; }
+  HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; }
+
+  // Apply "cl->do_oop" to (the addresses of) all reference fields in objects
+  // allocated in the current region before the last call to "save_mark".
+  void oop_before_save_marks_iterate(OopClosure* cl);
+
+  // This call determines the "filter kind" argument that will be used for
+  // the next call to "new_dcto_cl" on this region with the "traditional"
+  // signature (i.e., the call below.)  The default, in the absence of a
+  // preceding call to this method, is "NoFilterKind", and a call to this
+  // method is necessary for each such call, or else it reverts to the
+  // default.
+  // (This is really ugly, but all other methods I could think of changed a
+  // lot of main-line code for G1.)
+  void set_next_filter_kind(HeapRegionDCTOC::FilterKind nfk) {
+    _next_fk = nfk;
+  }
+
+  DirtyCardToOopClosure*
+  new_dcto_closure(OopClosure* cl,
+                   CardTableModRefBS::PrecisionStyle precision,
+                   HeapRegionDCTOC::FilterKind fk);
+
+#if WHASSUP
+  DirtyCardToOopClosure*
+  new_dcto_closure(OopClosure* cl,
+                   CardTableModRefBS::PrecisionStyle precision,
+                   HeapWord* boundary) {
+    assert(boundary == NULL, "This arg doesn't make sense here.");
+    DirtyCardToOopClosure* res = new_dcto_closure(cl, precision, _next_fk);
+    _next_fk = HeapRegionDCTOC::NoFilterKind;
+    return res;
+  }
+#endif
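+
+  // A sketch of the intended pairing (assuming the boundary-taking overload
+  // above is compiled in; "fk" stands for whatever filter kind the caller
+  // wants):
+  //   hr->set_next_filter_kind(fk);
+  //   dcto_cl = hr->new_dcto_closure(cl, precision, NULL /* boundary */);
+  // after which "_next_fk" has been consumed and reverts to NoFilterKind.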
+
+  //
+  // Note the start or end of marking. This tells the heap region
+  // that the collector is about to start or has finished (concurrently)
+  // marking the heap.
+  //
+
+  // Note the start of a marking phase. Record the
+  // start of the unmarked area of the region here.
+  void note_start_of_marking(bool during_initial_mark) {
+    init_top_at_conc_mark_count();
+    _next_marked_bytes = 0;
+    if (during_initial_mark && is_young() && !is_survivor())
+      _next_top_at_mark_start = bottom();
+    else
+      _next_top_at_mark_start = top();
+  }
+
+  // Note the end of a marking phase. Install the start of
+  // the unmarked area that was captured at start of marking.
+  void note_end_of_marking() {
+    _prev_top_at_mark_start = _next_top_at_mark_start;
+    _prev_marked_bytes = _next_marked_bytes;
+    _next_marked_bytes = 0;
+
+    guarantee(_prev_marked_bytes <=
+              (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
+              "invariant");
+  }
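+
+  // Roughly, over one marking cycle the fields above evolve as follows:
+  //   note_start_of_marking() records the current top() (bottom() for young
+  //   non-survivor regions during an initial mark) in _next_top_at_mark_start,
+  //   so objects allocated above it are treated as implicitly live; then
+  //   note_end_of_marking() turns the "next" values into the "prev" values,
+  //   so is_marked() and obj_allocated_since_prev_marking() reflect the
+  //   marking that just completed.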
+
+  // After an evacuation, we need to update _next_top_at_mark_start
+  // to be the current top.  Note this is only valid if we have only
+  // ever evacuated into this region.  If we evacuate, allocate, and
+  // then evacuate we are in deep doodoo.
+  void note_end_of_copying() {
+    assert(top() >= _next_top_at_mark_start,
+           "Increase only");
+    _next_top_at_mark_start = top();
+  }
+
+  // Returns "false" iff no object in the region was allocated when the
+  // last mark phase ended.
+  bool is_marked() { return _prev_top_at_mark_start != bottom(); }
+
+  // If "is_marked()" is true, then this is the index of the region in
+  // an array constructed at the end of marking of the regions in a
+  // "desirability" order.
+  int sort_index() {
+    return _sort_index;
+  }
+  void set_sort_index(int i) {
+    _sort_index = i;
+  }
+
+  void init_top_at_conc_mark_count() {
+    _top_at_conc_mark_count = bottom();
+  }
+
+  void set_top_at_conc_mark_count(HeapWord *cur) {
+    assert(bottom() <= cur && cur <= end(), "Sanity.");
+    _top_at_conc_mark_count = cur;
+  }
+
+  HeapWord* top_at_conc_mark_count() {
+    return _top_at_conc_mark_count;
+  }
+
+  void reset_during_compaction() {
+    guarantee( isHumongous() && startsHumongous(),
+               "should only be called for humongous regions");
+
+    zero_marked_bytes();
+    init_top_at_mark_start();
+  }
+
+  bool popular() { return _popularity == Popular; }
+  void set_popular(bool b) {
+    if (b) {
+      _popularity = Popular;
+    } else {
+      _popularity = NotPopular;
+    }
+  }
+  bool popular_pending() { return _popularity == PopularPending; }
+  void set_popular_pending(bool b) {
+    if (b) {
+      _popularity = PopularPending;
+    } else {
+      _popularity = NotPopular;
+    }
+  }
+
+  // <PREDICTION>
+  void calc_gc_efficiency(void);
+  double gc_efficiency() { return _gc_efficiency;}
+  // </PREDICTION>
+
+  bool is_young() const     { return _young_type != NotYoung; }
+  bool is_scan_only() const { return _young_type == ScanOnly; }
+  bool is_survivor() const  { return _young_type == Survivor; }
+
+  int  young_index_in_cset() const { return _young_index_in_cset; }
+  void set_young_index_in_cset(int index) {
+    assert( (index == -1) || is_young(), "pre-condition" );
+    _young_index_in_cset = index;
+  }
+
+  int age_in_surv_rate_group() {
+    assert( _surv_rate_group != NULL, "pre-condition" );
+    assert( _age_index > -1, "pre-condition" );
+    return _surv_rate_group->age_in_group(_age_index);
+  }
+
+  void recalculate_age_in_surv_rate_group() {
+    assert( _surv_rate_group != NULL, "pre-condition" );
+    assert( _age_index > -1, "pre-condition" );
+    _age_index = _surv_rate_group->recalculate_age_index(_age_index);
+  }
+
+  void record_surv_words_in_group(size_t words_survived) {
+    assert( _surv_rate_group != NULL, "pre-condition" );
+    assert( _age_index > -1, "pre-condition" );
+    int age_in_group = age_in_surv_rate_group();
+    _surv_rate_group->record_surviving_words(age_in_group, words_survived);
+  }
+
+  int age_in_surv_rate_group_cond() {
+    if (_surv_rate_group != NULL)
+      return age_in_surv_rate_group();
+    else
+      return -1;
+  }
+
+  SurvRateGroup* surv_rate_group() {
+    return _surv_rate_group;
+  }
+
+  void install_surv_rate_group(SurvRateGroup* surv_rate_group) {
+    assert( surv_rate_group != NULL, "pre-condition" );
+    assert( _surv_rate_group == NULL, "pre-condition" );
+    assert( is_young(), "pre-condition" );
+
+    _surv_rate_group = surv_rate_group;
+    _age_index = surv_rate_group->next_age_index();
+  }
+
+  void uninstall_surv_rate_group() {
+    if (_surv_rate_group != NULL) {
+      assert( _age_index > -1, "pre-condition" );
+      assert( is_young(), "pre-condition" );
+
+      _surv_rate_group = NULL;
+      _age_index = -1;
+    } else {
+      assert( _age_index == -1, "pre-condition" );
+    }
+  }
+
+  void set_young() { set_young_type(Young); }
+
+  void set_scan_only() { set_young_type(ScanOnly); }
+
+  void set_survivor() { set_young_type(Survivor); }
+
+  void set_not_young() { set_young_type(NotYoung); }
+
+  // Determine if an object has been allocated since the last
+  // mark performed by the collector. This returns true iff the object
+  // is within the unmarked area of the region.
+  bool obj_allocated_since_prev_marking(oop obj) const {
+    return (HeapWord *) obj >= prev_top_at_mark_start();
+  }
+  bool obj_allocated_since_next_marking(oop obj) const {
+    return (HeapWord *) obj >= next_top_at_mark_start();
+  }
+
+  // For parallel heapRegion traversal.
+  bool claimHeapRegion(int claimValue);
+  jint claim_value() { return _claimed; }
+  // Use this carefully: only when you're sure no one is claiming...
+  void set_claim_value(int claimValue) { _claimed = claimValue; }
+
+  // Returns the "evacuation_failed" property of the region.
+  bool evacuation_failed() { return _evacuation_failed; }
+
+  // Sets the "evacuation_failed" property of the region.
+  void set_evacuation_failed(bool b) {
+    _evacuation_failed = b;
+
+    if (b) {
+      init_top_at_conc_mark_count();
+      _next_marked_bytes = 0;
+    }
+  }
+
+  // Requires that "mr" be entirely within the region.
+  // Apply "cl->do_object" to all objects that intersect with "mr".
+  // If the iteration encounters an unparseable portion of the region,
+  // or if "cl->abort()" is true after a closure application,
+  // terminate the iteration and return the address of the start of the
+  // subregion that isn't done.  (The two can be distinguished by querying
+  // "cl->abort()".)  Return of "NULL" indicates that the iteration
+  // completed.
+  HeapWord*
+  object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl);
+
+  HeapWord*
+  oops_on_card_seq_iterate_careful(MemRegion mr,
+                                   FilterOutOfRegionClosure* cl);
+
+  // The region "mr" is entirely in "this", and starts and ends at block
+  // boundaries. The caller declares that all the contained blocks are
+  // coalesced into one.
+  void declare_filled_region_to_BOT(MemRegion mr) {
+    _offsets.single_block(mr.start(), mr.end());
+  }
+
+  // A version of block start that is guaranteed to find *some* block
+  // boundary at or before "p", but does not require object iteration, and
+  // may therefore be used safely when the heap is unparseable.
+  HeapWord* block_start_careful(const void* p) const {
+    return _offsets.block_start_careful(p);
+  }
+
+  // Requires that "addr" is within the region.  Returns the start of the
+  // first ("careful") block that starts at or after "addr", or else the
+  // "end" of the region if there is no such block.
+  HeapWord* next_block_start_careful(HeapWord* addr);
+
+  // Returns the zero-fill-state of the current region.
+  ZeroFillState zero_fill_state() { return (ZeroFillState)_zfs; }
+  bool zero_fill_is_allocated() { return _zfs == Allocated; }
+  Thread* zero_filler() { return _zero_filler; }
+
+  // Indicate that the contents of the region are unknown, and therefore
+  // might require zero-filling.
+  void set_zero_fill_needed() {
+    set_zero_fill_state_work(NotZeroFilled);
+  }
+  void set_zero_fill_in_progress(Thread* t) {
+    set_zero_fill_state_work(ZeroFilling);
+    _zero_filler = t;
+  }
+  void set_zero_fill_complete();
+  void set_zero_fill_allocated() {
+    set_zero_fill_state_work(Allocated);
+  }
+
+  void set_zero_fill_state_work(ZeroFillState zfs);
+
+  // This is called when a full collection shrinks the heap.
+  // We want to set the zero-fill state of the region to a value which says
+  // it is no longer part of the heap.  For now, we'll let "NotZeroFilled"
+  // fill that role.
+  void reset_zero_fill() {
+    set_zero_fill_state_work(NotZeroFilled);
+    _zero_filler = NULL;
+  }
+
+#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix)  \
+  virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl);
+  SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL)
+
+  CompactibleSpace* next_compaction_space() const;
+
+  virtual void reset_after_compaction();
+
+  void print() const;
+  void print_on(outputStream* st) const;
+
+  // Override
+  virtual void verify(bool allow_dirty) const;
+
+#ifdef DEBUG
+  HeapWord* allocate(size_t size);
+#endif
+};
+
+// HeapRegionClosure is used for iterating over regions.
+// Terminates the iteration when the "doHeapRegion" method returns "true".
+class HeapRegionClosure : public StackObj {
+  friend class HeapRegionSeq;
+  friend class G1CollectedHeap;
+
+  bool _complete;
+  void incomplete() { _complete = false; }
+
+ public:
+  HeapRegionClosure(): _complete(true) {}
+
+  // Typically called on each region until it returns true.
+  virtual bool doHeapRegion(HeapRegion* r) = 0;
+
+  // True after iteration if the closure was applied to all heap regions
+  // and returned "false" in all cases.
+  bool complete() { return _complete; }
+};
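+
+// A minimal usage sketch (the closure below is hypothetical, not part of
+// this file):
+//
+//   class CountRegionsClosure : public HeapRegionClosure {
+//     size_t _n;
+//   public:
+//     CountRegionsClosure() : _n(0) {}
+//     bool doHeapRegion(HeapRegion* r) { _n++; return false; } // never abort
+//     size_t n() const { return _n; }
+//   };
+//
+// The heap's region-iteration entry points apply such a closure to each
+// region in turn; returning true from doHeapRegion() terminates the
+// iteration early, and the iterating code then marks the closure incomplete.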
+
+// A linked list of heap regions.  It leaves the "next" field
+// unspecified; that's up to subtypes.
+class RegionList {
+protected:
+  virtual HeapRegion* get_next(HeapRegion* chr) = 0;
+  virtual void set_next(HeapRegion* chr,
+                        HeapRegion* new_next) = 0;
+
+  HeapRegion* _hd;
+  HeapRegion* _tl;
+  size_t _sz;
+
+  // Protected constructor because this type is only meaningful
+  // when the _get/_set next functions are defined.
+  RegionList() : _hd(NULL), _tl(NULL), _sz(0) {}
+public:
+  void reset() {
+    _hd = NULL;
+    _tl = NULL;
+    _sz = 0;
+  }
+  HeapRegion* hd() { return _hd; }
+  HeapRegion* tl() { return _tl; }
+  size_t sz() { return _sz; }
+  size_t length();
+
+  bool well_formed() {
+    return
+      ((hd() == NULL && tl() == NULL && sz() == 0)
+       || (hd() != NULL && tl() != NULL && sz() > 0))
+      && (sz() == length());
+  }
+  virtual void insert_before_head(HeapRegion* r);
+  void prepend_list(RegionList* new_list);
+  virtual HeapRegion* pop();
+  void dec_sz() { _sz--; }
+  // Requires that "r" is an element of the list, and is not the tail.
+  void delete_after(HeapRegion* r);
+};
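+
+// (UncleanRegionList below illustrates the intended pattern: the subtype
+// stores the links in the HeapRegion itself, via the region's
+// next_from_unclean_list()/set_next_on_unclean_list() accessors.)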
+
+class EmptyNonHRegionList: public RegionList {
+protected:
+  // Protected constructor because this type is only meaningful
+  // when the _get/_set next functions are defined.
+  EmptyNonHRegionList() : RegionList() {}
+
+public:
+  void insert_before_head(HeapRegion* r) {
+    //    assert(r->is_empty(), "Better be empty");
+    assert(!r->isHumongous(), "Better not be humongous.");
+    RegionList::insert_before_head(r);
+  }
+  void prepend_list(EmptyNonHRegionList* new_list) {
+    //    assert(new_list->hd() == NULL || new_list->hd()->is_empty(),
+    //     "Better be empty");
+    assert(new_list->hd() == NULL || !new_list->hd()->isHumongous(),
+           "Better not be humongous.");
+    //    assert(new_list->tl() == NULL || new_list->tl()->is_empty(),
+    //     "Better be empty");
+    assert(new_list->tl() == NULL || !new_list->tl()->isHumongous(),
+           "Better not be humongous.");
+    RegionList::prepend_list(new_list);
+  }
+};
+
+class UncleanRegionList: public EmptyNonHRegionList {
+public:
+  HeapRegion* get_next(HeapRegion* hr) {
+    return hr->next_from_unclean_list();
+  }
+  void set_next(HeapRegion* hr, HeapRegion* new_next) {
+    hr->set_next_on_unclean_list(new_next);
+  }
+
+  UncleanRegionList() : EmptyNonHRegionList() {}
+
+  void insert_before_head(HeapRegion* r) {
+    assert(!r->is_on_free_list(),
+           "Better not already be on free list");
+    assert(!r->is_on_unclean_list(),
+           "Better not already be on unclean list");
+    r->set_zero_fill_needed();
+    r->set_on_unclean_list(true);
+    EmptyNonHRegionList::insert_before_head(r);
+  }
+  void prepend_list(UncleanRegionList* new_list) {
+    assert(new_list->tl() == NULL || !new_list->tl()->is_on_free_list(),
+           "Better not already be on free list");
+    assert(new_list->tl() == NULL || new_list->tl()->is_on_unclean_list(),
+           "Better already be marked as on unclean list");
+    assert(new_list->hd() == NULL || !new_list->hd()->is_on_free_list(),
+           "Better not already be on free list");
+    assert(new_list->hd() == NULL || new_list->hd()->is_on_unclean_list(),
+           "Better already be marked as on unclean list");
+    EmptyNonHRegionList::prepend_list(new_list);
+  }
+  HeapRegion* pop() {
+    HeapRegion* res = RegionList::pop();
+    if (res != NULL) res->set_on_unclean_list(false);
+    return res;
+  }
+};
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
+
+#endif // SERIALGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
+  HeapWord* res = ContiguousSpace::allocate(size);
+  if (res != NULL) {
+    _offsets.alloc_block(res, size);
+  }
+  return res;
+}
+
+// Because of the requirement of keeping "_offsets" up to date with the
+// allocations, we serialize these with a lock.  Therefore this is best
+// used for larger LAB allocations only.
+inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
+  MutexLocker x(&_par_alloc_lock);
+  // This ought to be just "allocate", because of the lock above, but
+  // ContiguousSpace::allocate asserts that either the allocating thread
+  // holds the heap lock or it is the VM thread and we're at a safepoint.
+  // The best I (dld) could figure was to put a field in ContiguousSpace
+  // meaning "locking at safepoint taken care of", and set/reset that
+  // here.  But this will do for now, especially in light of the comment
+  // above.  Perhaps in the future we can find some lock-free way of
+  // keeping the coordination.
+  HeapWord* res = ContiguousSpace::par_allocate(size);
+  if (res != NULL) {
+    _offsets.alloc_block(res, size);
+  }
+  return res;
+}
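+
+// In both paths above the invariant is the same: every successful allocation
+// is recorded in the block offset table before it is returned to the caller,
+// so the block_start() queries below can map any interior address in this
+// space back to the start of a block.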
+
+inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
+  return _offsets.block_start(p);
+}
+
+inline HeapWord*
+G1OffsetTableContigSpace::block_start_const(const void* p) const {
+  return _offsets.block_start_const(p);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,1443 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_heapRegionRemSet.cpp.incl"
+
+#define HRRS_VERBOSE 0
+
+#define PRT_COUNT_OCCUPIED 1
+
+// OtherRegionsTable
+
+class PerRegionTable: public CHeapObj {
+  friend class OtherRegionsTable;
+  friend class HeapRegionRemSetIterator;
+
+  HeapRegion*     _hr;
+  BitMap          _bm;
+#if PRT_COUNT_OCCUPIED
+  jint            _occupied;
+#endif
+  PerRegionTable* _next_free;
+
+  PerRegionTable* next_free() { return _next_free; }
+  void set_next_free(PerRegionTable* prt) { _next_free = prt; }
+
+
+  static PerRegionTable* _free_list;
+
+#ifdef _MSC_VER
+  // For some reason, even though the classes are marked as friends they are
+  // unable to access CardsPerRegion when it is private/protected.  Only the
+  // Windows C++ compiler complains about this; Sun CC and Linux gcc have no
+  // problem with the access when it is private.
+
+  public:
+
+#endif // _MSC_VER
+
+  enum SomePrivateConstants {
+    CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+  };
+
+protected:
+  // We need access in order to union things into the base table.
+  BitMap* bm() { return &_bm; }
+
+  void recount_occupied() {
+    _occupied = (jint) bm()->count_one_bits();
+  }
+
+  PerRegionTable(HeapRegion* hr) :
+    _hr(hr),
+#if PRT_COUNT_OCCUPIED
+    _occupied(0),
+#endif
+    _bm(CardsPerRegion, false /* in-resource-area */)
+  {}
+
+  static void free(PerRegionTable* prt) {
+    while (true) {
+      PerRegionTable* fl = _free_list;
+      prt->set_next_free(fl);
+      PerRegionTable* res =
+        (PerRegionTable*)
+        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
+      if (res == fl) return;
+    }
+    ShouldNotReachHere();
+  }
+
+  static PerRegionTable* alloc(HeapRegion* hr) {
+    PerRegionTable* fl = _free_list;
+    while (fl != NULL) {
+      PerRegionTable* nxt = fl->next_free();
+      PerRegionTable* res =
+        (PerRegionTable*)
+        Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
+      if (res == fl) {
+        fl->init(hr);
+        return fl;
+      } else {
+        fl = _free_list;
+      }
+    }
+    assert(fl == NULL, "Loop condition.");
+    return new PerRegionTable(hr);
+  }
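+
+  // Note: free() and alloc() above maintain a lock-free LIFO free list.
+  // free() links the table in front of the current head and publishes it
+  // with a compare-and-swap; alloc() pops the head the same way, and falls
+  // back to "new" only when the list is observed to be empty.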
+
+  void add_card_work(short from_card, bool par) {
+    if (!_bm.at(from_card)) {
+      if (par) {
+        if (_bm.par_at_put(from_card, 1)) {
+#if PRT_COUNT_OCCUPIED
+          Atomic::inc(&_occupied);
+#endif
+        }
+      } else {
+        _bm.at_put(from_card, 1);
+#if PRT_COUNT_OCCUPIED
+        _occupied++;
+#endif
+      }
+    }
+  }
+
+  void add_reference_work(oop* from, bool par) {
+    // Must make this robust in case "from" is not in "_hr", because of
+    // concurrency.
+
+#if HRRS_VERBOSE
+    gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
+                           from, *from);
+#endif
+
+    HeapRegion* loc_hr = hr();
+    // If the test below fails, then this table was reused concurrently
+    // with this operation.  This is OK, since the old table was coarsened,
+    // and adding a bit to the new table is never incorrect.
+    if (loc_hr->is_in_reserved(from)) {
+      size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom());
+      size_t from_card =
+        hw_offset >>
+        (CardTableModRefBS::card_shift - LogHeapWordSize);
+
+      add_card_work((short) from_card, par);
+    }
+  }
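+
+  // For example, with the usual 512-byte cards (card_shift == 9) and
+  // 64-bit heap words (LogHeapWordSize == 3), the shift above is 6, i.e.
+  // one card covers 64 heap words of the region.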
+
+public:
+
+  HeapRegion* hr() const { return _hr; }
+
+#if PRT_COUNT_OCCUPIED
+  jint occupied() const {
+    // Overkill, but if we ever need it...
+    // guarantee(_occupied == _bm.count_one_bits(), "Check");
+    return _occupied;
+  }
+#else
+  jint occupied() const {
+    return _bm.count_one_bits();
+  }
+#endif
+
+  void init(HeapRegion* hr) {
+    _hr = hr;
+#if PRT_COUNT_OCCUPIED
+    _occupied = 0;
+#endif
+    _bm.clear();
+  }
+
+  void add_reference(oop* from) {
+    add_reference_work(from, /*parallel*/ true);
+  }
+
+  void seq_add_reference(oop* from) {
+    add_reference_work(from, /*parallel*/ false);
+  }
+
+  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
+    HeapWord* hr_bot = hr()->bottom();
+    int hr_first_card_index = ctbs->index_for(hr_bot);
+    bm()->set_intersection_at_offset(*card_bm, hr_first_card_index);
+#if PRT_COUNT_OCCUPIED
+    recount_occupied();
+#endif
+  }
+
+  void add_card(short from_card_index) {
+    add_card_work(from_card_index, /*parallel*/ true);
+  }
+
+  void seq_add_card(short from_card_index) {
+    add_card_work(from_card_index, /*parallel*/ false);
+  }
+
+  // (Destructively) union the bitmap of the current table into the given
+  // bitmap (which is assumed to be of the same size.)
+  void union_bitmap_into(BitMap* bm) {
+    bm->set_union(_bm);
+  }
+
+  // Mem size in bytes.
+  size_t mem_size() const {
+    return sizeof(this) + _bm.size_in_words() * HeapWordSize;
+  }
+
+  static size_t fl_mem_size() {
+    PerRegionTable* cur = _free_list;
+    size_t res = 0;
+    while (cur != NULL) {
+      res += sizeof(PerRegionTable);
+      cur = cur->next_free();
+    }
+    return res;
+  }
+
+  // Requires "from" to be in "hr()".
+  bool contains_reference(oop* from) const {
+    assert(hr()->is_in_reserved(from), "Precondition.");
+    size_t card_ind = pointer_delta(from, hr()->bottom(),
+                                    CardTableModRefBS::card_size);
+    return _bm.at(card_ind);
+  }
+};
+
+PerRegionTable* PerRegionTable::_free_list = NULL;
+
+
+#define COUNT_PAR_EXPANDS 0
+
+#if COUNT_PAR_EXPANDS
+static jint n_par_expands = 0;
+static jint n_par_contracts = 0;
+static jint par_expand_list_len = 0;
+static jint max_par_expand_list_len = 0;
+
+static void print_par_expand() {
+  Atomic::inc(&n_par_expands);
+  Atomic::inc(&par_expand_list_len);
+  if (par_expand_list_len > max_par_expand_list_len) {
+    max_par_expand_list_len = par_expand_list_len;
+  }
+  if ((n_par_expands % 10) == 0) {
+    gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, "
+                  "len = %d, max_len = %d\n.",
+                  n_par_expands, n_par_contracts, par_expand_list_len,
+                  max_par_expand_list_len);
+  }
+}
+#endif
+
+class PosParPRT: public PerRegionTable {
+  PerRegionTable** _par_tables;
+
+  enum SomePrivateConstants {
+    ReserveParTableExpansion = 1
+  };
+
+  void par_expand() {
+    int n = HeapRegionRemSet::num_par_rem_sets()-1;
+    if (n <= 0) return;
+    if (_par_tables == NULL) {
+      PerRegionTable* res =
+        (PerRegionTable*)
+        Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion,
+                            &_par_tables, NULL);
+      if (res != NULL) return;
+      // Otherwise, we reserved the right to do the expansion.
+
+      PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n);
+      for (int i = 0; i < n; i++) {
+        PerRegionTable* ptable = PerRegionTable::alloc(hr());
+        ptables[i] = ptable;
+      }
+      // Here we do not need an atomic.
+      _par_tables = ptables;
+#if COUNT_PAR_EXPANDS
+      print_par_expand();
+#endif
+      // We must put this table on the expanded list.
+      PosParPRT* exp_head = _par_expanded_list;
+      while (true) {
+        set_next_par_expanded(exp_head);
+        PosParPRT* res =
+          (PosParPRT*)
+          Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head);
+        if (res == exp_head) return;
+        // Otherwise.
+        exp_head = res;
+      }
+      ShouldNotReachHere();
+    }
+  }
+
+  void par_contract() {
+    assert(_par_tables != NULL, "Precondition.");
+    int n = HeapRegionRemSet::num_par_rem_sets()-1;
+    for (int i = 0; i < n; i++) {
+      _par_tables[i]->union_bitmap_into(bm());
+      PerRegionTable::free(_par_tables[i]);
+      _par_tables[i] = NULL;
+    }
+#if PRT_COUNT_OCCUPIED
+    // We must recount the "occupied."
+    recount_occupied();
+#endif
+    FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables);
+    _par_tables = NULL;
+#if COUNT_PAR_EXPANDS
+    Atomic::inc(&n_par_contracts);
+    Atomic::dec(&par_expand_list_len);
+#endif
+  }
+
+  static PerRegionTable** _par_table_fl;
+
+  PosParPRT* _next;
+
+  static PosParPRT* _free_list;
+
+  PerRegionTable** par_tables() const {
+    assert(uintptr_t(NULL) == 0, "Assumption.");
+    if (uintptr_t(_par_tables) <= ReserveParTableExpansion)
+      return NULL;
+    else
+      return _par_tables;
+  }
+
+  PosParPRT* _next_par_expanded;
+  PosParPRT* next_par_expanded() { return _next_par_expanded; }
+  void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; }
+  static PosParPRT* _par_expanded_list;
+
+public:
+
+  PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {}
+
+  jint occupied() const {
+    jint res = PerRegionTable::occupied();
+    if (par_tables() != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        res += par_tables()[i]->occupied();
+      }
+    }
+    return res;
+  }
+
+  void init(HeapRegion* hr) {
+    PerRegionTable::init(hr);
+    _next = NULL;
+    if (par_tables() != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        par_tables()[i]->init(hr);
+      }
+    }
+  }
+
+  static void free(PosParPRT* prt) {
+    while (true) {
+      PosParPRT* fl = _free_list;
+      prt->set_next(fl);
+      PosParPRT* res =
+        (PosParPRT*)
+        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
+      if (res == fl) return;
+    }
+    ShouldNotReachHere();
+  }
+
+  static PosParPRT* alloc(HeapRegion* hr) {
+    PosParPRT* fl = _free_list;
+    while (fl != NULL) {
+      PosParPRT* nxt = fl->next();
+      PosParPRT* res =
+        (PosParPRT*)
+        Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
+      if (res == fl) {
+        fl->init(hr);
+        return fl;
+      } else {
+        fl = _free_list;
+      }
+    }
+    assert(fl == NULL, "Loop condition.");
+    return new PosParPRT(hr);
+  }
+
+  PosParPRT* next() const { return _next; }
+  void set_next(PosParPRT* nxt) { _next = nxt; }
+  PosParPRT** next_addr() { return &_next; }
+
+  void add_reference(oop* from, int tid) {
+    // Expand if necessary.
+    PerRegionTable** pt = par_tables();
+    if (par_tables() == NULL && tid > 0 && hr()->is_gc_alloc_region()) {
+      par_expand();
+      pt = par_tables();
+    }
+    if (pt != NULL) {
+      // We always have to assume that mods to table 0 are in parallel,
+      // because of the claiming scheme in parallel expansion.  A thread
+      // with tid != 0 that finds the table to be NULL, but doesn't succeed
+      // in claiming the right to expand it, will end up in the else
+      // clause of the enclosing "if (pt != NULL)" test.  That thread could
+      // be delayed, and an add_reference from thread 0 could see the table
+      // expanded and come here.  Both threads would then be adding in
+      // parallel.  But we still get to avoid atomics for tids > 0.
+      if (tid == 0) {
+        PerRegionTable::add_reference(from);
+      } else {
+        pt[tid-1]->seq_add_reference(from);
+      }
+    } else {
+      // Not expanded -- add to the base table.
+      PerRegionTable::add_reference(from);
+    }
+  }
+
+  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
+    assert(_par_tables == NULL, "Precondition");
+    PerRegionTable::scrub(ctbs, card_bm);
+  }
+
+  size_t mem_size() const {
+    size_t res =
+      PerRegionTable::mem_size() + sizeof(this) - sizeof(PerRegionTable);
+    if (_par_tables != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        res += _par_tables[i]->mem_size();
+      }
+    }
+    return res;
+  }
+
+  static size_t fl_mem_size() {
+    PosParPRT* cur = _free_list;
+    size_t res = 0;
+    while (cur != NULL) {
+      res += sizeof(PosParPRT);
+      cur = cur->next();
+    }
+    return res;
+  }
+
+  bool contains_reference(oop* from) const {
+    if (PerRegionTable::contains_reference(from)) return true;
+    if (_par_tables != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        if (_par_tables[i]->contains_reference(from)) return true;
+      }
+    }
+    return false;
+  }
+
+  static void par_contract_all();
+
+};
+
+void PosParPRT::par_contract_all() {
+  PosParPRT* hd = _par_expanded_list;
+  while (hd != NULL) {
+    PosParPRT* nxt = hd->next_par_expanded();
+    PosParPRT* res =
+      (PosParPRT*)
+      Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd);
+    if (res == hd) {
+      // We claimed the right to contract this table.
+      hd->set_next_par_expanded(NULL);
+      hd->par_contract();
+      hd = _par_expanded_list;
+    } else {
+      hd = res;
+    }
+  }
+}
+
+PosParPRT* PosParPRT::_free_list = NULL;
+PosParPRT* PosParPRT::_par_expanded_list = NULL;
+
+jint OtherRegionsTable::_cache_probes = 0;
+jint OtherRegionsTable::_cache_hits = 0;
+
+size_t OtherRegionsTable::_max_fine_entries = 0;
+size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0;
+#if SAMPLE_FOR_EVICTION
+size_t OtherRegionsTable::_fine_eviction_stride = 0;
+size_t OtherRegionsTable::_fine_eviction_sample_size = 0;
+#endif
+
+OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
+  _g1h(G1CollectedHeap::heap()),
+  _m(Mutex::leaf, "An OtherRegionsTable lock", true),
+  _hr(hr),
+  _coarse_map(G1CollectedHeap::heap()->max_regions(),
+              false /* in-resource-area */),
+  _fine_grain_regions(NULL),
+  _n_fine_entries(0), _n_coarse_entries(0),
+#if SAMPLE_FOR_EVICTION
+  _fine_eviction_start(0),
+#endif
+  _sparse_table(hr)
+{
+  typedef PosParPRT* PosParPRTPtr;
+  if (_max_fine_entries == 0) {
+    assert(_mod_max_fine_entries_mask == 0, "Both or none.");
+    _max_fine_entries = (1 << G1LogRSRegionEntries);
+    _mod_max_fine_entries_mask = _max_fine_entries - 1;
+#if SAMPLE_FOR_EVICTION
+    assert(_fine_eviction_sample_size == 0
+           && _fine_eviction_stride == 0, "All init at same time.");
+    _fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries);
+    _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size;
+#endif
+  }
+  _fine_grain_regions = new PosParPRTPtr[_max_fine_entries];
+  if (_fine_grain_regions == NULL)
+    vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries,
+                          "Failed to allocate _fine_grain_entries.");
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    _fine_grain_regions[i] = NULL;
+  }
+}
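+
+// The table constructed above keeps incoming references at three
+// granularities, coarsest last: a sparse per-region card list
+// (_sparse_table), fine-grain per-region bitmaps hashed into
+// _fine_grain_regions, and a coarse bitmap (_coarse_map) with one bit per
+// "from" region.  add_reference() below promotes an entry from sparse to
+// fine when the sparse entry overflows, and from fine to coarse when the
+// fine table is full (see delete_region_table()).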
+
+int** OtherRegionsTable::_from_card_cache = NULL;
+size_t OtherRegionsTable::_from_card_cache_max_regions = 0;
+size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
+
+void OtherRegionsTable::init_from_card_cache(size_t max_regions) {
+  _from_card_cache_max_regions = max_regions;
+
+  int n_par_rs = HeapRegionRemSet::num_par_rem_sets();
+  _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs);
+  for (int i = 0; i < n_par_rs; i++) {
+    _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions);
+    for (size_t j = 0; j < max_regions; j++) {
+      _from_card_cache[i][j] = -1;  // An invalid value.
+    }
+  }
+  _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int);
+}
+
+void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) {
+  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max.");
+    for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) {
+      _from_card_cache[i][j] = -1;  // An invalid value.
+    }
+  }
+}
+
+#ifndef PRODUCT
+void OtherRegionsTable::print_from_card_cache() {
+  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    for (size_t j = 0; j < _from_card_cache_max_regions; j++) {
+      gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.",
+                    i, j, _from_card_cache[i][j]);
+    }
+  }
+}
+#endif
+
+void OtherRegionsTable::add_reference(oop* from, int tid) {
+  size_t cur_hrs_ind = hr()->hrs_index();
+
+#if HRRS_VERBOSE
+  gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
+                                                  from, *from);
+#endif
+
+  int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
+
+#if HRRS_VERBOSE
+  gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
+                hr()->bottom(), from_card,
+                _from_card_cache[tid][cur_hrs_ind]);
+#endif
+
+#define COUNT_CACHE 0
+#if COUNT_CACHE
+  jint p = Atomic::add(1, &_cache_probes);
+  if ((p % 10000) == 0) {
+    jint hits = _cache_hits;
+    gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.",
+                  _cache_hits, p, 100.0* (float)hits/(float)p);
+  }
+#endif
+  if (from_card == _from_card_cache[tid][cur_hrs_ind]) {
+#if HRRS_VERBOSE
+    gclog_or_tty->print_cr("  from-card cache hit.");
+#endif
+#if COUNT_CACHE
+    Atomic::inc(&_cache_hits);
+#endif
+    assert(contains_reference(from), "We just added it!");
+    return;
+  } else {
+    _from_card_cache[tid][cur_hrs_ind] = from_card;
+  }
+
+  // Note that this may be a continued H region.
+  HeapRegion* from_hr = _g1h->heap_region_containing_raw(from);
+  size_t from_hrs_ind = (size_t)from_hr->hrs_index();
+
+  // If the region is already coarsened, return.
+  if (_coarse_map.at(from_hrs_ind)) {
+#if HRRS_VERBOSE
+    gclog_or_tty->print_cr("  coarse map hit.");
+#endif
+    assert(contains_reference(from), "We just added it!");
+    return;
+  }
+
+  // Otherwise find a per-region table to add it to.
+  size_t ind = from_hrs_ind & _mod_max_fine_entries_mask;
+  PosParPRT* prt = find_region_table(ind, from_hr);
+  if (prt == NULL) {
+    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+    // Confirm that it's really not there...
+    prt = find_region_table(ind, from_hr);
+    if (prt == NULL) {
+
+      uintptr_t from_hr_bot_card_index =
+        uintptr_t(from_hr->bottom())
+          >> CardTableModRefBS::card_shift;
+      int card_index = from_card - from_hr_bot_card_index;
+      assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion,
+             "Must be in range.");
+      if (G1HRRSUseSparseTable &&
+          _sparse_table.add_card((short) from_hrs_ind, card_index)) {
+        if (G1RecordHRRSOops) {
+          HeapRegionRemSet::record(hr(), from);
+#if HRRS_VERBOSE
+          gclog_or_tty->print("   Added card " PTR_FORMAT " to region "
+                              "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                              align_size_down(uintptr_t(from),
+                                              CardTableModRefBS::card_size),
+                              hr()->bottom(), from);
+#endif
+        }
+#if HRRS_VERBOSE
+        gclog_or_tty->print_cr("   added card to sparse table.");
+#endif
+        assert(contains_reference_locked(from), "We just added it!");
+        return;
+      } else {
+#if HRRS_VERBOSE
+        gclog_or_tty->print_cr("   [tid %d] sparse table entry "
+                      "overflow(f: %d, t: %d)",
+                      tid, from_hrs_ind, cur_hrs_ind);
+#endif
+      }
+
+      // Otherwise, transfer from sparse to fine-grain.
+      short cards[SparsePRTEntry::CardsPerEntry];
+      if (G1HRRSUseSparseTable) {
+        bool res = _sparse_table.get_cards((short) from_hrs_ind, &cards[0]);
+        assert(res, "There should have been an entry");
+      }
+
+      if (_n_fine_entries == _max_fine_entries) {
+        prt = delete_region_table();
+      } else {
+        prt = PosParPRT::alloc(from_hr);
+      }
+      prt->init(from_hr);
+      // Record the outgoing pointer in the from_region's outgoing bitmap.
+      from_hr->rem_set()->add_outgoing_reference(hr());
+
+      PosParPRT* first_prt = _fine_grain_regions[ind];
+      prt->set_next(first_prt);  // XXX Maybe move to init?
+      _fine_grain_regions[ind] = prt;
+      _n_fine_entries++;
+
+      // Add in the cards from the sparse table.
+      if (G1HRRSUseSparseTable) {
+        for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) {
+          short c = cards[i];
+          if (c != SparsePRTEntry::NullEntry) {
+            prt->add_card(c);
+          }
+        }
+        // Now we can delete the sparse entry.
+        bool res = _sparse_table.delete_entry((short) from_hrs_ind);
+        assert(res, "It should have been there.");
+      }
+    }
+    assert(prt != NULL && prt->hr() == from_hr, "consequence");
+  }
+  // Note that we can't assert "prt->hr() == from_hr", because of the
+  // possibility of concurrent reuse.  But see head comment of
+  // OtherRegionsTable for why this is OK.
+  assert(prt != NULL, "Inv");
+
+  prt->add_reference(from, tid);
+  if (G1RecordHRRSOops) {
+    HeapRegionRemSet::record(hr(), from);
+#if HRRS_VERBOSE
+    gclog_or_tty->print("Added card " PTR_FORMAT " to region "
+                        "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                        align_size_down(uintptr_t(from),
+                                        CardTableModRefBS::card_size),
+                        hr()->bottom(), from);
+#endif
+  }
+  assert(contains_reference(from), "We just added it!");
+}
+
+PosParPRT*
+OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const {
+  assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
+  PosParPRT* prt = _fine_grain_regions[ind];
+  while (prt != NULL && prt->hr() != hr) {
+    prt = prt->next();
+  }
+  // Loop postcondition is the method postcondition.
+  return prt;
+}
+
+
+#define DRT_CENSUS 0
+
+#if DRT_CENSUS
+static const int HistoSize = 6;
+static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
+static int coarsenings = 0;
+static int occ_sum = 0;
+#endif
+
+jint OtherRegionsTable::_n_coarsenings = 0;
+
+PosParPRT* OtherRegionsTable::delete_region_table() {
+#if DRT_CENSUS
+  int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
+  const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 };
+#endif
+
+  assert(_m.owned_by_self(), "Precondition");
+  assert(_n_fine_entries == _max_fine_entries, "Precondition");
+  PosParPRT* max = NULL;
+  jint max_occ = 0;
+  PosParPRT** max_prev;
+  size_t max_ind;
+
+#if SAMPLE_FOR_EVICTION
+  size_t i = _fine_eviction_start;
+  for (size_t k = 0; k < _fine_eviction_sample_size; k++) {
+    size_t ii = i;
+    // Make sure we get a non-NULL sample.
+    while (_fine_grain_regions[ii] == NULL) {
+      ii++;
+      if (ii == _max_fine_entries) ii = 0;
+      guarantee(ii != i, "We must find one.");
+    }
+    PosParPRT** prev = &_fine_grain_regions[ii];
+    PosParPRT* cur = *prev;
+    while (cur != NULL) {
+      jint cur_occ = cur->occupied();
+      if (max == NULL || cur_occ > max_occ) {
+        max = cur;
+        max_prev = prev;
+        max_ind = i;
+        max_occ = cur_occ;
+      }
+      prev = cur->next_addr();
+      cur = cur->next();
+    }
+    i = i + _fine_eviction_stride;
+    if (i >= _n_fine_entries) i = i - _n_fine_entries;
+  }
+  _fine_eviction_start++;
+  if (_fine_eviction_start >= _n_fine_entries)
+    _fine_eviction_start -= _n_fine_entries;
+#else
+  for (int i = 0; i < _max_fine_entries; i++) {
+    PosParPRT** prev = &_fine_grain_regions[i];
+    PosParPRT* cur = *prev;
+    while (cur != NULL) {
+      jint cur_occ = cur->occupied();
+#if DRT_CENSUS
+      for (int k = 0; k < HistoSize; k++) {
+        if (cur_occ <= histo_limits[k]) {
+          histo[k]++; global_histo[k]++; break;
+        }
+      }
+#endif
+      if (max == NULL || cur_occ > max_occ) {
+        max = cur;
+        max_prev = prev;
+        max_ind = i;
+        max_occ = cur_occ;
+      }
+      prev = cur->next_addr();
+      cur = cur->next();
+    }
+  }
+#endif
+  // XXX
+  guarantee(max != NULL, "Since _n_fine_entries > 0");
+#if DRT_CENSUS
+  gclog_or_tty->print_cr("In a coarsening: histo of occs:");
+  for (int k = 0; k < HistoSize; k++) {
+    gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], histo[k]);
+  }
+  coarsenings++;
+  occ_sum += max_occ;
+  if ((coarsenings % 100) == 0) {
+    gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings);
+    for (int k = 0; k < HistoSize; k++) {
+      gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], global_histo[k]);
+    }
+    gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.",
+                  (float)occ_sum/(float)coarsenings);
+  }
+#endif
+
+  // Set the corresponding coarse bit.
+  int max_hrs_index = max->hr()->hrs_index();
+  if (!_coarse_map.at(max_hrs_index)) {
+    _coarse_map.at_put(max_hrs_index, true);
+    _n_coarse_entries++;
+#if 0
+    gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
+               "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
+               hr()->bottom(),
+               max->hr()->bottom(),
+               _n_coarse_entries);
+#endif
+  }
+
+  // Unsplice.
+  *max_prev = max->next();
+  Atomic::inc(&_n_coarsenings);
+  _n_fine_entries--;
+  return max;
+}
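+
+// When eviction sampling is enabled above, rather than scanning every
+// bucket the loop inspects _fine_eviction_sample_size buckets, starting at
+// _fine_eviction_start and stepping by _fine_eviction_stride, and coarsens
+// the most-occupied table found in that sample.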
+
+
+// At present, this must be called stop-world single-threaded.
+void OtherRegionsTable::scrub(CardTableModRefBS* ctbs,
+                              BitMap* region_bm, BitMap* card_bm) {
+  // First eliminate garbage regions from the coarse map.
+  if (G1RSScrubVerbose)
+    gclog_or_tty->print_cr("Scrubbing region %d:", hr()->hrs_index());
+
+  assert(_coarse_map.size() == region_bm->size(), "Precondition");
+  if (G1RSScrubVerbose)
+    gclog_or_tty->print("   Coarse map: before = %d...", _n_coarse_entries);
+  _coarse_map.set_intersection(*region_bm);
+  _n_coarse_entries = _coarse_map.count_one_bits();
+  if (G1RSScrubVerbose)
+    gclog_or_tty->print_cr("   after = %d.", _n_coarse_entries);
+
+  // Now do the fine-grained maps.
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    PosParPRT** prev = &_fine_grain_regions[i];
+    while (cur != NULL) {
+      PosParPRT* nxt = cur->next();
+      // If the entire region is dead, eliminate.
+      if (G1RSScrubVerbose)
+        gclog_or_tty->print_cr("     For other region %d:", cur->hr()->hrs_index());
+      if (!region_bm->at(cur->hr()->hrs_index())) {
+        *prev = nxt;
+        cur->set_next(NULL);
+        _n_fine_entries--;
+        if (G1RSScrubVerbose)
+          gclog_or_tty->print_cr("          deleted via region map.");
+        PosParPRT::free(cur);
+      } else {
+        // Do fine-grain elimination.
+        if (G1RSScrubVerbose)
+          gclog_or_tty->print("          occ: before = %4d.", cur->occupied());
+        cur->scrub(ctbs, card_bm);
+        if (G1RSScrubVerbose)
+          gclog_or_tty->print_cr("          after = %4d.", cur->occupied());
+        // Did that empty the table completely?
+        if (cur->occupied() == 0) {
+          *prev = nxt;
+          cur->set_next(NULL);
+          _n_fine_entries--;
+          PosParPRT::free(cur);
+        } else {
+          prev = cur->next_addr();
+        }
+      }
+      cur = nxt;
+    }
+  }
+  // Since we may have deleted a from_card_cache entry from the RS, clear
+  // the FCC.
+  clear_fcc();
+}
+
+
+size_t OtherRegionsTable::occupied() const {
+  // Cast away const in this case.
+  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
+  size_t sum = occ_fine();
+  sum += occ_sparse();
+  sum += occ_coarse();
+  return sum;
+}
+
+size_t OtherRegionsTable::occ_fine() const {
+  size_t sum = 0;
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    while (cur != NULL) {
+      sum += cur->occupied();
+      cur = cur->next();
+    }
+  }
+  return sum;
+}
+
+size_t OtherRegionsTable::occ_coarse() const {
+  return (_n_coarse_entries * PosParPRT::CardsPerRegion);
+}
+
+size_t OtherRegionsTable::occ_sparse() const {
+  return _sparse_table.occupied();
+}
+
+size_t OtherRegionsTable::mem_size() const {
+  // Cast away const in this case.
+  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
+  size_t sum = 0;
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    while (cur != NULL) {
+      sum += cur->mem_size();
+      cur = cur->next();
+    }
+  }
+  sum += (sizeof(PosParPRT*) * _max_fine_entries);
+  sum += (_coarse_map.size_in_words() * HeapWordSize);
+  sum += (_sparse_table.mem_size());
+  sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above.
+  return sum;
+}
+
+size_t OtherRegionsTable::static_mem_size() {
+  return _from_card_cache_mem_size;
+}
+
+size_t OtherRegionsTable::fl_mem_size() {
+  return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size();
+}
+
+void OtherRegionsTable::clear_fcc() {
+  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    _from_card_cache[i][hr()->hrs_index()] = -1;
+  }
+}
+
+void OtherRegionsTable::clear() {
+  MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    while (cur != NULL) {
+      PosParPRT* nxt = cur->next();
+      PosParPRT::free(cur);
+      cur = nxt;
+    }
+    _fine_grain_regions[i] = NULL;
+  }
+  _sparse_table.clear();
+  _coarse_map.clear();
+  _n_fine_entries = 0;
+  _n_coarse_entries = 0;
+
+  clear_fcc();
+}
+
+void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) {
+  MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+  size_t hrs_ind = (size_t)from_hr->hrs_index();
+  size_t ind = hrs_ind & _mod_max_fine_entries_mask;
+  if (del_single_region_table(ind, from_hr)) {
+    assert(!_coarse_map.at(hrs_ind), "Inv");
+  } else {
+    _coarse_map.par_at_put(hrs_ind, 0);
+  }
+  // Check to see if any of the fcc entries come from here.
+  int hr_ind = hr()->hrs_index();
+  for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
+    int fcc_ent = _from_card_cache[tid][hr_ind];
+    if (fcc_ent != -1) {
+      HeapWord* card_addr = (HeapWord*)
+        (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift);
+      if (hr()->is_in_reserved(card_addr)) {
+        // Clear the from card cache.
+        _from_card_cache[tid][hr_ind] = -1;
+      }
+    }
+  }
+}
+
+bool OtherRegionsTable::del_single_region_table(size_t ind,
+                                                HeapRegion* hr) {
+  assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
+  PosParPRT** prev_addr = &_fine_grain_regions[ind];
+  PosParPRT* prt = *prev_addr;
+  while (prt != NULL && prt->hr() != hr) {
+    prev_addr = prt->next_addr();
+    prt = prt->next();
+  }
+  if (prt != NULL) {
+    assert(prt->hr() == hr, "Loop postcondition.");
+    *prev_addr = prt->next();
+    PosParPRT::free(prt);
+    _n_fine_entries--;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool OtherRegionsTable::contains_reference(oop* from) const {
+  // Cast away const in this case.
+  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
+  return contains_reference_locked(from);
+}
+
+bool OtherRegionsTable::contains_reference_locked(oop* from) const {
+  HeapRegion* hr = _g1h->heap_region_containing_raw(from);
+  if (hr == NULL) return false;
+  size_t hr_ind = hr->hrs_index();
+  // Is this region in the coarse map?
+  if (_coarse_map.at(hr_ind)) return true;
+
+  PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
+                                     hr);
+  if (prt != NULL) {
+    return prt->contains_reference(from);
+
+  } else {
+    uintptr_t from_card =
+      (uintptr_t(from) >> CardTableModRefBS::card_shift);
+    uintptr_t hr_bot_card_index =
+      uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift;
+    assert(from_card >= hr_bot_card_index, "Inv");
+    int card_index = from_card - hr_bot_card_index;
+    return _sparse_table.contains_card((short)hr_ind, card_index);
+  }
+
+
+}
+
+
+bool HeapRegionRemSet::_par_traversal = false;
+
+void HeapRegionRemSet::set_par_traversal(bool b) {
+  assert(_par_traversal != b, "Proper alternation...");
+  _par_traversal = b;
+}
+
+int HeapRegionRemSet::num_par_rem_sets() {
+  // We always have at least two, so that a mutator thread can claim an
+  // id and add to a rem set.
+  return (int) MAX2(ParallelGCThreads, (size_t)2);
+}
+
+HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
+                                   HeapRegion* hr)
+    : _bosa(bosa), _other_regions(hr),
+      _outgoing_region_map(G1CollectedHeap::heap()->max_regions(),
+                           false /* in-resource-area */),
+      _iter_state(Unclaimed)
+{}
+
+
+void HeapRegionRemSet::init_for_par_iteration() {
+  _iter_state = Unclaimed;
+}
+
+bool HeapRegionRemSet::claim_iter() {
+  if (_iter_state != Unclaimed) return false;
+  jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_state), Unclaimed);
+  return (res == Unclaimed);
+}
+
+void HeapRegionRemSet::set_iter_complete() {
+  _iter_state = Complete;
+}
+
+bool HeapRegionRemSet::iter_is_complete() {
+  return _iter_state == Complete;
+}
+
+
+void HeapRegionRemSet::init_iterator(HeapRegionRemSetIterator* iter) const {
+  iter->initialize(this);
+}
+
+#ifndef PRODUCT
+void HeapRegionRemSet::print() const {
+  HeapRegionRemSetIterator iter;
+  init_iterator(&iter);
+  size_t card_index;
+  while (iter.has_next(card_index)) {
+    HeapWord* card_start =
+      G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index);
+    gclog_or_tty->print_cr("  Card " PTR_FORMAT ".", card_start);
+  }
+  // XXX
+  if (iter.n_yielded() != occupied()) {
+    gclog_or_tty->print_cr("Yielded disagrees with occupied:");
+    gclog_or_tty->print_cr("  %6d yielded (%6d coarse, %6d fine).",
+                  iter.n_yielded(),
+                  iter.n_yielded_coarse(), iter.n_yielded_fine());
+    gclog_or_tty->print_cr("  %6d occ     (%6d coarse, %6d fine).",
+                  occupied(), occ_coarse(), occ_fine());
+  }
+  guarantee(iter.n_yielded() == occupied(),
+            "We should have yielded all the represented cards.");
+}
+#endif
+
+void HeapRegionRemSet::cleanup() {
+  SparsePRT::cleanup_all();
+}
+
+void HeapRegionRemSet::par_cleanup() {
+  PosParPRT::par_contract_all();
+}
+
+void HeapRegionRemSet::add_outgoing_reference(HeapRegion* to_hr) {
+  _outgoing_region_map.par_at_put(to_hr->hrs_index(), 1);
+}
+
+void HeapRegionRemSet::clear() {
+  clear_outgoing_entries();
+  _outgoing_region_map.clear();
+  _other_regions.clear();
+  assert(occupied() == 0, "Should be clear.");
+}
+
+void HeapRegionRemSet::clear_outgoing_entries() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  size_t i = _outgoing_region_map.get_next_one_offset(0);
+  while (i < _outgoing_region_map.size()) {
+    HeapRegion* to_region = g1h->region_at(i);
+    to_region->rem_set()->clear_incoming_entry(hr());
+    i = _outgoing_region_map.get_next_one_offset(i+1);
+  }
+}
+
+
+void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs,
+                             BitMap* region_bm, BitMap* card_bm) {
+  _other_regions.scrub(ctbs, region_bm, card_bm);
+}
+
+//-------------------- Iteration --------------------
+
+HeapRegionRemSetIterator::
+HeapRegionRemSetIterator() :
+  _hrrs(NULL),
+  _g1h(G1CollectedHeap::heap()),
+  _bosa(NULL),
+  _sparse_iter(size_t(G1CollectedHeap::heap()->reserved_region().start())
+               >> CardTableModRefBS::card_shift)
+{}
+
+void HeapRegionRemSetIterator::initialize(const HeapRegionRemSet* hrrs) {
+  _hrrs = hrrs;
+  _coarse_map = &_hrrs->_other_regions._coarse_map;
+  _fine_grain_regions = _hrrs->_other_regions._fine_grain_regions;
+  _bosa = _hrrs->bosa();
+
+  _is = Sparse;
+  // Set these values so that we increment to the first region.
+  _coarse_cur_region_index = -1;
+  _coarse_cur_region_cur_card = (PosParPRT::CardsPerRegion-1);
+
+  _cur_region_cur_card = 0;
+
+  _fine_array_index = -1;
+  _fine_cur_prt = NULL;
+
+  _n_yielded_coarse = 0;
+  _n_yielded_fine = 0;
+  _n_yielded_sparse = 0;
+
+  _sparse_iter.init(&hrrs->_other_regions._sparse_table);
+}
+
+bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) {
+  if (_hrrs->_other_regions._n_coarse_entries == 0) return false;
+  // Go to the next card.
+  _coarse_cur_region_cur_card++;
+  // Was that the last card in the current region?
+  if (_coarse_cur_region_cur_card == PosParPRT::CardsPerRegion) {
+    // Yes: find the next region.  This may leave _coarse_cur_region_index
+    // set to the last index, in which case there are no more coarse
+    // regions.
+    _coarse_cur_region_index =
+      (int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1);
+    if ((size_t)_coarse_cur_region_index < _coarse_map->size()) {
+      _coarse_cur_region_cur_card = 0;
+      HeapWord* r_bot =
+        _g1h->region_at(_coarse_cur_region_index)->bottom();
+      _cur_region_card_offset = _bosa->index_for(r_bot);
+    } else {
+      return false;
+    }
+  }
+  // If we didn't return false above, then we can yield a card.
+  card_index = _cur_region_card_offset + _coarse_cur_region_cur_card;
+  return true;
+}
+
+void HeapRegionRemSetIterator::fine_find_next_non_null_prt() {
+  // Otherwise, find the next bucket list in the array.
+  _fine_array_index++;
+  while (_fine_array_index < (int) OtherRegionsTable::_max_fine_entries) {
+    _fine_cur_prt = _fine_grain_regions[_fine_array_index];
+    if (_fine_cur_prt != NULL) return;
+    else _fine_array_index++;
+  }
+  assert(_fine_cur_prt == NULL, "Loop post");
+}
+
+bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) {
+  if (fine_has_next()) {
+    _cur_region_cur_card =
+      _fine_cur_prt->_bm.get_next_one_offset(_cur_region_cur_card + 1);
+  }
+  while (!fine_has_next()) {
+    if (_cur_region_cur_card == PosParPRT::CardsPerRegion) {
+      _cur_region_cur_card = 0;
+      _fine_cur_prt = _fine_cur_prt->next();
+    }
+    if (_fine_cur_prt == NULL) {
+      fine_find_next_non_null_prt();
+      if (_fine_cur_prt == NULL) return false;
+    }
+    assert(_fine_cur_prt != NULL && _cur_region_cur_card == 0,
+           "inv.");
+    HeapWord* r_bot =
+      _fine_cur_prt->hr()->bottom();
+    _cur_region_card_offset = _bosa->index_for(r_bot);
+    _cur_region_cur_card = _fine_cur_prt->_bm.get_next_one_offset(0);
+  }
+  assert(fine_has_next(), "Or else we exited the loop via the return.");
+  card_index = _cur_region_card_offset + _cur_region_cur_card;
+  return true;
+}
+
+bool HeapRegionRemSetIterator::fine_has_next() {
+  return
+    _fine_cur_prt != NULL &&
+    _cur_region_cur_card < PosParPRT::CardsPerRegion;
+}
+
+bool HeapRegionRemSetIterator::has_next(size_t& card_index) {
+  switch (_is) {
+  case Sparse:
+    if (_sparse_iter.has_next(card_index)) {
+      _n_yielded_sparse++;
+      return true;
+    }
+    // Otherwise, deliberate fall-through
+    _is = Fine;
+  case Fine:
+    if (fine_has_next(card_index)) {
+      _n_yielded_fine++;
+      return true;
+    }
+    // Otherwise, deliberate fall-through
+    _is = Coarse;
+  case Coarse:
+    if (coarse_has_next(card_index)) {
+      _n_yielded_coarse++;
+      return true;
+    }
+    // Otherwise...
+    break;
+  }
+  assert(ParallelGCThreads > 1 ||
+         n_yielded() == _hrrs->occupied(),
+         "Should have yielded all the cards in the rem set "
+         "(in the non-par case).");
+  return false;
+}
+
+
+
+oop**        HeapRegionRemSet::_recorded_oops = NULL;
+HeapWord**   HeapRegionRemSet::_recorded_cards = NULL;
+HeapRegion** HeapRegionRemSet::_recorded_regions = NULL;
+int          HeapRegionRemSet::_n_recorded = 0;
+
+HeapRegionRemSet::Event* HeapRegionRemSet::_recorded_events = NULL;
+int*         HeapRegionRemSet::_recorded_event_index = NULL;
+int          HeapRegionRemSet::_n_recorded_events = 0;
+
+void HeapRegionRemSet::record(HeapRegion* hr, oop* f) {
+  if (_recorded_oops == NULL) {
+    assert(_n_recorded == 0
+           && _recorded_cards == NULL
+           && _recorded_regions == NULL,
+           "Inv");
+    _recorded_oops = NEW_C_HEAP_ARRAY(oop*, MaxRecorded);
+    _recorded_cards = NEW_C_HEAP_ARRAY(HeapWord*, MaxRecorded);
+    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*, MaxRecorded);
+  }
+  if (_n_recorded == MaxRecorded) {
+    gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded);
+  } else {
+    _recorded_cards[_n_recorded] =
+      (HeapWord*)align_size_down(uintptr_t(f),
+                                 CardTableModRefBS::card_size);
+    _recorded_oops[_n_recorded] = f;
+    _recorded_regions[_n_recorded] = hr;
+    _n_recorded++;
+  }
+}
+
+void HeapRegionRemSet::record_event(Event evnt) {
+  if (!G1RecordHRRSEvents) return;
+
+  if (_recorded_events == NULL) {
+    assert(_n_recorded_events == 0
+           && _recorded_event_index == NULL,
+           "Inv");
+    _recorded_events = NEW_C_HEAP_ARRAY(Event, MaxRecordedEvents);
+    _recorded_event_index = NEW_C_HEAP_ARRAY(int, MaxRecordedEvents);
+  }
+  if (_n_recorded_events == MaxRecordedEvents) {
+    gclog_or_tty->print_cr("Filled up 'recorded_events' (%d).", MaxRecordedEvents);
+  } else {
+    _recorded_events[_n_recorded_events] = evnt;
+    _recorded_event_index[_n_recorded_events] = _n_recorded;
+    _n_recorded_events++;
+  }
+}
+
+void HeapRegionRemSet::print_event(outputStream* str, Event evnt) {
+  switch (evnt) {
+  case Event_EvacStart:
+    str->print("Evac Start");
+    break;
+  case Event_EvacEnd:
+    str->print("Evac End");
+    break;
+  case Event_RSUpdateEnd:
+    str->print("RS Update End");
+    break;
+  }
+}
+
+void HeapRegionRemSet::print_recorded() {
+  int cur_evnt = 0;
+  Event cur_evnt_kind;
+  int cur_evnt_ind = 0;
+  if (_n_recorded_events > 0) {
+    cur_evnt_kind = _recorded_events[cur_evnt];
+    cur_evnt_ind = _recorded_event_index[cur_evnt];
+  }
+
+  for (int i = 0; i < _n_recorded; i++) {
+    while (cur_evnt < _n_recorded_events && i == cur_evnt_ind) {
+      gclog_or_tty->print("Event: ");
+      print_event(gclog_or_tty, cur_evnt_kind);
+      gclog_or_tty->print_cr("");
+      cur_evnt++;
+      if (cur_evnt < MaxRecordedEvents) {
+        cur_evnt_kind = _recorded_events[cur_evnt];
+        cur_evnt_ind = _recorded_event_index[cur_evnt];
+      }
+    }
+    gclog_or_tty->print("Added card " PTR_FORMAT " to region [" PTR_FORMAT "...]"
+                        " for ref " PTR_FORMAT ".\n",
+                        _recorded_cards[i], _recorded_regions[i]->bottom(),
+                        _recorded_oops[i]);
+  }
+}
+
+#ifndef PRODUCT
+void HeapRegionRemSet::test() {
+  os::sleep(Thread::current(), (jlong)5000, false);
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // Run with "-XX:G1LogRSRegionEntries=2", so that regions 1 and 5 end up
+  // in the same hash bucket.
+  HeapRegion* hr0 = g1h->region_at(0);
+  HeapRegion* hr1 = g1h->region_at(1);
+  HeapRegion* hr2 = g1h->region_at(5);
+  HeapRegion* hr3 = g1h->region_at(6);
+  HeapRegion* hr4 = g1h->region_at(7);
+  HeapRegion* hr5 = g1h->region_at(8);
+
+  HeapWord* hr1_start = hr1->bottom();
+  HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2;
+  HeapWord* hr1_last = hr1->end() - 1;
+
+  HeapWord* hr2_start = hr2->bottom();
+  HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2;
+  HeapWord* hr2_last = hr2->end() - 1;
+
+  HeapWord* hr3_start = hr3->bottom();
+  HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2;
+  HeapWord* hr3_last = hr3->end() - 1;
+
+  HeapRegionRemSet* hrrs = hr0->rem_set();
+
+  // Make three references from region 0x101...
+  hrrs->add_reference((oop*)hr1_start);
+  hrrs->add_reference((oop*)hr1_mid);
+  hrrs->add_reference((oop*)hr1_last);
+
+  hrrs->add_reference((oop*)hr2_start);
+  hrrs->add_reference((oop*)hr2_mid);
+  hrrs->add_reference((oop*)hr2_last);
+
+  hrrs->add_reference((oop*)hr3_start);
+  hrrs->add_reference((oop*)hr3_mid);
+  hrrs->add_reference((oop*)hr3_last);
+
+  // Now cause a coarsening.
+  hrrs->add_reference((oop*)hr4->bottom());
+  hrrs->add_reference((oop*)hr5->bottom());
+
+  // Now, does iteration yield these three?
+  HeapRegionRemSetIterator iter;
+  hrrs->init_iterator(&iter);
+  size_t sum = 0;
+  size_t card_index;
+  while (iter.has_next(card_index)) {
+    HeapWord* card_start =
+      G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index);
+    gclog_or_tty->print_cr("  Card " PTR_FORMAT ".", card_start);
+    sum++;
+  }
+  guarantee(sum == 11 - 3 + 2048, "Failure");
+  guarantee(sum == hrrs->occupied(), "Failure");
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Remembered set for a heap region.  Represent a set of "cards" that
+// contain pointers into the owner heap region.  Cards are defined somewhat
+// abstractly, in terms of what the "BlockOffsetTable" in use can parse.
+
+class G1CollectedHeap;
+class G1BlockOffsetSharedArray;
+class HeapRegion;
+class HeapRegionRemSetIterator;
+class PosParPRT;
+class SparsePRT;
+
+
+// The "_coarse_map" is a bitmap with one bit for each region, where set
+// bits indicate that the corresponding region may contain some pointer
+// into the owning region.
+
+// The "_fine_grain_entries" array is an open hash table of PerRegionTables
+// (PRTs), indicating regions for which we're keeping the RS as a set of
+// cards.  The strategy is to cap the size of the fine-grain table,
+// deleting an entry and setting the corresponding coarse-grained bit when
+// we would overflow this cap.
+
+// We use a mixture of locking and lock-free techniques here.  We allow
+// threads to locate PRTs without locking, but threads attempting to alter
+// a bucket list obtain a lock.  This means that any failing attempt to
+// find a PRT must be retried with the lock.  It might seem dangerous that
+// a read can find a PRT that is concurrently deleted.  This is all right,
+// because:
+//
+//   1) We only actually free PRT's at safe points (though we reuse them at
+//      other times).
+//   2) We find PRT's in an attempt to add entries.  If a PRT is deleted,
+//      its _coarse_map bit is set, so the card that we were attempting to
+//      add is represented.  If a deleted PRT is re-used, a thread adding a
+//      bit, thinking the PRT is for a different region, does no harm.
+
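+// As an illustrative sketch of the lookup-then-lock pattern described
+// above (not part of the original notes; the surrounding logic is a
+// hypothetical simplification, though find_region_table and _m are the
+// real names declared below):
+//
+//   PosParPRT* prt = find_region_table(ind, from_hr);  // lock-free lookup
+//   if (prt == NULL) {
+//     MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+//     prt = find_region_table(ind, from_hr);           // retry under _m
+//     if (prt == NULL) {
+//       // allocate a fresh PRT and link it into the bucket list
+//     }
+//   }
+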
+class OtherRegionsTable: public CHeapObj {
+  friend class HeapRegionRemSetIterator;
+
+  G1CollectedHeap* _g1h;
+  Mutex            _m;
+  HeapRegion*      _hr;
+
+  // These are protected by "_m".
+  BitMap      _coarse_map;
+  size_t      _n_coarse_entries;
+  static jint _n_coarsenings;
+
+  PosParPRT** _fine_grain_regions;
+  size_t      _n_fine_entries;
+
+#define SAMPLE_FOR_EVICTION 1
+#if SAMPLE_FOR_EVICTION
+  size_t        _fine_eviction_start;
+  static size_t _fine_eviction_stride;
+  static size_t _fine_eviction_sample_size;
+#endif
+
+  SparsePRT   _sparse_table;
+
+  // These are static after init.
+  static size_t _max_fine_entries;
+  static size_t _mod_max_fine_entries_mask;
+
+  // Requires "prt" to be the first element of the bucket list appropriate
+  // for "hr".  If this list contains an entry for "hr", return it,
+  // otherwise return "NULL".
+  PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const;
+
+  // Find, delete, and return a candidate PosParPRT, if any exists,
+  // adding the deleted region to the coarse bitmap.  Requires the caller
+  // to hold _m, and the fine-grain table to be full.
+  PosParPRT* delete_region_table();
+
+  // If a PRT for "hr" is in the bucket list indicated by "ind" (which must
+  // be the correct index for "hr"), delete it and return true; else return
+  // false.
+  bool del_single_region_table(size_t ind, HeapRegion* hr);
+
+  static jint _cache_probes;
+  static jint _cache_hits;
+
+  // Indexed by thread X heap region, to minimize thread contention.
+  static int** _from_card_cache;
+  static size_t _from_card_cache_max_regions;
+  static size_t _from_card_cache_mem_size;
+
+public:
+  OtherRegionsTable(HeapRegion* hr);
+
+  HeapRegion* hr() const { return _hr; }
+
+  // For now.  Could "expand" some tables in the future, so that this made
+  // sense.
+  void add_reference(oop* from, int tid);
+
+  void add_reference(oop* from) {
+    return add_reference(from, 0);
+  }
+
+  // Removes any entries shown by the given bitmaps to contain only dead
+  // objects.
+  void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
+
+  // Not const because it takes a lock.
+  size_t occupied() const;
+  size_t occ_fine() const;
+  size_t occ_coarse() const;
+  size_t occ_sparse() const;
+
+  static jint n_coarsenings() { return _n_coarsenings; }
+
+  // Returns size in bytes.
+  // Not const because it takes a lock.
+  size_t mem_size() const;
+  static size_t static_mem_size();
+  static size_t fl_mem_size();
+
+  bool contains_reference(oop* from) const;
+  bool contains_reference_locked(oop* from) const;
+
+  void clear();
+
+  // Specifically clear the from_card_cache.
+  void clear_fcc();
+
+  // "from_hr" is being cleared; remove any entries from it.
+  void clear_incoming_entry(HeapRegion* from_hr);
+
+  // Declare the heap size (in # of regions) to the OtherRegionsTable.
+  // (Uses it to initialize from_card_cache).
+  static void init_from_card_cache(size_t max_regions);
+
+  // Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
+  // Make sure any entries for higher regions are invalid.
+  static void shrink_from_card_cache(size_t new_n_regs);
+
+  static void print_from_card_cache();
+
+};
+
+
+class HeapRegionRemSet : public CHeapObj {
+  friend class VMStructs;
+  friend class HeapRegionRemSetIterator;
+
+public:
+  enum Event {
+    Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd
+  };
+
+private:
+  G1BlockOffsetSharedArray* _bosa;
+  G1BlockOffsetSharedArray* bosa() const { return _bosa; }
+
+  static bool _par_traversal;
+
+  OtherRegionsTable _other_regions;
+
+  // One set bit for every region that has an entry for this one.
+  BitMap _outgoing_region_map;
+
+  // Clear entries for the current region in any rem sets named in
+  // the _outgoing_region_map.
+  void clear_outgoing_entries();
+
+#if MAYBE
+  // Audit the given card index.
+  void audit_card(size_t card_num, HeapRegion* hr, u2* rc_arr,
+                  HeapRegionRemSet* empty_cards, size_t* one_obj_cards);
+
+  // Assumes that "audit_stage1" has been called for "hr", to set up
+  // "shadow" and "new_rs" appropriately.  Identifies individual popular
+  // objects; returns "true" if any are found.
+  bool audit_find_pop(HeapRegion* hr, u2* rc_arr);
+
+  // Assumes that "audit_stage1" has been called for "hr", to set up
+  // "shadow" and "new_rs" appropriately.  Identifies individual popular
+  // objects, and determines the number of entries in "new_rs" if any such
+  // popular objects are ignored.  If this is sufficiently small, returns
+  // "false" to indicate that a constraint should not be introduced.
+  // Otherwise, returns "true" to indicate that we should go ahead with
+  // adding the constraint.
+  bool audit_stag(HeapRegion* hr, u2* rc_arr);
+
+
+  u2* alloc_rc_array();
+
+  SeqHeapRegionRemSet* audit_post(u2* rc_arr, size_t multi_obj_crds,
+                                  SeqHeapRegionRemSet* empty_cards);
+#endif
+
+  enum ParIterState { Unclaimed, Claimed, Complete };
+  ParIterState _iter_state;
+
+  // Unused unless G1RecordHRRSOops is true.
+
+  static const int MaxRecorded = 1000000;
+  static oop**        _recorded_oops;
+  static HeapWord**   _recorded_cards;
+  static HeapRegion** _recorded_regions;
+  static int          _n_recorded;
+
+  static const int MaxRecordedEvents = 1000;
+  static Event*       _recorded_events;
+  static int*         _recorded_event_index;
+  static int          _n_recorded_events;
+
+  static void print_event(outputStream* str, Event evnt);
+
+public:
+  HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
+                   HeapRegion* hr);
+
+  static int num_par_rem_sets();
+  static bool par_traversal() { return _par_traversal; }
+  static void set_par_traversal(bool b);
+
+  HeapRegion* hr() const {
+    return _other_regions.hr();
+  }
+
+  size_t occupied() const {
+    return _other_regions.occupied();
+  }
+  size_t occ_fine() const {
+    return _other_regions.occ_fine();
+  }
+  size_t occ_coarse() const {
+    return _other_regions.occ_coarse();
+  }
+  size_t occ_sparse() const {
+    return _other_regions.occ_sparse();
+  }
+
+  static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); }
+
+  /* Used in the sequential case.  Returns "true" iff this addition causes
+     the size limit to be reached. */
+  bool add_reference(oop* from) {
+    _other_regions.add_reference(from);
+    return false;
+  }
+
+  /* Used in the parallel case.  Returns "true" iff this addition causes
+     the size limit to be reached. */
+  bool add_reference(oop* from, int tid) {
+    _other_regions.add_reference(from, tid);
+    return false;
+  }
+
+  // Records the fact that the current region contains an outgoing
+  // reference into "to_hr".
+  void add_outgoing_reference(HeapRegion* to_hr);
+
+  // Removes any entries shown by the given bitmaps to contain only dead
+  // objects.
+  void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
+
+  // The region is being reclaimed; clear its remset, and any mention of
+  // entries for this region in other remsets.
+  void clear();
+
+  // Forget any entries due to pointers from "from_hr".
+  void clear_incoming_entry(HeapRegion* from_hr) {
+    _other_regions.clear_incoming_entry(from_hr);
+  }
+
+#if 0
+  virtual void cleanup() = 0;
+#endif
+
+  // Should be called from single-threaded code.
+  void init_for_par_iteration();
+  // Attempt to claim the region.  Returns true iff this call caused an
+  // atomic transition from Unclaimed to Claimed.
+  bool claim_iter();
+  // Sets the iteration state to "complete".
+  void set_iter_complete();
+  // Returns "true" iff the region's iteration is complete.
+  bool iter_is_complete();
+
+  // Initialize the given iterator to iterate over this rem set.
+  void init_iterator(HeapRegionRemSetIterator* iter) const;
+
+#if 0
+  // Apply the "do_card" method to the start address of every card in the
+  // rem set.  Returns false if some application of the closure aborted.
+  virtual bool card_iterate(CardClosure* iter) = 0;
+#endif
+
+  // The actual # of bytes this hr_remset takes up.
+  size_t mem_size() {
+    return _other_regions.mem_size()
+      // The correction below is necessary because _other_regions.mem_size()
+      // already accounts for the embedded OtherRegionsTable.
+      + sizeof(HeapRegionRemSet) - sizeof(OtherRegionsTable);
+  }
+
+  // Returns the memory occupancy of all static data structures associated
+  // with remembered sets.
+  static size_t static_mem_size() {
+    return OtherRegionsTable::static_mem_size();
+  }
+
+  // Returns the memory occupancy of all free_list data structures associated
+  // with remembered sets.
+  static size_t fl_mem_size() {
+    return OtherRegionsTable::fl_mem_size();
+  }
+
+  bool contains_reference(oop* from) const {
+    return _other_regions.contains_reference(from);
+  }
+  void print() const;
+
+#if MAYBE
+  // We are about to introduce a constraint, requiring the collection time
+  // of the region owning this RS to be <= "hr", and forgetting pointers
+  // from the owning region to "hr."  Before doing so, examines this rem
+  // set for pointers to "hr", possibly identifying some popular objects,
+  // and possibly finding that some cards no longer contain pointers to
+  // "hr".
+  //
+  // These steps may prevent the constraint from being necessary; in that
+  // case this returns a set of cards now thought to contain no pointers
+  // into "hr".  In the normal (I assume) case, returns NULL, indicating
+  // that
+  // we should go ahead and add the constraint.
+  virtual SeqHeapRegionRemSet* audit(HeapRegion* hr) = 0;
+#endif
+
+  // Called during a stop-world phase to perform any deferred cleanups.
+  // The second version may be called by parallel threads after they finish
+  // collection work.
+  static void cleanup();
+  static void par_cleanup();
+
+  // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
+  // (Uses it to initialize from_card_cache).
+  static void init_heap(size_t max_regions) {
+    OtherRegionsTable::init_from_card_cache(max_regions);
+  }
+
+  // Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
+  static void shrink_heap(size_t new_n_regs) {
+    OtherRegionsTable::shrink_from_card_cache(new_n_regs);
+  }
+
+#ifndef PRODUCT
+  static void print_from_card_cache() {
+    OtherRegionsTable::print_from_card_cache();
+  }
+#endif
+
+  static void record(HeapRegion* hr, oop* f);
+  static void print_recorded();
+  static void record_event(Event evnt);
+
+  // Run unit tests.
+#ifndef PRODUCT
+  static void test();
+#endif
+
+};
+
+class HeapRegionRemSetIterator : public CHeapObj {
+
+  // The region over which we're iterating.
+  const HeapRegionRemSet* _hrrs;
+
+  // Local caching of HRRS fields.
+  const BitMap*             _coarse_map;
+  PosParPRT**               _fine_grain_regions;
+
+  G1BlockOffsetSharedArray* _bosa;
+  G1CollectedHeap*          _g1h;
+
+  // The number yielded since initialization.
+  size_t _n_yielded_fine;
+  size_t _n_yielded_coarse;
+  size_t _n_yielded_sparse;
+
+  // Which of the three tables (sparse, fine, or coarse) the iteration is
+  // currently traversing.
+  enum IterState {
+    Sparse,
+    Fine,
+    Coarse
+  };
+  IterState _is;
+
+  // In both fine and coarse iteration, the heap offset of the first card
+  // of the current region.
+  size_t _cur_region_card_offset;
+  // Card offset within cur region.
+  size_t _cur_region_cur_card;
+
+  // Coarse table iteration fields:
+
+  // Current region index.
+  int _coarse_cur_region_index;
+  int _coarse_cur_region_cur_card;
+
+  bool coarse_has_next(size_t& card_index);
+
+  // Fine table iteration fields:
+
+  // Index of bucket-list we're working on.
+  int _fine_array_index;
+  // Per Region Table we're doing within current bucket list.
+  PosParPRT* _fine_cur_prt;
+
+  /* SparsePRT::*/ SparsePRTIter _sparse_iter;
+
+  void fine_find_next_non_null_prt();
+
+  bool fine_has_next();
+  bool fine_has_next(size_t& card_index);
+
+public:
+  // We require an iterator to be initialized before use, so the
+  // constructor does little.
+  HeapRegionRemSetIterator();
+
+  void initialize(const HeapRegionRemSet* hrrs);
+
+  // If there remains one or more cards to be yielded, returns true and
+  // sets "card_index" to one of those cards (which is then considered
+  // yielded.)   Otherwise, returns false (and leaves "card_index"
+  // undefined.)
+  bool has_next(size_t& card_index);
+
+  size_t n_yielded_fine() { return _n_yielded_fine; }
+  size_t n_yielded_coarse() { return _n_yielded_coarse; }
+  size_t n_yielded_sparse() { return _n_yielded_sparse; }
+  size_t n_yielded() {
+    return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse();
+  }
+};
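+
+// Illustrative use of the iterator, mirroring the non-product test code in
+// heapRegionRemSet.cpp (a usage sketch, not additional API):
+//
+//   HeapRegionRemSetIterator iter;
+//   hrrs->init_iterator(&iter);
+//   size_t card_index;
+//   while (iter.has_next(card_index)) {
+//     // process the card with heap offset "card_index"
+//   }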
+
+#if 0
+class CardClosure: public Closure {
+public:
+  virtual void do_card(HeapWord* card_start) = 0;
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,345 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_heapRegionSeq.cpp.incl"
+
+// Local to this file.
+
+static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
+  if ((*hr1p)->end() <= (*hr2p)->bottom()) return -1;
+  else if ((*hr2p)->end() <= (*hr1p)->bottom()) return 1;
+  else if (*hr1p == *hr2p) return 0;
+  else {
+    assert(false, "We should never compare distinct overlapping regions.");
+  }
+  return 0;
+}
+
+HeapRegionSeq::HeapRegionSeq(const size_t max_size) :
+  _alloc_search_start(0),
+  // The line below is the worst bit of C++ hackery I've ever written
+  // (Detlefs, 11/23).  You should think of it as equivalent to
+  // "_regions(100, true)": initialize the growable array and inform it
+  // that it should allocate its elem array(s) on the C heap.  The first
+  // argument, however, is actually a comma expression (new-expr, 100).
+  // The purpose of the new_expr is to inform the growable array that it
+  // is *already* allocated on the C heap: it uses the placement syntax to
+  // keep it from actually doing any allocation.
+  _regions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
+                                       (void*)&_regions,
+                                       ResourceObj::C_HEAP),
+            (int)max_size),
+           true),
+  _next_rr_candidate(0),
+  _seq_bottom(NULL)
+{}
+
+// Private methods.
+
+HeapWord*
+HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) {
+  assert(G1CollectedHeap::isHumongous(word_size),
+         "Allocation size should be humongous");
+  int cur = ind;
+  int first = cur;
+  size_t sumSizes = 0;
+  while (cur < _regions.length() && sumSizes < word_size) {
+    // Loop invariant:
+    //   For all i in [first, cur):
+    //        _regions.at(i)->is_empty()
+    //     && _regions.at(i) is contiguous with its predecessor, if any;
+    //   and sumSizes is the sum of the sizes of the regions in the
+    //   interval [first, cur).
+    HeapRegion* curhr = _regions.at(cur);
+    if (curhr->is_empty()
+        && !curhr->is_reserved()
+        && (first == cur
+            || (_regions.at(cur-1)->end() ==
+                curhr->bottom()))) {
+      sumSizes += curhr->capacity() / HeapWordSize;
+    } else {
+      first = cur + 1;
+      sumSizes = 0;
+    }
+    cur++;
+  }
+  if (sumSizes >= word_size) {
+    _alloc_search_start = cur;
+    // Mark the allocated regions as allocated.
+    bool zf = G1CollectedHeap::heap()->allocs_are_zero_filled();
+    HeapRegion* first_hr = _regions.at(first);
+    for (int i = first; i < cur; i++) {
+      HeapRegion* hr = _regions.at(i);
+      if (zf)
+        hr->ensure_zero_filled();
+      {
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        hr->set_zero_fill_allocated();
+      }
+      size_t sz = hr->capacity() / HeapWordSize;
+      HeapWord* tmp = hr->allocate(sz);
+      assert(tmp != NULL, "Humongous allocation failure");
+      MemRegion mr = MemRegion(tmp, sz);
+      SharedHeap::fill_region_with_object(mr);
+      hr->declare_filled_region_to_BOT(mr);
+      if (i == first) {
+        first_hr->set_startsHumongous();
+      } else {
+        assert(i > first, "sanity");
+        hr->set_continuesHumongous(first_hr);
+      }
+    }
+    HeapWord* first_hr_bot = first_hr->bottom();
+    HeapWord* obj_end = first_hr_bot + word_size;
+    first_hr->set_top(obj_end);
+    return first_hr_bot;
+  } else {
+    // No sufficiently large run of contiguous empty regions was found
+    // starting at "ind"; the caller may retry from the beginning.
+    return NULL;
+  }
+}
+
+void HeapRegionSeq::print_empty_runs(bool reserved_are_empty) {
+  int empty_run = 0;
+  int n_empty = 0;
+  bool at_least_one_reserved = false;
+  int empty_run_start;
+  for (int i = 0; i < _regions.length(); i++) {
+    HeapRegion* r = _regions.at(i);
+    if (r->continuesHumongous()) continue;
+    if (r->is_empty() && (reserved_are_empty || !r->is_reserved())) {
+      assert(!r->isHumongous(), "H regions should not be empty.");
+      if (empty_run == 0) empty_run_start = i;
+      empty_run++;
+      n_empty++;
+      if (r->is_reserved()) {
+        at_least_one_reserved = true;
+      }
+    } else {
+      if (empty_run > 0) {
+        gclog_or_tty->print("  %d:%d", empty_run_start, empty_run);
+        if (reserved_are_empty && at_least_one_reserved)
+          gclog_or_tty->print("(R)");
+        empty_run = 0;
+        at_least_one_reserved = false;
+      }
+    }
+  }
+  if (empty_run > 0) {
+    gclog_or_tty->print(" %d:%d", empty_run_start, empty_run);
+    if (reserved_are_empty && at_least_one_reserved) gclog_or_tty->print("(R)");
+  }
+  gclog_or_tty->print_cr(" [tot = %d]", n_empty);
+}
+
+int HeapRegionSeq::find(HeapRegion* hr) {
+  // FIXME: optimized for adjacent regions of fixed size.
+  int ind = hr->hrs_index();
+  if (ind != -1) {
+    assert(_regions.at(ind) == hr, "Mismatch");
+  }
+  return ind;
+}
+
+
+// Public methods.
+
+void HeapRegionSeq::insert(HeapRegion* hr) {
+  assert(!_regions.is_full(), "Too many elements in HeapRegionSeq");
+  if (_regions.length() == 0
+      || _regions.top()->end() <= hr->bottom()) {
+    hr->set_hrs_index(_regions.length());
+    _regions.append(hr);
+  } else {
+    _regions.append(hr);
+    _regions.sort(orderRegions);
+    for (int i = 0; i < _regions.length(); i++) {
+      _regions.at(i)->set_hrs_index(i);
+    }
+  }
+  char* bot = (char*)_regions.at(0)->bottom();
+  if (_seq_bottom == NULL || bot < _seq_bottom) _seq_bottom = bot;
+}
+
+size_t HeapRegionSeq::length() {
+  return _regions.length();
+}
+
+size_t HeapRegionSeq::free_suffix() {
+  size_t res = 0;
+  int first = _regions.length() - 1;
+  int cur = first;
+  while (cur >= 0 &&
+         (_regions.at(cur)->is_empty()
+          && !_regions.at(cur)->is_reserved()
+          && (first == cur
+              || (_regions.at(cur+1)->bottom() ==
+                  _regions.at(cur)->end())))) {
+      res++;
+      cur--;
+  }
+  return res;
+}
+
+HeapWord* HeapRegionSeq::obj_allocate(size_t word_size) {
+  int cur = _alloc_search_start;
+  // Make sure "cur" is a valid index.
+  assert(cur >= 0, "Invariant.");
+  HeapWord* res = alloc_obj_from_region_index(cur, word_size);
+  if (res == NULL)
+    res = alloc_obj_from_region_index(0, word_size);
+  return res;
+}
+
+void HeapRegionSeq::iterate(HeapRegionClosure* blk) {
+  iterate_from((HeapRegion*)NULL, blk);
+}
+
+// The first argument r is the heap region at which iteration begins.
+// This operation runs fastest when r is NULL, or the heap region for
+// which a HeapRegionClosure most recently returned true, or the
+// heap region immediately to its right in the sequence.  In all
+// other cases a linear search is required to find the index of r.
+
+void HeapRegionSeq::iterate_from(HeapRegion* r, HeapRegionClosure* blk) {
+
+  // :::: FIXME ::::
+  // A static cached value is a bad idea, especially once we start doing
+  // parallel remembered set updates.  For now, don't cache anything; the
+  // caching code is left in the "#if 0" blocks below.
+
+#if 0
+  static int cached_j = 0;
+#endif
+  int len = _regions.length();
+  int j = 0;
+  // Find the index of r.
+  if (r != NULL) {
+#if 0
+    assert(cached_j >= 0, "Invariant.");
+    if ((cached_j < len) && (r == _regions.at(cached_j))) {
+      j = cached_j;
+    } else if ((cached_j + 1 < len) && (r == _regions.at(cached_j + 1))) {
+      j = cached_j + 1;
+    } else {
+      j = find(r);
+#endif
+      if (j < 0) {
+        j = 0;
+      }
+#if 0
+    }
+#endif
+  }
+  int i;
+  for (i = j; i < len; i += 1) {
+    int res = blk->doHeapRegion(_regions.at(i));
+    if (res) {
+#if 0
+      cached_j = i;
+#endif
+      blk->incomplete();
+      return;
+    }
+  }
+  for (i = 0; i < j; i += 1) {
+    int res = blk->doHeapRegion(_regions.at(i));
+    if (res) {
+#if 0
+      cached_j = i;
+#endif
+      blk->incomplete();
+      return;
+    }
+  }
+}
+
+void HeapRegionSeq::iterate_from(int idx, HeapRegionClosure* blk) {
+  int len = _regions.length();
+  int i;
+  for (i = idx; i < len; i++) {
+    if (blk->doHeapRegion(_regions.at(i))) {
+      blk->incomplete();
+      return;
+    }
+  }
+  for (i = 0; i < idx; i++) {
+    if (blk->doHeapRegion(_regions.at(i))) {
+      blk->incomplete();
+      return;
+    }
+  }
+}
+
+MemRegion HeapRegionSeq::shrink_by(size_t shrink_bytes,
+                                   size_t& num_regions_deleted) {
+  assert(shrink_bytes % os::vm_page_size() == 0, "unaligned");
+  assert(shrink_bytes % HeapRegion::GrainBytes == 0, "unaligned");
+
+  if (_regions.length() == 0) {
+    num_regions_deleted = 0;
+    return MemRegion();
+  }
+  int j = _regions.length() - 1;
+  HeapWord* end = _regions.at(j)->end();
+  HeapWord* last_start = end;
+  while (j >= 0 && shrink_bytes > 0) {
+    HeapRegion* cur = _regions.at(j);
+    // We have to leave humongous regions where they are,
+    // and work around them.
+    if (cur->isHumongous()) {
+      return MemRegion(last_start, end);
+    }
+    cur->reset_zero_fill();
+    assert(cur == _regions.top(), "Should be top");
+    if (!cur->is_empty()) break;
+    shrink_bytes -= cur->capacity();
+    num_regions_deleted++;
+    _regions.pop();
+    last_start = cur->bottom();
+    // We need to delete these somehow, but can't currently do so here: if
+    // we do, the ZF thread may still access the deleted region.  We'll
+    // leave this here as a reminder that we have to do something about
+    // this.
+    // delete cur;
+    j--;
+  }
+  return MemRegion(last_start, end);
+}
+
+
+class PrintHeapRegionClosure : public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    gclog_or_tty->print(PTR_FORMAT ":", r);
+    r->print();
+    return false;
+  }
+};
+
+void HeapRegionSeq::print() {
+  PrintHeapRegionClosure cl;
+  iterate(&cl);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class HeapRegion;
+class HeapRegionClosure;
+
+class HeapRegionSeq: public CHeapObj {
+
+  // _regions is kept sorted by start address order, and no two regions are
+  // overlapping.
+  GrowableArray<HeapRegion*> _regions;
+
+  // The index in "_regions" at which to start the next allocation search.
+  // (For efficiency only; private to obj_allocate after initialization.)
+  int _alloc_search_start;
+
+  // Attempts to allocate a block of the (assumed humongous) word_size,
+  // starting at the region "ind".
+  HeapWord* alloc_obj_from_region_index(int ind, size_t word_size);
+
+  // Currently, we're choosing collection sets in a round-robin fashion,
+  // starting here.
+  int _next_rr_candidate;
+
+  // The bottom address of the bottom-most region, or else NULL if there
+  // are no regions in the sequence.
+  char* _seq_bottom;
+
+ public:
+  // Initializes "this" to the empty sequence of regions.
+  HeapRegionSeq(const size_t max_size);
+
+  // Adds "hr" to "this" sequence.  Requires "hr" not to overlap with
+  // any region already in "this".  (Will perform better if regions are
+  // inserted in ascending address order.)
+  void insert(HeapRegion* hr);
+
+  // Given a HeapRegion*, returns its index within _regions,
+  // or returns -1 if not found.
+  int find(HeapRegion* hr);
+
+  // Requires the index to be valid; returns the region at that index.
+  HeapRegion* at(size_t i) { return _regions.at((int)i); }
+
+  // Return the number of regions in the sequence.
+  size_t length();
+
+  // Returns the number of contiguous regions at the end of the sequence
+  // that are available for allocation.
+  size_t free_suffix();
+
+  // Requires "word_size" to be humongous (in the technical sense).  If
+  // possible, allocates a contiguous subsequence of the heap regions to
+  // satisfy the allocation, and returns the address of the beginning of
+  // that sequence, otherwise returns NULL.
+  HeapWord* obj_allocate(size_t word_size);
+
+  // Apply the "doHeapRegion" method of "blk" to all regions in "this",
+  // in address order, terminating the iteration early
+  // if the "doHeapRegion" method returns "true".
+  void iterate(HeapRegionClosure* blk);
+
+  // Apply the "doHeapRegion" method of "blk" to all regions in "this",
+  // starting at "r" (or first region, if "r" is NULL), in a circular
+  // manner, terminating the iteration early if the "doHeapRegion" method
+  // returns "true".
+  void iterate_from(HeapRegion* r, HeapRegionClosure* blk);
+
+  // As above, but start from a given index in the sequence
+  // instead of a given heap region.
+  void iterate_from(int idx, HeapRegionClosure* blk);
+
+  // Requires "shrink_bytes" to be a multiple of the page size and heap
+  // region granularity.  Deletes as many "rightmost" completely free heap
+  // regions from the sequence as comprise shrink_bytes bytes.  Returns a
+  // MemRegion covering the memory those regions occupied, and sets
+  // "num_regions_deleted" to the number of regions deleted.
+  MemRegion shrink_by(size_t shrink_bytes, size_t& num_regions_deleted);
+
+  // If "addr" falls within a region in the sequence, return that region,
+  // or else NULL.
+  HeapRegion* addr_to_region(const void* addr);
+
+  void print();
+
+  // Prints out runs of empty regions.  If the argument is "true", reserved
+  // (popular) regions are considered "empty".
+  void print_empty_runs(bool reserved_are_empty);
+
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline HeapRegion* HeapRegionSeq::addr_to_region(const void* addr) {
+  assert(_seq_bottom != NULL, "bad _seq_bottom in addr_to_region");
+  if ((char*) addr >= _seq_bottom) {
+    size_t diff = (size_t) pointer_delta((HeapWord*) addr,
+                                         (HeapWord*) _seq_bottom);
+    int index = (int) (diff >> HeapRegion::LogOfHRGrainWords);
+    assert(index >= 0, "invariant / paranoia");
+    if (index < _regions.length()) {
+      HeapRegion* hr = _regions.at(index);
+      assert(hr->is_in_reserved(addr),
+             "addr_to_region is wrong...");
+      return hr;
+    }
+  }
+  return NULL;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_ptrQueue.cpp.incl"
+
+PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm) :
+  _qset(qset_), _buf(NULL), _index(0), _active(false),
+  _perm(perm), _lock(NULL)
+{}
+
+PtrQueue::~PtrQueue() {
+  if (!_perm && _buf != NULL) {
+    if (_index == _sz) {
+      // No work to do.
+      qset()->deallocate_buffer(_buf);
+    } else {
+      // We must NULL out the unused entries, then enqueue.
+      for (size_t i = 0; i < _index; i += oopSize) {
+        _buf[byte_index_to_index((int)i)] = NULL;
+      }
+      qset()->enqueue_complete_buffer(_buf);
+      _buf = NULL;
+    }
+  }
+}
+
+
+static int byte_index_to_index(int ind) {
+  assert((ind % oopSize) == 0, "Invariant.");
+  return ind / oopSize;
+}
+
+static int index_to_byte_index(int byte_ind) {
+  return byte_ind * oopSize;
+}
+
+void PtrQueue::enqueue_known_active(void* ptr) {
+  assert(0 <= _index && _index <= _sz, "Invariant.");
+  assert(_index == 0 || _buf != NULL, "invariant");
+
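+  // _index == 0 means the buffer is full (it fills from _sz down towards
+  // zero), so hand the full buffer to the qset and install a fresh one.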
+  while (_index == 0) {
+    handle_zero_index();
+  }
+  assert(_index > 0, "postcondition");
+
+  _index -= oopSize;
+  _buf[byte_index_to_index((int)_index)] = ptr;
+  assert(0 <= _index && _index <= _sz, "Invariant.");
+}
+
+void PtrQueue::locking_enqueue_completed_buffer(void** buf) {
+  assert(_lock->owned_by_self(), "Required.");
+  _lock->unlock();
+  qset()->enqueue_complete_buffer(buf);
+  // We must relock only because the caller, as in the normal case, will
+  // unlock afterwards.
+  _lock->lock_without_safepoint_check();
+}
+
+
+PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
+  _max_completed_queue(0),
+  _cbl_mon(NULL), _fl_lock(NULL),
+  _notify_when_complete(notify_when_complete),
+  _sz(0),
+  _completed_buffers_head(NULL),
+  _completed_buffers_tail(NULL),
+  _n_completed_buffers(0),
+  _process_completed_threshold(0), _process_completed(false),
+  _buf_free_list(NULL), _buf_free_list_sz(0)
+{}
+
+void** PtrQueueSet::allocate_buffer() {
+  assert(_sz > 0, "Didn't set a buffer size.");
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  if (_buf_free_list != NULL) {
+    void** res = _buf_free_list;
+    _buf_free_list = (void**)_buf_free_list[0];
+    _buf_free_list_sz--;
+    // Override the "next" pointer with NULL, just in case we scan this
+    // part of the buffer.
+    res[0] = NULL;
+    return res;
+  } else {
+    return NEW_C_HEAP_ARRAY(void*, _sz);
+  }
+}
+
+void PtrQueueSet::deallocate_buffer(void** buf) {
+  assert(_sz > 0, "Didn't set a buffer size.");
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  buf[0] = (void*)_buf_free_list;
+  _buf_free_list = buf;
+  _buf_free_list_sz++;
+}
+
+void PtrQueueSet::reduce_free_list() {
+  // For now we'll adopt the strategy of deleting half.
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  size_t n = _buf_free_list_sz / 2;
+  while (n > 0) {
+    assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong.");
+    void** head = _buf_free_list;
+    _buf_free_list = (void**)_buf_free_list[0];
+    FREE_C_HEAP_ARRAY(void*,head);
+    n--;
+  }
+}
+
+void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) {
+  // I use explicit locking here because there's a bailout in the middle.
+  _cbl_mon->lock_without_safepoint_check();
+
+  Thread* thread = Thread::current();
+  assert( ignore_max_completed ||
+          thread->is_Java_thread() ||
+          SafepointSynchronize::is_at_safepoint(),
+          "invariant" );
+  ignore_max_completed = ignore_max_completed || !thread->is_Java_thread();
+
+  if (!ignore_max_completed && _max_completed_queue > 0 &&
+      _n_completed_buffers >= (size_t) _max_completed_queue) {
+    _cbl_mon->unlock();
+    bool b = mut_process_buffer(buf);
+    if (b) {
+      deallocate_buffer(buf);
+      return;
+    }
+
+    // Otherwise, go ahead and enqueue the buffer.  Must reacquire the lock.
+    _cbl_mon->lock_without_safepoint_check();
+  }
+
+  // Here we still hold the _cbl_mon.
+  CompletedBufferNode* cbn = new CompletedBufferNode;
+  cbn->buf = buf;
+  cbn->next = NULL;
+  cbn->index = index;
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = cbn;
+    _completed_buffers_tail = cbn;
+  } else {
+    _completed_buffers_tail->next = cbn;
+    _completed_buffers_tail = cbn;
+  }
+  _n_completed_buffers++;
+
+  if (!_process_completed &&
+      _n_completed_buffers == _process_completed_threshold) {
+    _process_completed = true;
+    if (_notify_when_complete)
+      _cbl_mon->notify_all();
+  }
+  debug_only(assert_completed_buffer_list_len_correct_locked());
+  _cbl_mon->unlock();
+}
+
+int PtrQueueSet::completed_buffers_list_length() {
+  int n = 0;
+  CompletedBufferNode* cbn = _completed_buffers_head;
+  while (cbn != NULL) {
+    n++;
+    cbn = cbn->next;
+  }
+  return n;
+}
+
+void PtrQueueSet::assert_completed_buffer_list_len_correct() {
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  assert_completed_buffer_list_len_correct_locked();
+}
+
+void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() {
+  guarantee((size_t)completed_buffers_list_length() ==  _n_completed_buffers,
+            "Completed buffer length is wrong.");
+}
+
+void PtrQueueSet::set_buffer_size(size_t sz) {
+  assert(_sz == 0 && sz > 0, "Should be called only once.");
+  _sz = sz * oopSize;
+}
+
+void PtrQueueSet::set_process_completed_threshold(size_t sz) {
+  _process_completed_threshold = sz;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// There are various techniques that require threads to be able to log
+// addresses.  For example, a generational write barrier might log
+// the addresses of modified old-generation objects.  This type supports
+// this operation.
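+//
+// For example (an illustrative sketch only; "q" is a hypothetical queue
+// owned by some thread, and the G1 barriers are the real clients):
+//
+//   q.set_active(true);        // start logging; resets the log when cleared
+//   q.enqueue(modified_addr);  // a no-op while the queue is inactive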
+
+class PtrQueueSet;
+
+class PtrQueue: public CHeapObj {
+
+protected:
+  // The ptr queue set to which this queue belongs.
+  PtrQueueSet* _qset;
+
+  // Whether updates should be logged.
+  bool _active;
+
+  // The buffer.
+  void** _buf;
+  // The index at which an object was last enqueued.  Starts at "_sz"
+  // (indicating an empty buffer) and goes towards zero.
+  size_t _index;
+
+  // The size of the buffer.
+  size_t _sz;
+
+  // If true, the queue is permanent, and doesn't need to deallocate
+  // its buffer in the destructor (since that obtains a lock which may not
+  // be legally locked by then).
+  bool _perm;
+
+  // If there is a lock associated with this buffer, this is that lock.
+  Mutex* _lock;
+
+  PtrQueueSet* qset() { return _qset; }
+
+public:
+  // Initialize this queue to contain a null buffer, and be part of the
+  // given PtrQueueSet.
+  PtrQueue(PtrQueueSet*, bool perm = false);
+  // Release any contained resources.
+  ~PtrQueue();
+
+  // Associate a lock with a ptr queue.
+  void set_lock(Mutex* lock) { _lock = lock; }
+
+  void reset() { if (_buf != NULL) _index = _sz; }
+
+  // Enqueues the given "obj".
+  void enqueue(void* ptr) {
+    if (!_active) return;
+    else enqueue_known_active(ptr);
+  }
+
+  inline void handle_zero_index();
+  void locking_enqueue_completed_buffer(void** buf);
+
+  void enqueue_known_active(void* ptr);
+
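+  // The number of bytes of the buffer currently in use.  For example
+  // (illustrative): with _sz == 256 and _index == 192, 64 bytes are in
+  // use, i.e. 8 entries when oopSize == 8.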
+  size_t size() {
+    assert(_sz >= _index, "Invariant.");
+    return _buf == NULL ? 0 : _sz - _index;
+  }
+
+  // Set the "active" property of the queue to "b".  An enqueue to an
+  // inactive queue is a no-op.  Setting a queue to inactive resets its
+  // log to the empty state.
+  void set_active(bool b) {
+    _active = b;
+    if (!b && _buf != NULL) {
+      _index = _sz;
+    } else if (b && _buf != NULL) {
+      assert(_index == _sz, "invariant: queues are empty when activated.");
+    }
+  }
+
+  static int byte_index_to_index(int ind) {
+    assert((ind % oopSize) == 0, "Invariant.");
+    return ind / oopSize;
+  }
+
+  static int index_to_byte_index(int byte_ind) {
+    return byte_ind * oopSize;
+  }
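+
+  // For example (illustrative): with oopSize == 8, byte index 24 maps to
+  // buffer element 3, and index_to_byte_index(3) maps back to byte 24.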
+
+  // To support compiler.
+  static ByteSize byte_offset_of_index() {
+    return byte_offset_of(PtrQueue, _index);
+  }
+  static ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); }
+
+  static ByteSize byte_offset_of_buf() {
+    return byte_offset_of(PtrQueue, _buf);
+  }
+  static ByteSize byte_width_of_buf() { return in_ByteSize(sizeof(void*)); }
+
+  static ByteSize byte_offset_of_active() {
+    return byte_offset_of(PtrQueue, _active);
+  }
+  static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); }
+
+};
+
+// A PtrQueueSet represents resources common to a set of pointer queues.
+// In particular, the individual queues allocate buffers from this shared
+// set, and return completed buffers to the set.
+// All these variables are protected by the TLOQ_CBL_mon. XXX ???
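+//
+// Typical set-up of a queue set (an illustrative sketch; "cbl_mon",
+// "fl_lock" and "sz_in_oops" are placeholders, and concrete subclasses
+// such as SATBMarkQueueSet layer their own initialize() on top):
+//
+//   my_qset.initialize(cbl_mon, fl_lock);
+//   my_qset.set_buffer_size(sz_in_oops);   // must be called exactly once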
+class PtrQueueSet: public CHeapObj {
+
+protected:
+
+  class CompletedBufferNode: public CHeapObj {
+  public:
+    void** buf;
+    size_t index;
+    CompletedBufferNode* next;
+    CompletedBufferNode() : buf(NULL),
+      index(0), next(NULL){ }
+  };
+
+  Monitor* _cbl_mon;  // Protects the fields below.
+  CompletedBufferNode* _completed_buffers_head;
+  CompletedBufferNode* _completed_buffers_tail;
+  size_t _n_completed_buffers;
+  size_t _process_completed_threshold;
+  volatile bool _process_completed;
+
+  // These fields (and the interpretation of the first element of a buffer
+  // as a "next" pointer) are protected by the TLOQ_FL_lock.
+  Mutex* _fl_lock;
+  void** _buf_free_list;
+  size_t _buf_free_list_sz;
+
+  // The size of all buffers in the set.
+  size_t _sz;
+
+  bool _all_active;
+
+  // If true, notify_all on _cbl_mon when the threshold is reached.
+  bool _notify_when_complete;
+
+  // Maximum number of elements allowed on completed queue: after that,
+  // enqueuer does the work itself.  Zero indicates no maximum.
+  int _max_completed_queue;
+
+  int completed_buffers_list_length();
+  void assert_completed_buffer_list_len_correct_locked();
+  void assert_completed_buffer_list_len_correct();
+
+protected:
+  // A mutator thread does the work of processing a buffer.
+  // Returns "true" iff the work is complete (and the buffer may be
+  // deallocated).
+  virtual bool mut_process_buffer(void** buf) {
+    ShouldNotReachHere();
+    return false;
+  }
+
+public:
+  // Create an empty ptr queue set.
+  PtrQueueSet(bool notify_when_complete = false);
+
+  // Because of init-order concerns, we can't pass these as constructor
+  // arguments.
+  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                  int max_completed_queue = 0) {
+    _max_completed_queue = max_completed_queue;
+    assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?");
+    _cbl_mon = cbl_mon; _fl_lock = fl_lock;
+  }
+
+  // Return an empty oop array of size _sz (required to be non-zero).
+  void** allocate_buffer();
+
+  // Return an empty buffer to the free list.  The "buf" argument is
+  // required to be a pointer to the head of an array of length "_sz".
+  void deallocate_buffer(void** buf);
+
+  // Declares that "buf" is a complete buffer.
+  void enqueue_complete_buffer(void** buf, size_t index = 0,
+                               bool ignore_max_completed = false);
+
+  bool completed_buffers_exist_dirty() {
+    return _n_completed_buffers > 0;
+  }
+
+  bool process_completed_buffers() { return _process_completed; }
+
+  bool active() { return _all_active; }
+
+  // Set the buffer size.  Should be called before any "enqueue" operation
+  // can be called, and should only be called once.
+  void set_buffer_size(size_t sz);
+
+  // Get the buffer size.
+  size_t buffer_size() { return _sz; }
+
+  // Set the number of completed buffers that triggers log processing.
+  void set_process_completed_threshold(size_t sz);
+
+  // Must only be called at a safe point.  Indicates that the buffer free
+  // list size may be reduced, if that is deemed desirable.
+  void reduce_free_list();
+
+  size_t completed_buffers_num() { return _n_completed_buffers; }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+void PtrQueue::handle_zero_index() {
+  assert(0 == _index, "Precondition.");
+  // This thread records the full buffer and allocates a new one (while
+  // holding the lock if there is one).
+  void** buf = _buf;
+  _buf = qset()->allocate_buffer();
+  _sz = qset()->buffer_size();
+  _index = _sz;
+  assert(0 <= _index && _index <= _sz, "Invariant.");
+  if (buf != NULL) {
+    if (_lock) {
+      locking_enqueue_completed_buffer(buf);
+    } else {
+      qset()->enqueue_complete_buffer(buf);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_satbQueue.cpp.incl"
+
+void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
+    _index = _sz;
+  }
+}
+
+void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl,
+                                          void** buf, size_t index, size_t sz) {
+  if (cl == NULL) return;
+  for (size_t i = index; i < sz; i += oopSize) {
+    oop obj = (oop)buf[byte_index_to_index((int)i)];
+    // There can be NULL entries because of destructors.
+    if (obj != NULL) {
+      cl->do_object(obj);
+    }
+  }
+}
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+SATBMarkQueueSet::SATBMarkQueueSet() :
+  PtrQueueSet(),
+  _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/)
+{}
+
+void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                  int max_completed_queue,
+                                  Mutex* lock) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+  _shared_satb_queue.set_lock(lock);
+  if (ParallelGCThreads > 0) {
+    _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads);
+  }
+}
+
+
+void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  t->satb_mark_queue().handle_zero_index();
+}
+
+void SATBMarkQueueSet::set_active_all_threads(bool b) {
+  _all_active = b;
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().set_active(b);
+  }
+}
+
+void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
+  _closure = closure;
+}
+
+void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) {
+  assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition");
+  _par_closures[i] = par_closure;
+}
+
+void SATBMarkQueueSet::iterate_closure_all_threads() {
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(_closure);
+  }
+  shared_satb_queue()->apply_closure(_closure);
+}
+
+void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
+  SharedHeap* sh = SharedHeap::heap();
+  int parity = sh->strong_roots_parity();
+
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    if (t->claim_oops_do(true, parity)) {
+      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+    }
+  }
+  // We'll have worker 0 do this one.
+  if (worker == 0) {
+    shared_satb_queue()->apply_closure(_par_closures[0]);
+  }
+}
+
+bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
+                                                              int worker) {
+  CompletedBufferNode* nd = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    if (_completed_buffers_head != NULL) {
+      nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL;
+      _n_completed_buffers--;
+      if (_n_completed_buffers == 0) _process_completed = false;
+    }
+  }
+  ObjectClosure* cl = (par ? _par_closures[worker] : _closure);
+  if (nd != NULL) {
+    ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz);
+    deallocate_buffer(nd->buf);
+    delete nd;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void SATBMarkQueueSet::abandon_partial_marking() {
+  CompletedBufferNode* buffers_to_delete = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    while (_completed_buffers_head != NULL) {
+      CompletedBufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      nd->next = buffers_to_delete;
+      buffers_to_delete = nd;
+    }
+    _completed_buffers_tail = NULL;
+    _n_completed_buffers = 0;
+    debug_only(assert_completed_buffer_list_len_correct_locked());
+  }
+  while (buffers_to_delete != NULL) {
+    CompletedBufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next;
+    deallocate_buffer(nd->buf);
+    delete nd;
+  }
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  // So we can safely manipulate these queues.
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().reset();
+  }
+  shared_satb_queue()->reset();
+}
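apply_closure_to_completed_buffer_work above pops one completed buffer while holding _cbl_mon and processes it only after the lock is released. A standalone sketch of the same shape, assuming toy types (std::mutex stands in for the HotSpot monitor, a functor for ObjectClosure):

#include <deque>
#include <mutex>
#include <vector>

typedef std::vector<const void*> Buffer;     // toy stand-in for a completed log buffer

struct ToyCompletedList {
  std::mutex lock;                           // plays the role of _cbl_mon
  std::deque<Buffer*> completed;             // plays the role of the completed-buffer list

  // Pop one buffer under the lock, process it outside the lock, mirroring the code above.
  template <typename Closure>
  bool apply_closure_to_completed_buffer(Closure& cl) {
    Buffer* buf = NULL;
    {
      std::lock_guard<std::mutex> x(lock);
      if (!completed.empty()) { buf = completed.front(); completed.pop_front(); }
    }
    if (buf == NULL) return false;
    for (size_t i = 0; i < buf->size(); ++i) {
      if ((*buf)[i] != NULL) cl((*buf)[i]);  // NULL entries are skipped, as in the real code
    }
    delete buf;
    return true;
  }
};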
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class ObjectClosure;
+class JavaThread;
+
+// A ptrQueue whose elements are "oops", pointers to object heads.
+class ObjPtrQueue: public PtrQueue {
+public:
+  ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) :
+    PtrQueue(qset_, perm)
+  {}
+  // Apply the closure to all elements, and reset the index to make the
+  // buffer empty.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", from "index" (inclusive) up to "sz".
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
+};
+
+
+
+class SATBMarkQueueSet: public PtrQueueSet {
+  ObjectClosure* _closure;
+  ObjectClosure** _par_closures;  // One per ParGCThread.
+
+  ObjPtrQueue _shared_satb_queue;
+
+  // Utility function to support sequential and parallel versions.  If
+  // "par" is true, then "worker" is the par thread id; if "false", worker
+  // is ignored.
+  bool apply_closure_to_completed_buffer_work(bool par, int worker);
+
+
+public:
+  SATBMarkQueueSet();
+
+  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                  int max_completed_queue = 0,
+                  Mutex* lock = NULL);
+
+  static void handle_zero_index_for_thread(JavaThread* t);
+
+  // Apply "set_active(b)" to all thread tloq's.  Should be called only
+  // with the world stopped.
+  void set_active_all_threads(bool b);
+
+  // Register "blk" as "the closure" for all queues.  Only one such closure
+  // is allowed.  The "apply_closure_to_completed_buffer" method will apply
+  // this closure to a completed buffer, and "iterate_closure_all_threads"
+  // applies it to partially-filled buffers (the latter should only be done
+  // with the world stopped).
+  void set_closure(ObjectClosure* closure);
+  // Set the parallel closures: pointer is an array of pointers to
+  // closures, one for each parallel GC thread.
+  void set_par_closure(int i, ObjectClosure* closure);
+
+  // If there is a registered closure for buffers, apply it to all entries
+  // in all currently-active buffers.  This should only be applied at a
+  // safepoint.  (Currently must not be called in parallel; this should
+  // change in the future.)
+  void iterate_closure_all_threads();
+  // Parallel version of the above.
+  void par_iterate_closure_all_threads(int worker);
+
+  // If there exists some completed buffer, pop it, then apply the
+  // registered closure to all its elements, and return true.  If no
+  // completed buffers exist, return false.
+  bool apply_closure_to_completed_buffer() {
+    return apply_closure_to_completed_buffer_work(false, 0);
+  }
+  // Parallel version of the above.
+  bool par_apply_closure_to_completed_buffer(int worker) {
+    return apply_closure_to_completed_buffer_work(true, worker);
+  }
+
+  ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
+
+  // If a marking is being abandoned, reset any unprocessed log buffers.
+  void abandon_partial_marking();
+
+};
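The header above describes registering a single closure and applying it to every thread's queue. A minimal standalone model of that protocol follows; ToyObjectClosure/ToyThreadQueue/ToyQueueSet are illustrative stand-ins, not the HotSpot classes.

#include <cstddef>
#include <vector>

struct ToyObjectClosure {
  virtual void do_object(const void* obj) = 0;
  virtual ~ToyObjectClosure() {}
};

struct ToyThreadQueue {
  std::vector<const void*> entries;
  void apply_closure(ToyObjectClosure* cl) {
    if (cl == NULL) return;
    for (size_t i = 0; i < entries.size(); ++i) cl->do_object(entries[i]);
    entries.clear();                          // emptying the buffer models resetting the index
  }
};

struct ToyQueueSet {
  std::vector<ToyThreadQueue*> per_thread;    // one queue per mutator thread
  ToyObjectClosure* closure;                  // the single registered closure
  ToyQueueSet() : closure(NULL) {}
  void set_closure(ToyObjectClosure* cl) { closure = cl; }
  void iterate_closure_all_threads() {        // the real code requires the world to be stopped
    for (size_t i = 0; i < per_thread.size(); ++i) per_thread[i]->apply_closure(closure);
  }
};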
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,530 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_sparsePRT.cpp.incl"
+
+#define SPARSE_PRT_VERBOSE 0
+
+#define UNROLL_CARD_LOOPS 1
+
+void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) {
+    sprt_iter->init(this);
+}
+
+void SparsePRTEntry::init(short region_ind) {
+  _region_ind = region_ind;
+  _next_index = NullEntry;
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  _cards[0] = NullEntry;
+  _cards[1] = NullEntry;
+  _cards[2] = NullEntry;
+  _cards[3] = NullEntry;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry;
+#endif
+}
+
+bool SparsePRTEntry::contains_card(short card_index) const {
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  if (_cards[0] == card_index) return true;
+  if (_cards[1] == card_index) return true;
+  if (_cards[2] == card_index) return true;
+  if (_cards[3] == card_index) return true;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    if (_cards[i] == card_index) return true;
+  }
+#endif
+  // Otherwise, the card is not in this entry.
+  return false;
+}
+
+int SparsePRTEntry::num_valid_cards() const {
+  int sum = 0;
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  if (_cards[0] != NullEntry) sum++;
+  if (_cards[1] != NullEntry) sum++;
+  if (_cards[2] != NullEntry) sum++;
+  if (_cards[3] != NullEntry) sum++;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    if (_cards[i] != NullEntry) sum++;
+  }
+#endif
+  // Return the number of occupied card slots.
+  return sum;
+}
+
+SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) {
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  short c = _cards[0];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[0] = card_index; return added; }
+  c = _cards[1];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[1] = card_index; return added; }
+  c = _cards[2];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[2] = card_index; return added; }
+  c = _cards[3];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[3] = card_index; return added; }
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    short c = _cards[i];
+    if (c == card_index) return found;
+    if (c == NullEntry) { _cards[i] = card_index; return added; }
+  }
+#endif
+  // Otherwise, we're full.
+  return overflow;
+}
+
+void SparsePRTEntry::copy_cards(short* cards) const {
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  cards[0] = _cards[0];
+  cards[1] = _cards[1];
+  cards[2] = _cards[2];
+  cards[3] = _cards[3];
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    cards[i] = _cards[i];
+  }
+#endif
+}
+
+void SparsePRTEntry::copy_cards(SparsePRTEntry* e) const {
+  copy_cards(&e->_cards[0]);
+}
+
+// ----------------------------------------------------------------------
+
+RSHashTable::RSHashTable(size_t capacity) :
+  _capacity(capacity), _capacity_mask(capacity-1),
+  _occupied_entries(0), _occupied_cards(0),
+  _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)),
+  _buckets(NEW_C_HEAP_ARRAY(short, capacity)),
+  _next_deleted(NULL), _deleted(false),
+  _free_list(NullEntry), _free_region(0)
+{
+  clear();
+}
+
+RSHashTable::~RSHashTable() {
+  if (_entries != NULL) {
+    FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries);
+    _entries = NULL;
+  }
+  if (_buckets != NULL) {
+    FREE_C_HEAP_ARRAY(short, _buckets);
+    _buckets = NULL;
+  }
+}
+
+void RSHashTable::clear() {
+  _occupied_entries = 0;
+  _occupied_cards = 0;
+  guarantee(_entries != NULL, "INV");
+  guarantee(_buckets != NULL, "INV");
+  // This will put -1 == NullEntry in the key field of all entries.
+  memset(_entries, -1, _capacity * sizeof(SparsePRTEntry));
+  memset(_buckets, -1, _capacity * sizeof(short));
+  _free_list = NullEntry;
+  _free_region = 0;
+}
+
+bool RSHashTable::add_card(short region_ind, short card_index) {
+  SparsePRTEntry* e = entry_for_region_ind_create(region_ind);
+  assert(e != NULL && e->r_ind() == region_ind,
+         "Postcondition of call above.");
+  SparsePRTEntry::AddCardResult res = e->add_card(card_index);
+  if (res == SparsePRTEntry::added) _occupied_cards++;
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr("       after add_card[%d]: valid-cards = %d.",
+                pointer_delta(e, _entries, sizeof(SparsePRTEntry)),
+                e->num_valid_cards());
+#endif
+  assert(e->num_valid_cards() > 0, "Postcondition");
+  return res != SparsePRTEntry::overflow;
+}
+
+bool RSHashTable::get_cards(short region_ind, short* cards) {
+  short ind = (short) (region_ind & capacity_mask());
+  short cur_ind = _buckets[ind];
+  SparsePRTEntry* cur;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    cur_ind = cur->next_index();
+  }
+
+  if (cur_ind == NullEntry) return false;
+  // Otherwise...
+  assert(cur->r_ind() == region_ind, "Postcondition of loop + test above.");
+  assert(cur->num_valid_cards() > 0, "Inv");
+  cur->copy_cards(cards);
+  return true;
+}
+
+bool RSHashTable::delete_entry(short region_ind) {
+  short ind = (short) (region_ind & capacity_mask());
+  short* prev_loc = &_buckets[ind];
+  short cur_ind = *prev_loc;
+  SparsePRTEntry* cur;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    prev_loc = cur->next_index_addr();
+    cur_ind = *prev_loc;
+  }
+
+  if (cur_ind == NullEntry) return false;
+  // Otherwise, splice out "cur".
+  *prev_loc = cur->next_index();
+  _occupied_cards -= cur->num_valid_cards();
+  free_entry(cur_ind);
+  _occupied_entries--;
+  return true;
+}
+
+SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const {
+  assert(occupied_entries() < capacity(), "Precondition");
+  short ind = (short) (region_ind & capacity_mask());
+  short cur_ind = _buckets[ind];
+  SparsePRTEntry* cur;
+  // XXX
+  // int k = 0;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    /*
+    k++;
+    if (k > 10) {
+      gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): "
+                    "k = %d, cur_ind = %d.", region_ind, k, cur_ind);
+      if (k >= 1000) {
+        while (1) ;
+      }
+    }
+    */
+    cur_ind = cur->next_index();
+  }
+
+  if (cur_ind != NullEntry) {
+    assert(cur->r_ind() == region_ind, "Loop postcondition + test");
+    return cur;
+  } else {
+    return NULL;
+  }
+}
+
+SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) {
+  SparsePRTEntry* res = entry_for_region_ind(region_ind);
+  if (res == NULL) {
+    short new_ind = alloc_entry();
+    assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room.");
+    res = entry(new_ind);
+    res->init(region_ind);
+    // Insert at front.
+    short ind = (short) (region_ind & capacity_mask());
+    res->set_next_index(_buckets[ind]);
+    _buckets[ind] = new_ind;
+    _occupied_entries++;
+  }
+  return res;
+}
+
+short RSHashTable::alloc_entry() {
+  short res;
+  if (_free_list != NullEntry) {
+    res = _free_list;
+    _free_list = entry(res)->next_index();
+    return res;
+  } else if ((size_t) _free_region+1 < capacity()) {
+    res = _free_region;
+    _free_region++;
+    return res;
+  } else {
+    return NullEntry;
+  }
+}
+
+
+void RSHashTable::free_entry(short fi) {
+  entry(fi)->set_next_index(_free_list);
+  _free_list = fi;
+}
+
+
+void RSHashTable::add_entry(SparsePRTEntry* e) {
+  assert(e->num_valid_cards() > 0, "Precondition.");
+  SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind());
+  e->copy_cards(e2);
+  _occupied_cards += e2->num_valid_cards();
+  assert(e2->num_valid_cards() > 0, "Postcondition.");
+}
+
+RSHashTable* RSHashTable::_head_deleted_list = NULL;
+
+void RSHashTable::add_to_deleted_list(RSHashTable* rsht) {
+  assert(!rsht->deleted(), "Should delete only once.");
+  rsht->set_deleted(true);
+  RSHashTable* hd = _head_deleted_list;
+  while (true) {
+    rsht->_next_deleted = hd;
+    RSHashTable* res =
+      (RSHashTable*)
+      Atomic::cmpxchg_ptr(rsht, &_head_deleted_list, hd);
+    if (res == hd) return;
+    else hd = res;
+  }
+}
+
+RSHashTable* RSHashTable::get_from_deleted_list() {
+  RSHashTable* hd = _head_deleted_list;
+  while (hd != NULL) {
+    RSHashTable* next = hd->next_deleted();
+    RSHashTable* res =
+      (RSHashTable*)
+      Atomic::cmpxchg_ptr(next, &_head_deleted_list, hd);
+    if (res == hd) {
+      hd->set_next_deleted(NULL);
+      hd->set_deleted(false);
+      return hd;
+    } else {
+      hd = res;
+    }
+  }
+  return NULL;
+}
+
+short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() {
+  short res;
+  while (_bl_ind != RSHashTable::NullEntry) {
+    res = _rsht->entry(_bl_ind)->card(0);
+    if (res != SparsePRTEntry::NullEntry) {
+      return res;
+    } else {
+      _bl_ind = _rsht->entry(_bl_ind)->next_index();
+    }
+  }
+  // Otherwise, none found:
+  return SparsePRTEntry::NullEntry;
+}
+
+size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) {
+  return
+    _heap_bot_card_ind
+    + (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion)
+    + ci;
+}
+
+bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) {
+  _card_ind++;
+  short ci;
+  if (_card_ind < SparsePRTEntry::CardsPerEntry &&
+      ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) !=
+       SparsePRTEntry::NullEntry)) {
+    card_index = compute_card_ind(ci);
+    return true;
+  }
+  // Otherwise, must find the next valid entry.
+  _card_ind = 0;
+
+  if (_bl_ind != RSHashTable::NullEntry) {
+      _bl_ind = _rsht->entry(_bl_ind)->next_index();
+      ci = find_first_card_in_list();
+      if (ci != SparsePRTEntry::NullEntry) {
+        card_index = compute_card_ind(ci);
+        return true;
+      }
+  }
+  // If we didn't return above, must go to the next non-null table index.
+  _tbl_ind++;
+  while ((size_t)_tbl_ind < _rsht->capacity()) {
+    _bl_ind = _rsht->_buckets[_tbl_ind];
+    ci = find_first_card_in_list();
+    if (ci != SparsePRTEntry::NullEntry) {
+      card_index = compute_card_ind(ci);
+      return true;
+    }
+    // Otherwise, try next entry.
+    _tbl_ind++;
+  }
+  // Otherwise, there were no more entries.
+  return false;
+}
+
+bool RSHashTable::contains_card(short region_index, short card_index) const {
+  SparsePRTEntry* e = entry_for_region_ind(region_index);
+  return (e != NULL && e->contains_card(card_index));
+}
+
+size_t RSHashTable::mem_size() const {
+  return sizeof(this) + capacity() * (sizeof(SparsePRTEntry) + sizeof(short));
+}
+
+
+// ----------------------------------------------------------------------
+
+SparsePRT* SparsePRT::_head_expanded_list = NULL;
+
+void SparsePRT::add_to_expanded_list(SparsePRT* sprt) {
+  // We could expand multiple times in a pause -- only put on list once.
+  if (sprt->expanded()) return;
+  sprt->set_expanded(true);
+  SparsePRT* hd = _head_expanded_list;
+  while (true) {
+    sprt->_next_expanded = hd;
+    SparsePRT* res =
+      (SparsePRT*)
+      Atomic::cmpxchg_ptr(sprt, &_head_expanded_list, hd);
+    if (res == hd) return;
+    else hd = res;
+  }
+}
+
+SparsePRT* SparsePRT::get_from_expanded_list() {
+  SparsePRT* hd = _head_expanded_list;
+  while (hd != NULL) {
+    SparsePRT* next = hd->next_expanded();
+    SparsePRT* res =
+      (SparsePRT*)
+      Atomic::cmpxchg_ptr(next, &_head_expanded_list, hd);
+    if (res == hd) {
+      hd->set_next_expanded(NULL);
+      return hd;
+    } else {
+      hd = res;
+    }
+  }
+  return NULL;
+}
+
+
+void SparsePRT::cleanup_all() {
+  // First clean up all expanded tables so they agree on next and cur.
+  SparsePRT* sprt = get_from_expanded_list();
+  while (sprt != NULL) {
+    sprt->cleanup();
+    sprt = get_from_expanded_list();
+  }
+  // Now delete all deleted RSHashTables.
+  RSHashTable* rsht = RSHashTable::get_from_deleted_list();
+  while (rsht != NULL) {
+#if SPARSE_PRT_VERBOSE
+    gclog_or_tty->print_cr("About to delete RSHT " PTR_FORMAT ".", rsht);
+#endif
+    delete rsht;
+    rsht = RSHashTable::get_from_deleted_list();
+  }
+}
+
+
+SparsePRT::SparsePRT(HeapRegion* hr) :
+  _expanded(false), _next_expanded(NULL)
+{
+  _cur = new RSHashTable(InitialCapacity);
+  _next = _cur;
+}
+
+SparsePRT::~SparsePRT() {
+  assert(_next != NULL && _cur != NULL, "Inv");
+  if (_cur != _next) { delete _cur; }
+  delete _next;
+}
+
+
+size_t SparsePRT::mem_size() const {
+  // We ignore "_cur" here, because it either = _next, or else it is
+  // on the deleted list.
+  return sizeof(this) + _next->mem_size();
+}
+
+bool SparsePRT::add_card(short region_id, short card_index) {
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr("  Adding card %d from region %d to region %d sparse.",
+                card_index, region_id, _hr->hrs_index());
+#endif
+  if (_next->occupied_entries() * 2 > _next->capacity()) {
+    expand();
+  }
+  return _next->add_card(region_id, card_index);
+}
+
+bool SparsePRT::get_cards(short region_id, short* cards) {
+  return _next->get_cards(region_id, cards);
+}
+
+bool SparsePRT::delete_entry(short region_id) {
+  return _next->delete_entry(region_id);
+}
+
+void SparsePRT::clear() {
+  // If they differ, _next is bigger than _cur, so _next has no chance of
+  // being the initial size.
+  if (_next != _cur) {
+    delete _next;
+  }
+
+  if (_cur->capacity() != InitialCapacity) {
+    delete _cur;
+    _cur = new RSHashTable(InitialCapacity);
+  } else {
+    _cur->clear();
+  }
+  _next = _cur;
+}
+
+void SparsePRT::cleanup() {
+  // Make sure that the current and next tables agree.  (Another mechanism
+  // takes care of deleting now-unused tables.)
+  _cur = _next;
+}
+
+void SparsePRT::expand() {
+  RSHashTable* last = _next;
+  _next = new RSHashTable(last->capacity() * 2);
+
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr("  Expanded sparse table for %d to %d.",
+                _hr->hrs_index(), _next->capacity());
+#endif
+  for (size_t i = 0; i < last->capacity(); i++) {
+    SparsePRTEntry* e = last->entry((int)i);
+    if (e->valid_entry()) {
+#if SPARSE_PRT_VERBOSE
+      gclog_or_tty->print_cr("    During expansion, transferred entry for %d.",
+                    e->r_ind());
+#endif
+      _next->add_entry(e);
+    }
+  }
+  if (last != _cur)
+    RSHashTable::add_to_deleted_list(last);
+  add_to_expanded_list(this);
+}
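add_to_deleted_list/get_from_deleted_list (and the analogous expanded-list functions) above push and pop list nodes with a compare-and-swap retry loop. A standalone sketch of that loop, with std::atomic standing in for Atomic::cmpxchg_ptr and ToyNode as an illustrative node type:

#include <atomic>

struct ToyNode {
  ToyNode* next;
  ToyNode() : next(nullptr) {}
};

static std::atomic<ToyNode*> g_head(nullptr);

void push_node(ToyNode* n) {
  ToyNode* hd = g_head.load();
  while (true) {
    n->next = hd;                                      // link to the head we observed
    if (g_head.compare_exchange_weak(hd, n)) return;   // on failure, hd is reloaded and we retry
  }
}

ToyNode* pop_node() {
  ToyNode* hd = g_head.load();
  while (hd != nullptr) {
    ToyNode* next = hd->next;
    if (g_head.compare_exchange_weak(hd, next)) {      // on failure, hd is refreshed; retry
      hd->next = nullptr;
      return hd;
    }
  }
  return nullptr;
}

// Note: the real pops happen in single-threaded cleanup (cleanup_all), which sidesteps
// the ABA concerns a fully concurrent pop would have.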
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Sparse remembered set for a heap region (the "owning" region).  Maps
+// indices of other regions to short sequences of cards in the other region
+// that might contain pointers into the owner region.
+
+// These tables only expand while they are accessed in parallel --
+// deletions may be done only in single-threaded code.  This lets us allow
+// unsynchronized reads/iterations, as long as expansions caused by
+// insertions only enqueue old versions for deletion, and do not delete
+// old versions synchronously.
+
+
+class SparsePRTEntry {
+public:
+  enum SomePublicConstants {
+    CardsPerEntry = (short)4,
+    NullEntry = (short)-1,
+    DeletedEntry = (short)-2
+  };
+
+private:
+  short _region_ind;
+  short _next_index;
+  short _cards[CardsPerEntry];
+
+public:
+
+  // Set the region_ind to the given value, and delete all cards.
+  inline void init(short region_ind);
+
+  short r_ind() const { return _region_ind; }
+  bool valid_entry() const { return r_ind() >= 0; }
+  void set_r_ind(short rind) { _region_ind = rind; }
+
+  short next_index() const { return _next_index; }
+  short* next_index_addr() { return &_next_index; }
+  void set_next_index(short ni) { _next_index = ni; }
+
+  // Returns "true" iff the entry contains the given card index.
+  inline bool contains_card(short card_index) const;
+
+  // Returns the number of non-NULL card entries.
+  inline int num_valid_cards() const;
+
+  // Requires that the entry not contain the given card index.  If there is
+  // space available, add the given card index to the entry and return
+  // "true"; otherwise, return "false" to indicate that the entry is full.
+  enum AddCardResult {
+    overflow,
+    found,
+    added
+  };
+  inline AddCardResult add_card(short card_index);
+
+  // Copy the current entry's cards into "cards".
+  inline void copy_cards(short* cards) const;
+  // Copy the current entry's cards into the "_card" array of "e."
+  inline void copy_cards(SparsePRTEntry* e) const;
+
+  inline short card(int i) const { return _cards[i]; }
+};
+
+
+class RSHashTable : public CHeapObj {
+
+  friend class RSHashTableIter;
+
+  enum SomePrivateConstants {
+    NullEntry = -1
+  };
+
+  size_t _capacity;
+  size_t _capacity_mask;
+  size_t _occupied_entries;
+  size_t _occupied_cards;
+
+  SparsePRTEntry* _entries;
+  short* _buckets;
+  short  _free_region;
+  short  _free_list;
+
+  static RSHashTable* _head_deleted_list;
+  RSHashTable* _next_deleted;
+  RSHashTable* next_deleted() { return _next_deleted; }
+  void set_next_deleted(RSHashTable* rsht) { _next_deleted = rsht; }
+  bool _deleted;
+  void set_deleted(bool b) { _deleted = b; }
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise returns "NULL."
+  SparsePRTEntry* entry_for_region_ind(short region_ind) const;
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise allocates, initializes, inserts and
+  // returns a new entry for "region_ind".
+  SparsePRTEntry* entry_for_region_ind_create(short region_ind);
+
+  // Returns the index of the next free entry in "_entries".
+  short alloc_entry();
+  // Declares the entry "fi" to be free.  (It must have already been
+  // deleted from any bucket lists.
+  void free_entry(short fi);
+
+public:
+  RSHashTable(size_t capacity);
+  ~RSHashTable();
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  bool get_cards(short region_id, short* cards);
+  bool delete_entry(short region_id);
+
+  bool contains_card(short region_id, short card_index) const;
+
+  void add_entry(SparsePRTEntry* e);
+
+  void clear();
+
+  size_t capacity() const      { return _capacity;       }
+  size_t capacity_mask() const { return _capacity_mask;  }
+  size_t occupied_entries() const { return _occupied_entries; }
+  size_t occupied_cards() const   { return _occupied_cards;   }
+  size_t mem_size() const;
+  bool deleted() { return _deleted; }
+
+  SparsePRTEntry* entry(int i) const { return &_entries[i]; }
+
+  void print();
+
+  static void add_to_deleted_list(RSHashTable* rsht);
+  static RSHashTable* get_from_deleted_list();
+
+
+};
+
+  // ValueObj because it will be embedded in an HRRS iterator.
+class RSHashTableIter: public CHeapObj {
+    short _tbl_ind;
+    short _bl_ind;
+    short _card_ind;
+    RSHashTable* _rsht;
+    size_t _heap_bot_card_ind;
+
+    enum SomePrivateConstants {
+      CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+    };
+
+    // If the bucket list pointed to by _bl_ind contains a card, sets
+    // _bl_ind to the index of that entry, and returns the card.
+    // Otherwise, returns SparsePRTEntry::NullEntry.
+    short find_first_card_in_list();
+    // Computes the proper card index for the card whose offset in the
+    // current region (as indicated by _bl_ind) is "ci".
+    // This is subject to errors when there is iteration concurrent with
+    // modification, but these errors should be benign.
+    size_t compute_card_ind(short ci);
+
+  public:
+    RSHashTableIter(size_t heap_bot_card_ind) :
+      _tbl_ind(RSHashTable::NullEntry),
+      _bl_ind(RSHashTable::NullEntry),
+      _card_ind((SparsePRTEntry::CardsPerEntry-1)),
+      _rsht(NULL),
+      _heap_bot_card_ind(heap_bot_card_ind)
+    {}
+
+    void init(RSHashTable* rsht) {
+      _rsht = rsht;
+      _tbl_ind = -1; // So that first increment gets to 0.
+      _bl_ind = RSHashTable::NullEntry;
+      _card_ind = (SparsePRTEntry::CardsPerEntry-1);
+    }
+
+    bool has_next(size_t& card_index);
+
+  };
+
+// Concurrent access to a SparsePRT must be serialized by some external
+// mutex.
+
+class SparsePRTIter;
+
+class SparsePRT : public CHeapObj {
+  //  Iterations are done on the _cur hash table, since they only need to
+  //  see entries visible at the start of a collection pause.
+  //  All other operations are done using the _next hash table.
+  RSHashTable* _cur;
+  RSHashTable* _next;
+
+  HeapRegion* _hr;
+
+  enum SomeAdditionalPrivateConstants {
+    InitialCapacity = 16
+  };
+
+  void expand();
+
+  bool _expanded;
+
+  bool expanded() { return _expanded; }
+  void set_expanded(bool b) { _expanded = b; }
+
+  SparsePRT* _next_expanded;
+
+  SparsePRT* next_expanded() { return _next_expanded; }
+  void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
+
+
+  static SparsePRT* _head_expanded_list;
+
+public:
+  SparsePRT(HeapRegion* hr);
+
+  ~SparsePRT();
+
+  size_t occupied() const { return _next->occupied_cards(); }
+  size_t mem_size() const;
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  // If the table holds an entry for "region_ind", copies its
+  // cards into "cards", which must be an array of length at least
+  // "CardsPerEntry", and returns "true"; otherwise, returns "false".
+  bool get_cards(short region_ind, short* cards);
+
+  // If there is an entry for "region_ind", removes it and return "true";
+  // otherwise returns "false."
+  bool delete_entry(short region_ind);
+
+  // Clear the table, and reinitialize to initial capacity.
+  void clear();
+
+  // Ensure that "_cur" and "_next" point to the same table.
+  void cleanup();
+
+  // Clean up all tables on the expanded list.  Called single threaded.
+  static void cleanup_all();
+  RSHashTable* next() const { return _next; }
+
+
+  void init_iterator(SparsePRTIter* sprt_iter);
+
+  static void add_to_expanded_list(SparsePRT* sprt);
+  static SparsePRT* get_from_expanded_list();
+
+  bool contains_card(short region_id, short card_index) const {
+    return _next->contains_card(region_id, card_index);
+  }
+
+#if 0
+  void verify_is_cleared();
+  void print();
+#endif
+};
+
+
+class SparsePRTIter: public /* RSHashTable:: */RSHashTableIter {
+public:
+  SparsePRTIter(size_t heap_bot_card_ind) :
+    /* RSHashTable:: */RSHashTableIter(heap_bot_card_ind)
+  {}
+
+  void init(const SparsePRT* sprt) {
+    RSHashTableIter::init(sprt->next());
+  }
+  bool has_next(size_t& card_index) {
+    return RSHashTableIter::has_next(card_index);
+  }
+};
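The RSHashTable declared above keeps a _buckets array of short indices into a flat _entries array, with entries chained through next_index and NullEntry (-1) as the sentinel; new entries are spliced onto the front of a bucket's chain. A toy model of the lookup path follows; names, layout and sizes are illustrative only.

#include <cstddef>
#include <vector>

struct ToyEntry {
  short region_ind;
  short next_index;            // index of the next entry in this bucket's chain, or -1
};

struct ToyTable {
  enum { NullEntry = -1 };
  std::vector<short>    buckets;   // capacity is assumed to be a power of two
  std::vector<ToyEntry> entries;

  explicit ToyTable(size_t capacity)
    : buckets(capacity, (short)NullEntry), entries(capacity) {}

  ToyEntry* find(short region_ind) {
    size_t mask = buckets.size() - 1;                   // works because capacity is 2^k
    short cur_ind = buckets[region_ind & mask];
    while (cur_ind != NullEntry) {
      ToyEntry* cur = &entries[cur_ind];
      if (cur->region_ind == region_ind) return cur;    // found this region's chain node
      cur_ind = cur->next_index;
    }
    return NULL;                                        // region has no sparse entry
  }
};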
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_survRateGroup.cpp.incl"
+
+SurvRateGroup::SurvRateGroup(G1CollectorPolicy* g1p,
+                             const char* name,
+                             size_t summary_surv_rates_len) :
+    _g1p(g1p), _name(name),
+    _all_regions_allocated(0),
+    _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0),
+    _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL),
+    _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0),
+    _summary_surv_rates_len(summary_surv_rates_len),
+    _summary_surv_rates_max_len(0),
+    _summary_surv_rates(NULL) {
+
+  // the following will set up the arrays with length 1
+  _curr_length = 1;
+  stop_adding_regions();
+  guarantee( _array_length == 1, "invariant" );
+  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
+  _surv_rate_pred[0]->add(0.4);
+  all_surviving_words_recorded(false);
+  _curr_length = 0;
+
+  if (summary_surv_rates_len > 0) {
+    size_t length = summary_surv_rates_len;
+    _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
+    if (_summary_surv_rates == NULL) {
+      vm_exit_out_of_memory(sizeof(NumberSeq*) * length,
+                            "Not enough space for surv rate summary");
+    }
+    for (size_t i = 0; i < length; ++i)
+      _summary_surv_rates[i] = new NumberSeq();
+  }
+
+  start_adding_regions();
+}
+
+void
+SurvRateGroup::start_adding_regions() {
+  _setup_seq_num   = _array_length;
+  _curr_length     = _scan_only_prefix;
+  _accum_surv_rate = 0.0;
+
+#if 0
+  gclog_or_tty->print_cr("start adding regions, seq num %d, length %d",
+                         _setup_seq_num, _curr_length);
+#endif // 0
+}
+
+void
+SurvRateGroup::stop_adding_regions() {
+  size_t length = _curr_length;
+
+#if 0
+  gclog_or_tty->print_cr("stop adding regions, length %d", length);
+#endif // 0
+
+  if (length > _array_length) {
+    double* old_surv_rate = _surv_rate;
+    double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
+    TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
+
+    _surv_rate = NEW_C_HEAP_ARRAY(double, length);
+    if (_surv_rate == NULL) {
+      vm_exit_out_of_memory(sizeof(double) * length,
+                            "Not enough space for surv rate array.");
+    }
+    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length);
+    if (_accum_surv_rate_pred == NULL) {
+      vm_exit_out_of_memory(sizeof(double) * length,
+                         "Not enough space for accum surv rate pred array.");
+    }
+    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length);
+    if (_surv_rate_pred == NULL) {
+      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length,
+                            "Not enough space for surv rate pred array.");
+    }
+
+    for (size_t i = 0; i < _array_length; ++i)
+      _surv_rate_pred[i] = old_surv_rate_pred[i];
+
+#if 0
+    gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d",
+                  _array_length, length - 1);
+#endif // 0
+
+    for (size_t i = _array_length; i < length; ++i) {
+      _surv_rate_pred[i] = new TruncatedSeq(10);
+      // _surv_rate_pred[i]->add(last_pred);
+    }
+
+    _array_length = length;
+
+    if (old_surv_rate != NULL)
+      FREE_C_HEAP_ARRAY(double, old_surv_rate);
+    if (old_accum_surv_rate_pred != NULL)
+      FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred);
+    if (old_surv_rate_pred != NULL)
+      FREE_C_HEAP_ARRAY(TruncatedSeq*, old_surv_rate_pred);
+  }
+
+  for (size_t i = 0; i < _array_length; ++i)
+    _surv_rate[i] = 0.0;
+}
+
+double
+SurvRateGroup::accum_surv_rate(size_t adjustment) {
+  // we might relax this one in the future...
+  guarantee( adjustment == 0 || adjustment == 1, "pre-condition" );
+
+  double ret = _accum_surv_rate;
+  if (adjustment > 0) {
+    TruncatedSeq* seq = get_seq(_curr_length+1);
+    double surv_rate = _g1p->get_new_prediction(seq);
+    ret += surv_rate;
+  }
+
+  return ret;
+}
+
+int
+SurvRateGroup::next_age_index() {
+  TruncatedSeq* seq = get_seq(_curr_length);
+  double surv_rate = _g1p->get_new_prediction(seq);
+  _accum_surv_rate += surv_rate;
+
+  ++_curr_length;
+  return (int) ++_all_regions_allocated;
+}
+
+void
+SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) {
+  guarantee( scan_only_prefix <= _curr_length, "pre-condition" );
+  _scan_only_prefix = scan_only_prefix;
+}
+
+void
+SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) {
+  guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length,
+             "pre-condition" );
+  guarantee( _surv_rate[age_in_group] <= 0.00001,
+             "should only update each slot once" );
+
+  double surv_rate = (double) surv_words / (double) HeapRegion::GrainWords;
+  _surv_rate[age_in_group] = surv_rate;
+  _surv_rate_pred[age_in_group]->add(surv_rate);
+  if ((size_t)age_in_group < _summary_surv_rates_len) {
+    _summary_surv_rates[age_in_group]->add(surv_rate);
+    if ((size_t)(age_in_group+1) > _summary_surv_rates_max_len)
+      _summary_surv_rates_max_len = age_in_group+1;
+  }
+}
+
+void
+SurvRateGroup::all_surviving_words_recorded(bool propagate) {
+  if (propagate && _curr_length > 0) { // conservative
+    double surv_rate = _surv_rate_pred[_curr_length-1]->last();
+
+#if 0
+    gclog_or_tty->print_cr("propagating %1.2lf from %d to %d",
+                  surv_rate, _curr_length, _array_length - 1);
+#endif // 0
+
+    for (size_t i = _curr_length; i < _array_length; ++i) {
+      guarantee( _surv_rate[i] <= 0.00001,
+                 "the slot should not have been updated" );
+      _surv_rate_pred[i]->add(surv_rate);
+    }
+  }
+
+  double accum = 0.0;
+  double pred = 0.0;
+  for (size_t i = 0; i < _array_length; ++i) {
+    pred = _g1p->get_new_prediction(_surv_rate_pred[i]);
+    if (pred > 1.0) pred = 1.0;
+    accum += pred;
+    _accum_surv_rate_pred[i] = accum;
+    // gclog_or_tty->print_cr("age %3d, accum %10.2lf", i, accum);
+  }
+  _last_pred = pred;
+}
+
+#ifndef PRODUCT
+void
+SurvRateGroup::print() {
+  gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)",
+                _name, _curr_length, _scan_only_prefix);
+  for (size_t i = 0; i < _curr_length; ++i) {
+    gclog_or_tty->print_cr("    age %4d   surv rate %6.2lf %%   pred %6.2lf %%%s",
+                  i, _surv_rate[i] * 100.0,
+                  _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
+                  (i < _scan_only_prefix) ? " S-O" : "    ");
+  }
+}
+
+void
+SurvRateGroup::print_surv_rate_summary() {
+  size_t length = _summary_surv_rates_max_len;
+  if (length == 0)
+    return;
+
+  gclog_or_tty->print_cr("");
+  gclog_or_tty->print_cr("%s Rate Summary (for up to age %d)", _name, length-1);
+  gclog_or_tty->print_cr("      age range     survival rate (avg)      samples (avg)");
+  gclog_or_tty->print_cr("  ---------------------------------------------------------");
+
+  size_t index = 0;
+  size_t limit = MIN2((int) length, 10);
+  while (index < limit) {
+    gclog_or_tty->print_cr("           %4d                 %6.2lf%%             %6.2lf",
+                  index, _summary_surv_rates[index]->avg() * 100.0,
+                  (double) _summary_surv_rates[index]->num());
+    ++index;
+  }
+
+  gclog_or_tty->print_cr("  ---------------------------------------------------------");
+
+  int num = 0;
+  double sum = 0.0;
+  int samples = 0;
+  while (index < length) {
+    ++num;
+    sum += _summary_surv_rates[index]->avg() * 100.0;
+    samples += _summary_surv_rates[index]->num();
+    ++index;
+
+    if (index == length || num % 10 == 0) {
+      gclog_or_tty->print_cr("   %4d .. %4d                 %6.2lf%%             %6.2lf",
+                    (index-1) / 10 * 10, index-1, sum / (double) num,
+                    (double) samples / (double) num);
+      sum = 0.0;
+      num = 0;
+      samples = 0;
+    }
+  }
+
+  gclog_or_tty->print_cr("  ---------------------------------------------------------");
+}
+#endif // PRODUCT
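all_surviving_words_recorded above clamps each per-age prediction to 1.0, accumulates a running prefix sum, and remembers the last prediction for later extrapolation. A standalone sketch of that accumulation step; the function name and parameters are illustrative, not the SurvRateGroup API.

#include <vector>

void accumulate_predictions(const std::vector<double>& pred_per_age,  // per-age predictions
                            std::vector<double>& accum_out,
                            double& last_pred_out) {
  accum_out.resize(pred_per_age.size());
  double accum = 0.0;
  double pred  = 0.0;
  for (size_t i = 0; i < pred_per_age.size(); ++i) {
    pred = pred_per_age[i];
    if (pred > 1.0) pred = 1.0;    // a region cannot have more than 100% of its words survive
    accum += pred;
    accum_out[i] = accum;          // accumulated expected survival up to and including age i
  }
  last_pred_out = pred;            // used to extrapolate for ages past the recorded array
}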
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class G1CollectorPolicy;
+
+class SurvRateGroup : public CHeapObj {
+private:
+  G1CollectorPolicy* _g1p;
+  const char* _name;
+
+  size_t  _array_length;
+  double* _surv_rate;
+  double* _accum_surv_rate_pred;
+  double  _last_pred;
+  double  _accum_surv_rate;
+  TruncatedSeq** _surv_rate_pred;
+  NumberSeq**    _summary_surv_rates;
+  size_t         _summary_surv_rates_len;
+  size_t         _summary_surv_rates_max_len;
+
+  int _all_regions_allocated;
+  size_t _curr_length;
+  size_t _scan_only_prefix;
+  size_t _setup_seq_num;
+
+public:
+  SurvRateGroup(G1CollectorPolicy* g1p,
+                const char* name,
+                size_t summary_surv_rates_len);
+  void start_adding_regions();
+  void stop_adding_regions();
+  void record_scan_only_prefix(size_t scan_only_prefix);
+  void record_surviving_words(int age_in_group, size_t surv_words);
+  void all_surviving_words_recorded(bool propagate);
+  const char* name() { return _name; }
+
+  size_t region_num() { return _curr_length; }
+  size_t scan_only_length() { return _scan_only_prefix; }
+  double accum_surv_rate_pred(int age) {
+    assert(age >= 0, "must be");
+    if ((size_t)age < _array_length)
+      return _accum_surv_rate_pred[age];
+    else {
+      double diff = (double) (age - _array_length + 1);
+      return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred;
+    }
+  }
+
+  double accum_surv_rate(size_t adjustment);
+
+  TruncatedSeq* get_seq(size_t age) {
+    guarantee( 0 <= age, "pre-condition" );
+    if (age >= _setup_seq_num) {
+      guarantee( _setup_seq_num > 0, "invariant" );
+      age = _setup_seq_num-1;
+    }
+    TruncatedSeq* seq = _surv_rate_pred[age];
+    guarantee( seq != NULL, "invariant" );
+    return seq;
+  }
+
+  int next_age_index();
+  int age_in_group(int age_index) {
+    int ret = (int) (_all_regions_allocated -  age_index);
+    assert( ret >= 0, "invariant" );
+    return ret;
+  }
+  int recalculate_age_index(int age_index) {
+    int new_age_index = (int) _scan_only_prefix - age_in_group(age_index);
+    guarantee( new_age_index >= 0, "invariant" );
+    return new_age_index;
+  }
+  void finished_recalculating_age_indexes() {
+    _all_regions_allocated = (int) _scan_only_prefix;
+  }
+
+#ifndef PRODUCT
+  void print();
+  void print_surv_rate_summary();
+#endif // PRODUCT
+};
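accum_surv_rate_pred above extrapolates linearly past the recorded ages using _last_pred. A small standalone sketch of that arithmetic; names are illustrative only.

#include <cstddef>

double accum_surv_rate_pred_sketch(const double* accum,   // accumulated predictions, length "len"
                                   size_t len,            // must be >= 1
                                   double last_pred,      // prediction for the last recorded age
                                   size_t age) {
  if (age < len) {
    return accum[age];
  }
  double diff = (double)(age - len + 1);      // how many ages past the end we are
  return accum[len - 1] + diff * last_pred;   // assume each further age survives at last_pred
}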
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_vm_operations_g1.cpp.incl"
+
+void VM_G1CollectForAllocation::doit() {
+  JvmtiGCForAllocationMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  _res = g1h->satisfy_failed_allocation(_size);
+  assert(g1h->is_in_or_null(_res), "result not in heap");
+}
+
+void VM_G1CollectFull::doit() {
+  JvmtiGCFullMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  GCCauseSetter x(g1h, _gc_cause);
+  g1h->do_full_collection(false /* clear_all_soft_refs */);
+}
+
+void VM_G1IncCollectionPause::doit() {
+  JvmtiGCForAllocationMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause);
+  g1h->do_collection_pause_at_safepoint(NULL);
+}
+
+void VM_G1PopRegionCollectionPause::doit() {
+  JvmtiGCForAllocationMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  g1h->do_collection_pause_at_safepoint(_pop_region);
+}
+
+
+void VM_CGC_Operation::doit() {
+  gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
+  TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
+  TraceTime t(_printGCMessage, PrintGC, true, gclog_or_tty);
+  SharedHeap* sh = SharedHeap::heap();
+  // This could go away if CollectedHeap gave access to _gc_is_active...
+  if (sh != NULL) {
+    IsGCActiveMark x;
+    _cl->do_void();
+  } else {
+    _cl->do_void();
+  }
+}
+
+bool VM_CGC_Operation::doit_prologue() {
+  Heap_lock->lock();
+  SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true;
+  return true;
+}
+
+void VM_CGC_Operation::doit_epilogue() {
+  SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false;
+  Heap_lock->unlock();
+}
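VM_CGC_Operation above takes the Heap_lock in doit_prologue and releases it in doit_epilogue, bracketing doit(). A toy model of that bracket, with std::mutex standing in for Heap_lock; the real operation is scheduled on the VM thread, while this sketch simply runs the three phases inline.

#include <mutex>

static std::mutex toy_heap_lock;       // stands in for Heap_lock

struct ToyOperation {
  bool doit_prologue() {
    toy_heap_lock.lock();              // requester side: keep other heap users out
    return true;                       // "true" means the operation should proceed
  }
  void doit() {
    // ... the stop-the-world or concurrent-cycle work would run here ...
  }
  void doit_epilogue() {
    toy_heap_lock.unlock();            // requester side: release once doit() has finished
  }
};

void execute(ToyOperation& op) {
  if (op.doit_prologue()) {
    op.doit();
    op.doit_epilogue();
  }
}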
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// VM_operations for the G1 collector.
+// VM_GC_Operation:
+//   - VM_CGC_Operation
+//   - VM_G1CollectFull
+//   - VM_G1CollectForAllocation
+//   - VM_G1IncCollectionPause
+//   - VM_G1PopRegionCollectionPause
+
+class VM_G1CollectFull: public VM_GC_Operation {
+ private:
+ public:
+  VM_G1CollectFull(int gc_count_before,
+                   GCCause::Cause gc_cause)
+    : VM_GC_Operation(gc_count_before)
+  {
+    _gc_cause = gc_cause;
+  }
+  ~VM_G1CollectFull() {}
+  virtual VMOp_Type type() const { return VMOp_G1CollectFull; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "full garbage-first collection";
+  }
+};
+
+class VM_G1CollectForAllocation: public VM_GC_Operation {
+ private:
+  HeapWord*   _res;
+  size_t      _size;                       // size of object to be allocated
+ public:
+  VM_G1CollectForAllocation(size_t size, int gc_count_before)
+    : VM_GC_Operation(gc_count_before) {
+    _size        = size;
+    _res         = NULL;
+  }
+  ~VM_G1CollectForAllocation()        {}
+  virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "garbage-first collection to satisfy allocation";
+  }
+  HeapWord* result() { return _res; }
+};
+
+class VM_G1IncCollectionPause: public VM_GC_Operation {
+ public:
+  VM_G1IncCollectionPause(int gc_count_before) :
+    VM_GC_Operation(gc_count_before) {}
+  virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "garbage-first incremental collection pause";
+  }
+};
+
+class VM_G1PopRegionCollectionPause: public VM_GC_Operation {
+  HeapRegion* _pop_region;
+ public:
+  VM_G1PopRegionCollectionPause(int gc_count_before, HeapRegion* pop_region) :
+    VM_GC_Operation(gc_count_before),
+    _pop_region(pop_region)
+  {}
+  virtual VMOp_Type type() const { return VMOp_G1PopRegionCollectionPause; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "garbage-first popular region collection pause";
+  }
+};
+
+// Concurrent GC stop-the-world operations such as initial and final mark;
+// consider sharing these with CMS's counterparts.
+class VM_CGC_Operation: public VM_Operation {
+  VoidClosure* _cl;
+  const char* _printGCMessage;
+ public:
+  VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg) :
+    _cl(cl),
+    _printGCMessage(printGCMsg)
+    {}
+
+  ~VM_CGC_Operation() {}
+
+  virtual VMOp_Type type() const { return VMOp_CGC_Operation; }
+  virtual void doit();
+  virtual bool doit_prologue();
+  virtual void doit_epilogue();
+  virtual const char* name() const {
+    return "concurrent gc";
+  }
+};
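VM_G1CollectForAllocation above carries the requested size into the operation object and the resulting address back out, so the requesting thread can read the result after doit() has run. A toy sketch of that request/response shape; the types and the "collection failed" behavior are illustrative only.

#include <cstddef>

typedef char* ToyHeapWord;             // stand-in for HeapWord*

struct ToyCollectForAllocation {
  size_t      size;                    // in:  size of the object that failed to allocate
  ToyHeapWord res;                     // out: address produced by collect-and-retry, or NULL

  explicit ToyCollectForAllocation(size_t sz) : size(sz), res(NULL) {}

  void doit() {
    // The real operation calls g1h->satisfy_failed_allocation(_size); here we only
    // model the case where the collection did not free enough space.
    res = NULL;
  }
  ToyHeapWord result() const { return res; }
};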
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Wed Jul 05 16:43:15 2017 +0200
@@ -125,17 +125,6 @@
 
 compactingPermGenGen.cpp                concurrentMarkSweepGeneration.inline.hpp
 
-concurrentGCThread.cpp                  concurrentGCThread.hpp
-concurrentGCThread.cpp                  init.hpp
-concurrentGCThread.cpp                  instanceRefKlass.hpp
-concurrentGCThread.cpp                  interfaceSupport.hpp
-concurrentGCThread.cpp                  java.hpp
-concurrentGCThread.cpp                  javaCalls.hpp
-concurrentGCThread.cpp                  oop.inline.hpp
-concurrentGCThread.cpp                  systemDictionary.hpp
-
-concurrentGCThread.hpp                  thread.hpp
-
 concurrentMarkSweepGeneration.cpp       cardTableRS.hpp
 concurrentMarkSweepGeneration.cpp       cmsAdaptiveSizePolicy.hpp
 concurrentMarkSweepGeneration.cpp       cmsCollectorPolicy.hpp
@@ -167,7 +156,7 @@
 concurrentMarkSweepGeneration.cpp       vmCMSOperations.hpp
 concurrentMarkSweepGeneration.cpp       vmThread.hpp
 
-concurrentMarkSweepGeneration.hpp       bitMap.hpp
+concurrentMarkSweepGeneration.hpp       bitMap.inline.hpp
 concurrentMarkSweepGeneration.hpp       freeBlockDictionary.hpp
 concurrentMarkSweepGeneration.hpp       gSpaceCounters.hpp
 concurrentMarkSweepGeneration.hpp       gcStats.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,351 @@
+//
+// Copyright 2004-2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+//
+
+// NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps!
+
+bufferingOopClosure.hpp			genOopClosures.hpp
+bufferingOopClosure.hpp			generation.hpp
+bufferingOopClosure.hpp			os.hpp
+
+cardTableRS.cpp				concurrentMark.hpp
+cardTableRS.cpp				g1SATBCardTableModRefBS.hpp
+
+collectionSetChooser.cpp		g1CollectedHeap.hpp
+collectionSetChooser.cpp		g1CollectorPolicy.hpp
+collectionSetChooser.cpp		collectionSetChooser.hpp
+
+collectionSetChooser.hpp		heapRegion.hpp
+collectionSetChooser.hpp                growableArray.hpp
+
+concurrentG1Refine.cpp			atomic.hpp
+concurrentG1Refine.cpp			concurrentG1Refine.hpp
+concurrentG1Refine.cpp			concurrentG1RefineThread.hpp
+concurrentG1Refine.cpp			copy.hpp
+concurrentG1Refine.cpp			g1CollectedHeap.hpp
+concurrentG1Refine.cpp			g1RemSet.hpp
+
+concurrentG1Refine.hpp			globalDefinitions.hpp
+
+concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
+concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
+concurrentG1RefineThread.cpp		g1CollectedHeap.hpp
+concurrentG1RefineThread.cpp            g1CollectorPolicy.hpp
+concurrentG1RefineThread.cpp		handles.inline.hpp
+concurrentG1RefineThread.cpp		mutexLocker.hpp
+concurrentG1RefineThread.cpp		resourceArea.hpp
+
+concurrentG1RefineThread.hpp		concurrentGCThread.hpp
+concurrentG1RefineThread.hpp		coTracker.hpp
+
+concurrentMark.cpp			concurrentMark.hpp
+concurrentMark.cpp			concurrentMarkThread.inline.hpp
+concurrentMark.cpp			g1CollectedHeap.inline.hpp
+concurrentMark.cpp                      g1CollectorPolicy.hpp
+concurrentMark.cpp                      g1RemSet.hpp
+concurrentMark.cpp		        gcOverheadReporter.hpp
+concurrentMark.cpp		        genOopClosures.inline.hpp
+concurrentMark.cpp                      heapRegionRemSet.hpp
+concurrentMark.cpp                      heapRegionSeq.inline.hpp
+concurrentMark.cpp                      handles.inline.hpp
+concurrentMark.cpp			java.hpp
+concurrentMark.cpp			oop.inline.hpp
+concurrentMark.cpp                      referencePolicy.hpp
+concurrentMark.cpp			resourceArea.hpp
+concurrentMark.cpp			symbolTable.hpp
+
+concurrentMark.hpp			coTracker.hpp
+concurrentMark.hpp			heapRegion.hpp
+concurrentMark.hpp			taskqueue.hpp
+
+concurrentMarkThread.cpp		concurrentMarkThread.inline.hpp
+concurrentMarkThread.cpp		g1CollectedHeap.inline.hpp
+concurrentMarkThread.cpp		g1CollectorPolicy.hpp
+concurrentMarkThread.cpp                g1MMUTracker.hpp
+concurrentMarkThread.cpp		resourceArea.hpp
+concurrentMarkThread.cpp		vm_operations_g1.hpp
+concurrentMarkThread.cpp                vmThread.hpp
+
+concurrentMarkThread.hpp		concurrentGCThread.hpp
+
+concurrentMarkThread.inline.hpp		concurrentMark.hpp
+concurrentMarkThread.inline.hpp		concurrentMarkThread.hpp
+
+concurrentZFThread.cpp			concurrentZFThread.hpp
+concurrentZFThread.cpp			heapRegion.hpp
+concurrentZFThread.cpp			g1CollectedHeap.inline.hpp
+concurrentZFThread.cpp			copy.hpp
+concurrentZFThread.cpp			mutexLocker.hpp
+concurrentZFThread.cpp			space.inline.hpp
+
+concurrentZFThread.hpp			concurrentGCThread.hpp
+concurrentZFThread.hpp			coTracker.hpp
+
+dirtyCardQueue.cpp                      atomic.hpp
+dirtyCardQueue.cpp                      dirtyCardQueue.hpp
+dirtyCardQueue.cpp			heapRegionRemSet.hpp
+dirtyCardQueue.cpp                      mutexLocker.hpp
+dirtyCardQueue.cpp                      ptrQueue.inline.hpp
+dirtyCardQueue.cpp                      safepoint.hpp
+dirtyCardQueue.cpp                      thread.hpp
+dirtyCardQueue.cpp                      thread_<os_family>.inline.hpp
+dirtyCardQueue.cpp                      workgroup.hpp
+
+dirtyCardQueue.hpp                      allocation.hpp
+dirtyCardQueue.hpp                      ptrQueue.hpp
+
+g1BlockOffsetTable.cpp			g1BlockOffsetTable.inline.hpp
+g1BlockOffsetTable.cpp			java.hpp
+g1BlockOffsetTable.cpp			oop.inline.hpp
+g1BlockOffsetTable.cpp			space.hpp
+
+g1BlockOffsetTable.hpp			globalDefinitions.hpp
+g1BlockOffsetTable.hpp			memRegion.hpp
+g1BlockOffsetTable.hpp			virtualspace.hpp
+
+g1BlockOffsetTable.inline.hpp		g1BlockOffsetTable.hpp
+g1BlockOffsetTable.inline.hpp		space.hpp
+
+g1CollectedHeap.cpp                     aprofiler.hpp
+g1CollectedHeap.cpp                     bufferingOopClosure.hpp
+g1CollectedHeap.cpp                     concurrentG1Refine.hpp
+g1CollectedHeap.cpp                     concurrentG1RefineThread.hpp
+g1CollectedHeap.cpp			concurrentMarkThread.inline.hpp
+g1CollectedHeap.cpp                     concurrentZFThread.hpp
+g1CollectedHeap.cpp                     g1CollectedHeap.inline.hpp
+g1CollectedHeap.cpp                     g1CollectorPolicy.hpp
+g1CollectedHeap.cpp                     g1MarkSweep.hpp
+g1CollectedHeap.cpp                     g1RemSet.hpp
+g1CollectedHeap.cpp                     g1OopClosures.inline.hpp
+g1CollectedHeap.cpp                     genOopClosures.inline.hpp
+g1CollectedHeap.cpp                     gcLocker.inline.hpp
+g1CollectedHeap.cpp                     gcOverheadReporter.hpp
+g1CollectedHeap.cpp                     generationSpec.hpp
+g1CollectedHeap.cpp                     heapRegionRemSet.hpp
+g1CollectedHeap.cpp                     heapRegionSeq.inline.hpp
+g1CollectedHeap.cpp                     icBuffer.hpp
+g1CollectedHeap.cpp                     isGCActiveMark.hpp
+g1CollectedHeap.cpp			oop.inline.hpp
+g1CollectedHeap.cpp			oop.pcgc.inline.hpp
+g1CollectedHeap.cpp			parGCAllocBuffer.hpp
+g1CollectedHeap.cpp                     vm_operations_g1.hpp
+g1CollectedHeap.cpp                     vmThread.hpp
+
+g1CollectedHeap.hpp                     barrierSet.hpp
+g1CollectedHeap.hpp                     heapRegion.hpp
+g1CollectedHeap.hpp                     memRegion.hpp
+g1CollectedHeap.hpp                     sharedHeap.hpp
+
+g1CollectedHeap.inline.hpp              concurrentMark.hpp
+g1CollectedHeap.inline.hpp              g1CollectedHeap.hpp
+g1CollectedHeap.inline.hpp              heapRegionSeq.hpp
+g1CollectedHeap.inline.hpp		taskqueue.hpp
+
+g1CollectorPolicy.cpp			concurrentG1Refine.hpp
+g1CollectorPolicy.cpp			concurrentMark.hpp
+g1CollectorPolicy.cpp			concurrentMarkThread.inline.hpp
+g1CollectorPolicy.cpp			debug.hpp
+g1CollectorPolicy.cpp			java.hpp
+g1CollectorPolicy.cpp                   g1CollectedHeap.hpp
+g1CollectorPolicy.cpp                   g1CollectorPolicy.hpp
+g1CollectorPolicy.cpp                   heapRegionRemSet.hpp
+g1CollectorPolicy.cpp			mutexLocker.hpp
+
+g1CollectorPolicy.hpp                   collectorPolicy.hpp
+g1CollectorPolicy.hpp                   collectionSetChooser.hpp
+g1CollectorPolicy.hpp			g1MMUTracker.hpp
+
+g1_globals.cpp				g1_globals.hpp
+
+g1_globals.hpp                          globals.hpp
+
+globals.cpp                             g1_globals.hpp
+top.hpp                                 g1_globals.hpp
+
+g1MarkSweep.cpp                         aprofiler.hpp
+g1MarkSweep.cpp                         biasedLocking.hpp
+g1MarkSweep.cpp                         codeCache.hpp
+g1MarkSweep.cpp                         events.hpp
+g1MarkSweep.cpp                         fprofiler.hpp
+g1MarkSweep.hpp                         g1CollectedHeap.hpp
+g1MarkSweep.cpp                         g1MarkSweep.hpp
+g1MarkSweep.cpp                         gcLocker.hpp
+g1MarkSweep.cpp                         genCollectedHeap.hpp
+g1MarkSweep.hpp                         heapRegion.hpp
+g1MarkSweep.cpp                         icBuffer.hpp
+g1MarkSweep.cpp                         instanceRefKlass.hpp
+g1MarkSweep.cpp                         javaClasses.hpp
+g1MarkSweep.cpp				jvmtiExport.hpp
+g1MarkSweep.cpp                         copy.hpp
+g1MarkSweep.cpp                         modRefBarrierSet.hpp
+g1MarkSweep.cpp                         oop.inline.hpp
+g1MarkSweep.cpp                         referencePolicy.hpp
+g1MarkSweep.cpp                         space.hpp
+g1MarkSweep.cpp                         symbolTable.hpp
+g1MarkSweep.cpp                         synchronizer.hpp
+g1MarkSweep.cpp                         systemDictionary.hpp
+g1MarkSweep.cpp                         thread.hpp
+g1MarkSweep.cpp                         vmSymbols.hpp
+g1MarkSweep.cpp                         vmThread.hpp
+
+g1MarkSweep.hpp                         generation.hpp
+g1MarkSweep.hpp                         growableArray.hpp
+g1MarkSweep.hpp                         markOop.hpp
+g1MarkSweep.hpp                         genMarkSweep.hpp
+g1MarkSweep.hpp                         oop.hpp
+g1MarkSweep.hpp                         timer.hpp
+g1MarkSweep.hpp                         universe.hpp
+
+g1OopClosures.inline.hpp		concurrentMark.hpp
+g1OopClosures.inline.hpp		g1OopClosures.hpp
+g1OopClosures.inline.hpp		g1CollectedHeap.hpp
+g1OopClosures.inline.hpp		g1RemSet.hpp
+
+g1MMUTracker.cpp			g1MMUTracker.hpp
+g1MMUTracker.cpp			ostream.hpp
+g1MMUTracker.cpp			mutexLocker.hpp
+
+g1MMUTracker.hpp			debug.hpp
+
+g1RemSet.cpp				bufferingOopClosure.hpp
+g1RemSet.cpp				concurrentG1Refine.hpp
+g1RemSet.cpp				concurrentG1RefineThread.hpp
+g1RemSet.cpp				g1BlockOffsetTable.inline.hpp
+g1RemSet.cpp				g1CollectedHeap.inline.hpp
+g1RemSet.cpp				g1CollectorPolicy.hpp
+g1RemSet.cpp				g1RemSet.inline.hpp
+g1RemSet.cpp				g1OopClosures.inline.hpp
+g1RemSet.cpp				heapRegionSeq.inline.hpp
+g1RemSet.cpp				intHisto.hpp
+g1RemSet.cpp				iterator.hpp
+g1RemSet.cpp				oop.inline.hpp
+
+g1RemSet.inline.hpp			g1RemSet.hpp
+g1RemSet.inline.hpp			heapRegionRemSet.hpp
+
+g1SATBCardTableModRefBS.cpp		g1SATBCardTableModRefBS.hpp
+g1SATBCardTableModRefBS.cpp		heapRegion.hpp
+g1SATBCardTableModRefBS.cpp		mutexLocker.hpp
+g1SATBCardTableModRefBS.cpp		thread.hpp
+g1SATBCardTableModRefBS.cpp		thread_<os_family>.inline.hpp
+g1SATBCardTableModRefBS.cpp		satbQueue.hpp
+
+g1SATBCardTableModRefBS.hpp		cardTableModRefBS.hpp
+g1SATBCardTableModRefBS.hpp		memRegion.hpp
+
+heapRegion.cpp                          concurrentZFThread.hpp
+heapRegion.cpp                          g1BlockOffsetTable.inline.hpp
+heapRegion.cpp                          g1CollectedHeap.inline.hpp
+heapRegion.cpp                          g1OopClosures.inline.hpp
+heapRegion.cpp                          genOopClosures.inline.hpp
+heapRegion.cpp                          heapRegion.inline.hpp
+heapRegion.cpp                          heapRegionRemSet.hpp
+heapRegion.cpp                          heapRegionSeq.inline.hpp
+heapRegion.cpp                          iterator.hpp
+heapRegion.cpp                          oop.inline.hpp
+
+heapRegion.hpp                          space.hpp
+heapRegion.hpp                          spaceDecorator.hpp
+heapRegion.hpp                          g1BlockOffsetTable.inline.hpp
+heapRegion.hpp                          watermark.hpp
+heapRegion.hpp				g1_specialized_oop_closures.hpp
+heapRegion.hpp				survRateGroup.hpp
+
+heapRegionRemSet.hpp			sparsePRT.hpp
+
+heapRegionRemSet.cpp                    allocation.hpp
+heapRegionRemSet.cpp                    bitMap.inline.hpp
+heapRegionRemSet.cpp                    g1BlockOffsetTable.inline.hpp
+heapRegionRemSet.cpp                    g1CollectedHeap.inline.hpp
+heapRegionRemSet.cpp                    heapRegionRemSet.hpp
+heapRegionRemSet.cpp			heapRegionSeq.inline.hpp
+heapRegionRemSet.cpp                    globalDefinitions.hpp
+heapRegionRemSet.cpp                    space.inline.hpp
+
+heapRegionSeq.cpp                       allocation.hpp
+heapRegionSeq.cpp                       g1CollectedHeap.hpp
+heapRegionSeq.cpp                       heapRegionSeq.hpp
+
+heapRegionSeq.hpp                       growableArray.hpp
+heapRegionSeq.hpp                       heapRegion.hpp
+
+heapRegionSeq.inline.hpp                heapRegionSeq.hpp
+
+klass.hpp				g1OopClosures.hpp
+
+ptrQueue.cpp                            allocation.hpp
+ptrQueue.cpp                            allocation.inline.hpp
+ptrQueue.cpp                            mutex.hpp
+ptrQueue.cpp                            mutexLocker.hpp
+ptrQueue.cpp                            ptrQueue.hpp
+ptrQueue.cpp                            ptrQueue.inline.hpp
+ptrQueue.cpp                            thread_<os_family>.inline.hpp
+
+ptrQueue.hpp                            allocation.hpp
+ptrQueue.hpp                            sizes.hpp
+
+ptrQueue.inline.hpp                     ptrQueue.hpp
+
+satbQueue.cpp                           allocation.inline.hpp
+satbQueue.cpp                           mutexLocker.hpp
+satbQueue.cpp                           ptrQueue.inline.hpp
+satbQueue.cpp                           satbQueue.hpp
+satbQueue.cpp                           sharedHeap.hpp
+satbQueue.cpp                           thread.hpp
+
+satbQueue.hpp                           ptrQueue.hpp
+
+sparsePRT.cpp				allocation.inline.hpp
+sparsePRT.cpp				cardTableModRefBS.hpp
+sparsePRT.cpp				heapRegion.hpp
+sparsePRT.cpp				heapRegionRemSet.hpp
+sparsePRT.cpp				mutexLocker.hpp
+sparsePRT.cpp				sparsePRT.hpp
+sparsePRT.cpp				space.inline.hpp
+
+sparsePRT.hpp				allocation.hpp
+sparsePRT.hpp				cardTableModRefBS.hpp
+sparsePRT.hpp				globalDefinitions.hpp
+sparsePRT.hpp				heapRegion.hpp
+sparsePRT.hpp				mutex.hpp
+
+specialized_oop_closures.hpp		g1_specialized_oop_closures.hpp
+
+survRateGroup.hpp			numberSeq.hpp
+
+survRateGroup.cpp			allocation.hpp
+survRateGroup.cpp			g1CollectedHeap.hpp
+survRateGroup.cpp			g1CollectorPolicy.hpp
+survRateGroup.cpp			heapRegion.hpp
+survRateGroup.cpp			survRateGroup.hpp
+
+thread.cpp				concurrentMarkThread.inline.hpp
+
+universe.cpp                            g1CollectedHeap.hpp
+universe.cpp                            g1CollectorPolicy.hpp
+
+vm_operations_g1.hpp			vmGCOperations.hpp
+
+vm_operations_g1.cpp			vm_operations_g1.hpp
+vm_operations_g1.cpp                    g1CollectedHeap.hpp
+vm_operations_g1.cpp                    isGCActiveMark.hpp
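+
+// A note on the format for readers new to includeDB files: each non-comment
+// line pairs a file with one header it depends on, and makeDeps turns those
+// pairs into the platform build's dependency lists (which is why the old-style
+// copyright banner above must not change). The <os_family> token is expanded
+// per platform; as an illustration only, not part of this changeset:
+//
+//   On a Linux build, a line such as
+//       dirtyCardQueue.cpp    thread_<os_family>.inline.hpp
+//   resolves to a dependency on thread_linux.inline.hpp; Solaris and Windows
+//   builds get thread_solaris.inline.hpp and thread_windows.inline.hpp.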
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Wed Jul 05 16:43:15 2017 +0200
@@ -150,7 +150,6 @@
 parallelScavengeHeap.hpp                psYoungGen.hpp
 parallelScavengeHeap.hpp                ostream.hpp
 
-parMarkBitMap.cpp			bitMap.hpp
 parMarkBitMap.cpp			bitMap.inline.hpp
 parMarkBitMap.cpp			oop.inline.hpp
 parMarkBitMap.cpp			os.hpp
@@ -159,7 +158,6 @@
 parMarkBitMap.cpp			parMarkBitMap.inline.hpp
 parMarkBitMap.cpp                       psParallelCompact.hpp
 
-parMarkBitMap.hpp			bitMap.hpp
 parMarkBitMap.hpp			bitMap.inline.hpp
 parMarkBitMap.hpp			psVirtualspace.hpp
 
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared	Wed Jul 05 16:43:15 2017 +0200
@@ -24,6 +24,23 @@
 
 // NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps!
 
+concurrentGCThread.cpp                  concurrentGCThread.hpp
+concurrentGCThread.cpp                  init.hpp
+concurrentGCThread.cpp                  instanceRefKlass.hpp
+concurrentGCThread.cpp                  interfaceSupport.hpp
+concurrentGCThread.cpp                  java.hpp
+concurrentGCThread.cpp                  javaCalls.hpp
+concurrentGCThread.cpp                  oop.inline.hpp
+concurrentGCThread.cpp                  systemDictionary.hpp
+
+concurrentGCThread.hpp                  thread.hpp
+
+coTracker.hpp                           globalDefinitions.hpp
+coTracker.hpp                           numberSeq.hpp
+
+coTracker.cpp                           coTracker.hpp
+coTracker.cpp                           os.hpp
+
 allocationStats.cpp                     allocationStats.hpp
 allocationStats.cpp                     ostream.hpp
 
@@ -37,6 +54,13 @@
 gcAdaptivePolicyCounters.cpp            resourceArea.hpp
 gcAdaptivePolicyCounters.cpp            gcAdaptivePolicyCounters.hpp
 
+gcOverheadReporter.cpp                  allocation.inline.hpp
+gcOverheadReporter.cpp                  concurrentGCThread.hpp
+gcOverheadReporter.cpp                  coTracker.hpp
+gcOverheadReporter.cpp                  gcOverheadReporter.hpp
+gcOverheadReporter.cpp                  ostream.hpp
+gcOverheadReporter.cpp                  thread_<os_family>.inline.hpp
+
 gSpaceCounters.cpp                      generation.hpp
 gSpaceCounters.cpp                      resourceArea.hpp
 gSpaceCounters.cpp                      gSpaceCounters.hpp
@@ -75,3 +99,5 @@
 spaceCounters.hpp                       mutableSpace.hpp
 spaceCounters.hpp                       perfData.hpp
 spaceCounters.hpp                       generationCounters.hpp
+
+vmGCOperations.cpp                      g1CollectedHeap.hpp
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -192,16 +192,16 @@
 };
 
 inline ParMarkBitMap::ParMarkBitMap():
-  _beg_bits(NULL, 0),
-  _end_bits(NULL, 0)
+  _beg_bits(),
+  _end_bits()
 {
   _region_start = 0;
   _virtual_space = 0;
 }
 
 inline ParMarkBitMap::ParMarkBitMap(MemRegion covered_region):
-  _beg_bits(NULL, 0),
-  _end_bits(NULL, 0)
+  _beg_bits(),
+  _end_bits()
 {
   initialize(covered_region);
 }
@@ -325,7 +325,7 @@
 
 inline size_t ParMarkBitMap::obj_size(idx_t beg_bit) const
 {
-  const idx_t end_bit = _end_bits.find_next_one_bit(beg_bit, size());
+  const idx_t end_bit = _end_bits.get_next_one_offset_inline(beg_bit, size());
   assert(is_marked(beg_bit), "obj not marked");
   assert(end_bit < size(), "end bit missing");
   return obj_size(beg_bit, end_bit);
@@ -384,13 +384,13 @@
 inline ParMarkBitMap::idx_t
 ParMarkBitMap::find_obj_beg(idx_t beg, idx_t end) const
 {
-  return _beg_bits.find_next_one_bit(beg, end);
+  return _beg_bits.get_next_one_offset_inline_aligned_right(beg, end);
 }
 
 inline ParMarkBitMap::idx_t
 ParMarkBitMap::find_obj_end(idx_t beg, idx_t end) const
 {
-  return _end_bits.find_next_one_bit(beg, end);
+  return _end_bits.get_next_one_offset_inline_aligned_right(beg, end);
 }
 
 inline HeapWord*
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -210,10 +210,6 @@
   PSScavenge::initialize();
   if (UseParallelOldGC) {
     PSParallelCompact::post_initialize();
-    if (VerifyParallelOldWithMarkSweep) {
-      // Will be used for verification of par old.
-      PSMarkSweep::initialize();
-    }
   } else {
     PSMarkSweep::initialize();
   }
@@ -402,7 +398,7 @@
         return result;
       }
       if (!is_tlab &&
-          size >= (young_gen()->eden_space()->capacity_in_words() / 2)) {
+          size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) {
         result = old_gen()->allocate(size, is_tlab);
         if (result != NULL) {
           return result;
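
The hunk above encodes a large-object policy: a non-TLAB request that would consume at least half of eden is tried directly in the old generation instead of pressing on in the young generation. The extra Thread::current() argument presumably lets spaces that report capacity per thread (the NUMA-aware eden) answer for the allocating thread; that rationale is an assumption, not stated in the changeset. Reduced to a sketch, with the surrounding retry loop and locking omitted:

  // Sketch of the fallback test only, not the full mem_allocate() path.
  HeapWord* sketch_try_old_gen_for_large(ParallelScavengeHeap* heap,
                                         size_t size, bool is_tlab) {
    MutableSpace* eden = heap->young_gen()->eden_space();
    // Capacity as seen by this thread (assumption: relevant for NUMA-aware eden).
    const size_t half_eden = eden->capacity_in_words(Thread::current()) / 2;
    if (!is_tlab && size >= half_eden) {
      return heap->old_gen()->allocate(size, is_tlab);   // may still return NULL
    }
    return NULL;  // caller continues with its normal young-gen path
  }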
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -184,6 +184,20 @@
   size_t tlab_capacity(Thread* thr) const;
   size_t unsafe_max_tlab_alloc(Thread* thr) const;
 
+  // Can a compiler initialize a new object without store barriers?
+  // This permission only extends from the creation of a new object
+  // via a TLAB up to the first subsequent safepoint.
+  virtual bool can_elide_tlab_store_barriers() const {
+    return true;
+  }
+
+  // Can a compiler elide a store barrier when it writes
+  // a permanent oop into the heap?  Applies when the compiler
+  // is storing x to the heap, where x->is_perm() is true.
+  virtual bool can_elide_permanent_oop_store_barriers() const {
+    return true;
+  }
+
   void oop_iterate(OopClosure* cl);
   void object_iterate(ObjectClosure* cl);
   void permanent_oop_iterate(OopClosure* cl);
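
The two predicates added here form a contract with the JIT compilers rather than something the collector calls itself: when the heap answers true, the compiler may skip the post-write barrier for stores that initialize a freshly TLAB-allocated object (and for stores of perm-gen oops, respectively). A rough sketch of the consuming side, purely illustrative since the real checks live in the compilers' barrier-emission code:

  // Illustrative only: how a code generator might consult the heap.
  void sketch_emit_initializing_store() {
    if (Universe::heap()->can_elide_tlab_store_barriers()) {
      // The object was just carved out of a TLAB and no safepoint can have
      // intervened before this store, so no card mark is emitted for it.
    } else {
      // Emit the normal post-write barrier.
    }
  }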
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -146,7 +146,7 @@
 {
   ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array();
+  RegionTaskQueueSet* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(parallel_gc_threads, qset);
   GCTaskQueue* q = GCTaskQueue::create();
   for(uint i=0; i<parallel_gc_threads; i++) {
@@ -205,38 +205,38 @@
 }
 
 //
-// StealChunkCompactionTask
+// StealRegionCompactionTask
 //
 
 
-StealChunkCompactionTask::StealChunkCompactionTask(ParallelTaskTerminator* t) :
-  _terminator(t) {};
+StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t):
+  _terminator(t) {}
 
-void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
+void StealRegionCompactionTask::do_it(GCTaskManager* manager, uint which) {
   assert(Universe::heap()->is_gc_active(), "called outside gc");
 
-  NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask",
+  NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask",
     PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));
 
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
-  // Has to drain stacks first because there may be chunks on
+  // Has to drain stacks first because there may be regions
   // preloaded onto the stack and this thread may never have
   // done a draining task.  Are the draining tasks needed?
 
-  cm->drain_chunk_stacks();
+  cm->drain_region_stacks();
 
-  size_t chunk_index = 0;
+  size_t region_index = 0;
   int random_seed = 17;
 
   // If we're the termination task, try 10 rounds of stealing before
   // setting the termination flag
 
   while(true) {
-    if (ParCompactionManager::steal(which, &random_seed, chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(cm, chunk_index);
-      cm->drain_chunk_stacks();
+    if (ParCompactionManager::steal(which, &random_seed, region_index)) {
+      PSParallelCompact::fill_and_update_region(cm, region_index);
+      cm->drain_region_stacks();
     } else {
       if (terminator()->offer_termination()) {
         break;
@@ -249,11 +249,10 @@
 
 UpdateDensePrefixTask::UpdateDensePrefixTask(
                                    PSParallelCompact::SpaceId space_id,
-                                   size_t chunk_index_start,
-                                   size_t chunk_index_end) :
-  _space_id(space_id), _chunk_index_start(chunk_index_start),
-  _chunk_index_end(chunk_index_end)
-{}
+                                   size_t region_index_start,
+                                   size_t region_index_end) :
+  _space_id(space_id), _region_index_start(region_index_start),
+  _region_index_end(region_index_end) {}
 
 void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
 
@@ -265,8 +264,8 @@
 
   PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
                                                          _space_id,
-                                                         _chunk_index_start,
-                                                         _chunk_index_end);
+                                                         _region_index_start,
+                                                         _region_index_end);
 }
 
 void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
@@ -278,6 +277,6 @@
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
-  // Process any chunks already in the compaction managers stacks.
-  cm->drain_chunk_stacks();
+  // Process any regions already in the compaction managers stacks.
+  cm->drain_region_stacks();
 }
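
The loop in StealRegionCompactionTask::do_it() above is the standard taskqueue work-stealing idiom: drain local work, then alternate between stealing from other workers' queues and offering termination. Its skeleton, with tracing and asserts omitted (sketch only):

  void sketch_steal_loop(ParCompactionManager* cm, uint which,
                         ParallelTaskTerminator* terminator) {
    cm->drain_region_stacks();          // local work first
    size_t region_index = 0;
    int random_seed = 17;               // per-thread seed for victim selection
    while (true) {
      if (ParCompactionManager::steal(which, &random_seed, region_index)) {
        PSParallelCompact::fill_and_update_region(cm, region_index);
        cm->drain_region_stacks();      // stolen work may push more regions
      } else if (terminator->offer_termination()) {
        break;                          // all queues empty and all workers agree
      }
    }
  }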
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -188,18 +188,18 @@
 };
 
 //
-// StealChunkCompactionTask
+// StealRegionCompactionTask
 //
 // This task is used to distribute work to idle threads.
 //
 
-class StealChunkCompactionTask : public GCTask {
+class StealRegionCompactionTask : public GCTask {
  private:
    ParallelTaskTerminator* const _terminator;
  public:
-  StealChunkCompactionTask(ParallelTaskTerminator* t);
+  StealRegionCompactionTask(ParallelTaskTerminator* t);
 
-  char* name() { return (char *)"steal-chunk-task"; }
+  char* name() { return (char *)"steal-region-task"; }
   ParallelTaskTerminator* terminator() { return _terminator; }
 
   virtual void do_it(GCTaskManager* manager, uint which);
@@ -215,15 +215,15 @@
 class UpdateDensePrefixTask : public GCTask {
  private:
   PSParallelCompact::SpaceId _space_id;
-  size_t _chunk_index_start;
-  size_t _chunk_index_end;
+  size_t _region_index_start;
+  size_t _region_index_end;
 
  public:
   char* name() { return (char *)"update-dense_prefix-task"; }
 
   UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
-                        size_t chunk_index_start,
-                        size_t chunk_index_end);
+                        size_t region_index_start,
+                        size_t region_index_end);
 
   virtual void do_it(GCTaskManager* manager, uint which);
 };
@@ -231,17 +231,17 @@
 //
 // DrainStacksCompactionTask
 //
-// This task processes chunks that have been added to the stacks of each
+// This task processes regions that have been added to the stacks of each
 // compaction manager.
 //
 // Trying to use one draining thread does not work because there are no
 // guarantees about which task will be picked up by which thread.  For example,
-// if thread A gets all the preloaded chunks, thread A may not get a draining
+// if thread A gets all the preloaded regions, thread A may not get a draining
 // task (they may all be done by other threads).
 //
 
 class DrainStacksCompactionTask : public GCTask {
  public:
-  char* name() { return (char *)"drain-chunk-task"; }
+  char* name() { return (char *)"drain-region-task"; }
   virtual void do_it(GCTaskManager* manager, uint which);
 };
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -30,7 +30,7 @@
 OopTaskQueueSet*     ParCompactionManager::_stack_array = NULL;
 ObjectStartArray*    ParCompactionManager::_start_array = NULL;
 ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
-ChunkTaskQueueSet*   ParCompactionManager::_chunk_array = NULL;
+RegionTaskQueueSet*  ParCompactionManager::_region_array = NULL;
 
 ParCompactionManager::ParCompactionManager() :
     _action(CopyAndUpdate) {
@@ -46,13 +46,13 @@
 
   // We want the overflow stack to be permanent
   _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true);
-#ifdef USE_ChunkTaskQueueWithOverflow
-  chunk_stack()->initialize();
+#ifdef USE_RegionTaskQueueWithOverflow
+  region_stack()->initialize();
 #else
-  chunk_stack()->initialize();
+  region_stack()->initialize();
 
   // We want the overflow stack to be permanent
-  _chunk_overflow_stack =
+  _region_overflow_stack =
     new (ResourceObj::C_HEAP) GrowableArray<size_t>(10, true);
 #endif
 
@@ -86,18 +86,18 @@
 
   _stack_array = new OopTaskQueueSet(parallel_gc_threads);
   guarantee(_stack_array != NULL, "Count not initialize promotion manager");
-  _chunk_array = new ChunkTaskQueueSet(parallel_gc_threads);
-  guarantee(_chunk_array != NULL, "Count not initialize promotion manager");
+  _region_array = new RegionTaskQueueSet(parallel_gc_threads);
+  guarantee(_region_array != NULL, "Could not initialize promotion manager");
 
   // Create and register the ParCompactionManager(s) for the worker threads.
   for(uint i=0; i<parallel_gc_threads; i++) {
     _manager_array[i] = new ParCompactionManager();
     guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
     stack_array()->register_queue(i, _manager_array[i]->marking_stack());
-#ifdef USE_ChunkTaskQueueWithOverflow
-    chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue());
+#ifdef USE_RegionTaskQueueWithOverflow
+    region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue());
 #else
-    chunk_array()->register_queue(i, _manager_array[i]->chunk_stack());
+    region_array()->register_queue(i, _manager_array[i]->region_stack());
 #endif
   }
 
@@ -153,31 +153,31 @@
   return NULL;
 }
 
-// Save chunk on a stack
-void ParCompactionManager::save_for_processing(size_t chunk_index) {
+// Save region on a stack
+void ParCompactionManager::save_for_processing(size_t region_index) {
 #ifdef ASSERT
   const ParallelCompactData& sd = PSParallelCompact::summary_data();
-  ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index);
-  assert(chunk_ptr->claimed(), "must be claimed");
-  assert(chunk_ptr->_pushed++ == 0, "should only be pushed once");
+  ParallelCompactData::RegionData* const region_ptr = sd.region(region_index);
+  assert(region_ptr->claimed(), "must be claimed");
+  assert(region_ptr->_pushed++ == 0, "should only be pushed once");
 #endif
-  chunk_stack_push(chunk_index);
+  region_stack_push(region_index);
 }
 
-void ParCompactionManager::chunk_stack_push(size_t chunk_index) {
+void ParCompactionManager::region_stack_push(size_t region_index) {
 
-#ifdef USE_ChunkTaskQueueWithOverflow
-  chunk_stack()->save(chunk_index);
+#ifdef USE_RegionTaskQueueWithOverflow
+  region_stack()->save(region_index);
 #else
-  if(!chunk_stack()->push(chunk_index)) {
-    chunk_overflow_stack()->push(chunk_index);
+  if(!region_stack()->push(region_index)) {
+    region_overflow_stack()->push(region_index);
   }
 #endif
 }
 
-bool ParCompactionManager::retrieve_for_processing(size_t& chunk_index) {
-#ifdef USE_ChunkTaskQueueWithOverflow
-  return chunk_stack()->retrieve(chunk_index);
+bool ParCompactionManager::retrieve_for_processing(size_t& region_index) {
+#ifdef USE_RegionTaskQueueWithOverflow
+  return region_stack()->retrieve(region_index);
 #else
   // Should not be used in the parallel case
   ShouldNotReachHere();
@@ -230,14 +230,14 @@
   assert(overflow_stack()->length() == 0, "Sanity");
 }
 
-void ParCompactionManager::drain_chunk_overflow_stack() {
-  size_t chunk_index = (size_t) -1;
-  while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
-    PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+void ParCompactionManager::drain_region_overflow_stack() {
+  size_t region_index = (size_t) -1;
+  while(region_stack()->retrieve_from_overflow(region_index)) {
+    PSParallelCompact::fill_and_update_region(this, region_index);
   }
 }
 
-void ParCompactionManager::drain_chunk_stacks() {
+void ParCompactionManager::drain_region_stacks() {
 #ifdef ASSERT
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
@@ -249,42 +249,42 @@
 #if 1 // def DO_PARALLEL - the serial code hasn't been updated
   do {
 
-#ifdef USE_ChunkTaskQueueWithOverflow
+#ifdef USE_RegionTaskQueueWithOverflow
     // Drain overflow stack first, so other threads can steal from
     // claimed stack while we work.
-    size_t chunk_index = (size_t) -1;
-    while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    size_t region_index = (size_t) -1;
+    while(region_stack()->retrieve_from_overflow(region_index)) {
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
 
-    while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    while (region_stack()->retrieve_from_stealable_queue(region_index)) {
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
-  } while (!chunk_stack()->is_empty());
+  } while (!region_stack()->is_empty());
 #else
     // Drain overflow stack first, so other threads can steal from
     // claimed stack while we work.
-    while(!chunk_overflow_stack()->is_empty()) {
-      size_t chunk_index = chunk_overflow_stack()->pop();
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    while(!region_overflow_stack()->is_empty()) {
+      size_t region_index = region_overflow_stack()->pop();
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
 
-    size_t chunk_index = -1;
+    size_t region_index = -1;
     // obj is a reference!!!
-    while (chunk_stack()->pop_local(chunk_index)) {
+    while (region_stack()->pop_local(region_index)) {
       // It would be nice to assert about the type of objects we might
       // pop, but they can come from anywhere, unfortunately.
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
-  } while((chunk_stack()->size() != 0) ||
-          (chunk_overflow_stack()->length() != 0));
+  } while((region_stack()->size() != 0) ||
+          (region_overflow_stack()->length() != 0));
 #endif
 
-#ifdef USE_ChunkTaskQueueWithOverflow
-  assert(chunk_stack()->is_empty(), "Sanity");
+#ifdef USE_RegionTaskQueueWithOverflow
+  assert(region_stack()->is_empty(), "Sanity");
 #else
-  assert(chunk_stack()->size() == 0, "Sanity");
-  assert(chunk_overflow_stack()->length() == 0, "Sanity");
+  assert(region_stack()->size() == 0, "Sanity");
+  assert(region_overflow_stack()->length() == 0, "Sanity");
 #endif
 #else
   oop obj;
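
As the comments above note, drain_region_stacks() empties the private overflow stack before the stealable task queue, so the shared queue stays populated for other workers to steal from while this one works. The shape of the non-overflow-taskqueue variant, written as if inside a ParCompactionManager member (sketch only, accessor visibility and asserts glossed over):

  void drain_region_stacks_sketch() {
    // 1. Private overflow entries cannot be stolen by other workers,
    //    so work them off first and leave the shared queue stealable.
    while (!region_overflow_stack()->is_empty()) {
      size_t region_index = region_overflow_stack()->pop();
      PSParallelCompact::fill_and_update_region(this, region_index);
    }
    // 2. Then drain this worker's bounded, stealable queue.
    size_t region_index = 0;
    while (region_stack()->pop_local(region_index)) {
      PSParallelCompact::fill_and_update_region(this, region_index);
    }
  }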
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -52,7 +52,7 @@
   friend class ParallelTaskTerminator;
   friend class ParMarkBitMap;
   friend class PSParallelCompact;
-  friend class StealChunkCompactionTask;
+  friend class StealRegionCompactionTask;
   friend class UpdateAndFillClosure;
   friend class RefProcTaskExecutor;
 
@@ -72,27 +72,27 @@
 // ------------------------  End don't putback if not needed
 
  private:
-  static ParCompactionManager**  _manager_array;
-  static OopTaskQueueSet*      _stack_array;
-  static ObjectStartArray*     _start_array;
-  static ChunkTaskQueueSet*    _chunk_array;
-  static PSOldGen*             _old_gen;
+  static ParCompactionManager** _manager_array;
+  static OopTaskQueueSet*       _stack_array;
+  static ObjectStartArray*      _start_array;
+  static RegionTaskQueueSet*    _region_array;
+  static PSOldGen*              _old_gen;
 
-  OopTaskQueue                 _marking_stack;
-  GrowableArray<oop>*          _overflow_stack;
+  OopTaskQueue                  _marking_stack;
+  GrowableArray<oop>*           _overflow_stack;
   // Is there a way to reuse the _marking_stack for the
-  // saving empty chunks?  For now just create a different
+  // saving of empty regions?  For now just create a different
   // type of TaskQueue.
 
-#ifdef USE_ChunkTaskQueueWithOverflow
-  ChunkTaskQueueWithOverflow   _chunk_stack;
+#ifdef USE_RegionTaskQueueWithOverflow
+  RegionTaskQueueWithOverflow   _region_stack;
 #else
-  ChunkTaskQueue               _chunk_stack;
-  GrowableArray<size_t>*       _chunk_overflow_stack;
+  RegionTaskQueue               _region_stack;
+  GrowableArray<size_t>*        _region_overflow_stack;
 #endif
 
 #if 1  // does this happen enough to need a per thread stack?
-  GrowableArray<Klass*>*       _revisit_klass_stack;
+  GrowableArray<Klass*>*        _revisit_klass_stack;
 #endif
   static ParMarkBitMap* _mark_bitmap;
 
@@ -100,21 +100,22 @@
 
   static PSOldGen* old_gen()             { return _old_gen; }
   static ObjectStartArray* start_array() { return _start_array; }
-  static OopTaskQueueSet* stack_array()   { return _stack_array; }
+  static OopTaskQueueSet* stack_array()  { return _stack_array; }
 
   static void initialize(ParMarkBitMap* mbm);
 
  protected:
   // Array of tasks.  Needed by the ParallelTaskTerminator.
-  static ChunkTaskQueueSet* chunk_array()   { return _chunk_array; }
-
-  OopTaskQueue*  marking_stack()          { return &_marking_stack; }
-  GrowableArray<oop>* overflow_stack()    { return _overflow_stack; }
-#ifdef USE_ChunkTaskQueueWithOverflow
-  ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; }
+  static RegionTaskQueueSet* region_array()      { return _region_array; }
+  OopTaskQueue*  marking_stack()                 { return &_marking_stack; }
+  GrowableArray<oop>* overflow_stack()           { return _overflow_stack; }
+#ifdef USE_RegionTaskQueueWithOverflow
+  RegionTaskQueueWithOverflow* region_stack()    { return &_region_stack; }
 #else
-  ChunkTaskQueue*  chunk_stack()          { return &_chunk_stack; }
-  GrowableArray<size_t>* chunk_overflow_stack() { return _chunk_overflow_stack; }
+  RegionTaskQueue*  region_stack()               { return &_region_stack; }
+  GrowableArray<size_t>* region_overflow_stack() {
+    return _region_overflow_stack;
+  }
 #endif
 
   // Pushes onto the marking stack.  If the marking stack is full,
@@ -123,9 +124,9 @@
   // Do not implement an equivalent stack_pop.  Deal with the
   // marking stack and overflow stack directly.
 
-  // Pushes onto the chunk stack.  If the chunk stack is full,
-  // pushes onto the chunk overflow stack.
-  void chunk_stack_push(size_t chunk_index);
+  // Pushes onto the region stack.  If the region stack is full,
+  // pushes onto the region overflow stack.
+  void region_stack_push(size_t region_index);
  public:
 
   Action action() { return _action; }
@@ -160,10 +161,10 @@
   // Get a oop for scanning.  If returns null, no oop were found.
   oop retrieve_for_scanning();
 
-  // Save chunk for later processing.  Must not fail.
-  void save_for_processing(size_t chunk_index);
-  // Get a chunk for processing.  If returns null, no chunk were found.
-  bool retrieve_for_processing(size_t& chunk_index);
+  // Save region for later processing.  Must not fail.
+  void save_for_processing(size_t region_index);
+  // Get a region for processing.  Returns false if no region was found.
+  bool retrieve_for_processing(size_t& region_index);
 
   // Access function for compaction managers
   static ParCompactionManager* gc_thread_compaction_manager(int index);
@@ -172,18 +173,18 @@
     return stack_array()->steal(queue_num, seed, t);
   }
 
-  static bool steal(int queue_num, int* seed, ChunkTask& t) {
-    return chunk_array()->steal(queue_num, seed, t);
+  static bool steal(int queue_num, int* seed, RegionTask& t) {
+    return region_array()->steal(queue_num, seed, t);
   }
 
   // Process tasks remaining on any stack
   void drain_marking_stacks(OopClosure *blk);
 
   // Process tasks remaining on any stack
-  void drain_chunk_stacks();
+  void drain_region_stacks();
 
   // Process tasks remaining on any stack
-  void drain_chunk_overflow_stack();
+  void drain_region_overflow_stack();
 
   // Debugging support
 #ifdef ASSERT
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -35,9 +35,7 @@
   _ref_processor = new ReferenceProcessor(mr,
                                           true,    // atomic_discovery
                                           false);  // mt_discovery
-  if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-    _counters = new CollectorCounters("PSMarkSweep", 1);
-  }
+  _counters = new CollectorCounters("PSMarkSweep", 1);
 }
 
 // This method contains all heap specific policy for invoking mark sweep.
@@ -518,9 +516,6 @@
   follow_stack();
 
   // Process reference objects found during marking
-
-  // Skipping the reference processing for VerifyParallelOldWithMarkSweep
-  // affects the marking (makes it different).
   {
     ReferencePolicy *soft_ref_policy;
     if (clear_all_softrefs) {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -152,20 +152,15 @@
         oop(q)->forward_to(oop(compact_top));
         assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
       } else {
-        // Don't clear the mark since it's confuses parallel old
-        // verification.
-        if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-          // if the object isn't moving we can just set the mark to the default
-          // mark and handle it specially later on.
-          oop(q)->init_mark();
-        }
+        // if the object isn't moving we can just set the mark to the default
+        // mark and handle it specially later on.
+        oop(q)->init_mark();
         assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
       }
 
       // Update object start array
-      if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-        if (start_array)
-          start_array->allocate_block(compact_top);
+      if (start_array) {
+        start_array->allocate_block(compact_top);
       }
 
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size));
@@ -219,19 +214,14 @@
             assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
           } else {
             // if the object isn't moving we can just set the mark to the default
-            // Don't clear the mark since it's confuses parallel old
-            // verification.
-            if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-              // mark and handle it specially later on.
-              oop(q)->init_mark();
-            }
+            // mark and handle it specially later on.
+            oop(q)->init_mark();
             assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
           }
 
-          if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-            // Update object start array
-            if (start_array)
-              start_array->allocate_block(compact_top);
+          // Update object start array
+          if (start_array) {
+            start_array->allocate_block(compact_top);
           }
 
           VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz));
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -152,9 +152,7 @@
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
 
   // Reset start array first.
-  debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
   start_array()->reset();
-  debug_only(})
 
   object_mark_sweep()->precompact();
 
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -28,43 +28,31 @@
 #include <math.h>
 
 // All sizes are in HeapWords.
-const size_t ParallelCompactData::Log2ChunkSize  = 9; // 512 words
-const size_t ParallelCompactData::ChunkSize      = (size_t)1 << Log2ChunkSize;
-const size_t ParallelCompactData::ChunkSizeBytes = ChunkSize << LogHeapWordSize;
-const size_t ParallelCompactData::ChunkSizeOffsetMask = ChunkSize - 1;
-const size_t ParallelCompactData::ChunkAddrOffsetMask = ChunkSizeBytes - 1;
-const size_t ParallelCompactData::ChunkAddrMask  = ~ChunkAddrOffsetMask;
-
-// 32-bit:  128 words covers 4 bitmap words
-// 64-bit:  128 words covers 2 bitmap words
-const size_t ParallelCompactData::Log2BlockSize   = 7; // 128 words
-const size_t ParallelCompactData::BlockSize       = (size_t)1 << Log2BlockSize;
-const size_t ParallelCompactData::BlockOffsetMask = BlockSize - 1;
-const size_t ParallelCompactData::BlockMask       = ~BlockOffsetMask;
-
-const size_t ParallelCompactData::BlocksPerChunk = ChunkSize / BlockSize;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_shift = 27;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_mask = ~0U << dc_shift;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_one = 0x1U << dc_shift;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::los_mask = ~dc_mask;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_claimed = 0x8U << dc_shift;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_completed = 0xcU << dc_shift;
-
-#ifdef ASSERT
-short   ParallelCompactData::BlockData::_cur_phase = 0;
-#endif
+const size_t ParallelCompactData::Log2RegionSize  = 9; // 512 words
+const size_t ParallelCompactData::RegionSize      = (size_t)1 << Log2RegionSize;
+const size_t ParallelCompactData::RegionSizeBytes =
+  RegionSize << LogHeapWordSize;
+const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
+const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
+const size_t ParallelCompactData::RegionAddrMask  = ~RegionAddrOffsetMask;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_shift = 27;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_one = 0x1U << dc_shift;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::los_mask = ~dc_mask;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_claimed = 0x8U << dc_shift;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift;
 
 SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id];
 bool      PSParallelCompact::_print_phases = false;
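
To make the constants above concrete: Log2RegionSize = 9 means a region covers 2^9 = 512 HeapWords, so on an LP64 VM (LogHeapWordSize = 3, 8-byte words) RegionSizeBytes is 512 << 3 = 4096 bytes, and the offset/addr masks split an address into region index and in-region offset. A worked sketch of the index math (illustrative; the real accessors live in ParallelCompactData):

  // Assuming 'region_start' is the base address of the covered space:
  size_t sketch_addr_to_region_idx(HeapWord* region_start, HeapWord* addr) {
    // Offset in HeapWords from the start of the covered space, then divide
    // by the 512-word region size via a shift.
    return pointer_delta(addr, region_start) >> 9;   // Log2RegionSize
  }

The dc_* values carve the top bits (dc_shift = 27) of each RegionData counter (assumed 32-bit here) into a destination-count/state field, leaving the low 27 bits (los_mask) for the live-object size.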
@@ -100,99 +88,12 @@
 GrowableArray<size_t>   * PSParallelCompact::_last_gc_live_oops_size = NULL;
 #endif
 
-// XXX beg - verification code; only works while we also mark in object headers
-static void
-verify_mark_bitmap(ParMarkBitMap& _mark_bitmap)
-{
-  ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
-
-  PSPermGen* perm_gen = heap->perm_gen();
-  PSOldGen* old_gen = heap->old_gen();
-  PSYoungGen* young_gen = heap->young_gen();
-
-  MutableSpace* perm_space = perm_gen->object_space();
-  MutableSpace* old_space = old_gen->object_space();
-  MutableSpace* eden_space = young_gen->eden_space();
-  MutableSpace* from_space = young_gen->from_space();
-  MutableSpace* to_space = young_gen->to_space();
-
-  // 'from_space' here is the survivor space at the lower address.
-  if (to_space->bottom() < from_space->bottom()) {
-    from_space = to_space;
-    to_space = young_gen->from_space();
-  }
-
-  HeapWord* boundaries[12];
-  unsigned int bidx = 0;
-  const unsigned int bidx_max = sizeof(boundaries) / sizeof(boundaries[0]);
-
-  boundaries[0] = perm_space->bottom();
-  boundaries[1] = perm_space->top();
-  boundaries[2] = old_space->bottom();
-  boundaries[3] = old_space->top();
-  boundaries[4] = eden_space->bottom();
-  boundaries[5] = eden_space->top();
-  boundaries[6] = from_space->bottom();
-  boundaries[7] = from_space->top();
-  boundaries[8] = to_space->bottom();
-  boundaries[9] = to_space->top();
-  boundaries[10] = to_space->end();
-  boundaries[11] = to_space->end();
-
-  BitMap::idx_t beg_bit = 0;
-  BitMap::idx_t end_bit;
-  BitMap::idx_t tmp_bit;
-  const BitMap::idx_t last_bit = _mark_bitmap.size();
-  do {
-    HeapWord* addr = _mark_bitmap.bit_to_addr(beg_bit);
-    if (_mark_bitmap.is_marked(beg_bit)) {
-      oop obj = (oop)addr;
-      assert(obj->is_gc_marked(), "obj header is not marked");
-      end_bit = _mark_bitmap.find_obj_end(beg_bit, last_bit);
-      const size_t size = _mark_bitmap.obj_size(beg_bit, end_bit);
-      assert(size == (size_t)obj->size(), "end bit wrong?");
-      beg_bit = _mark_bitmap.find_obj_beg(beg_bit + 1, last_bit);
-      assert(beg_bit > end_bit, "bit set in middle of an obj");
-    } else {
-      if (addr >= boundaries[bidx] && addr < boundaries[bidx + 1]) {
-        // a dead object in the current space.
-        oop obj = (oop)addr;
-        end_bit = _mark_bitmap.addr_to_bit(addr + obj->size());
-        assert(!obj->is_gc_marked(), "obj marked in header, not in bitmap");
-        tmp_bit = beg_bit + 1;
-        beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "beg bit set in unmarked obj");
-        beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "end bit set in unmarked obj");
-      } else if (addr < boundaries[bidx + 2]) {
-        // addr is between top in the current space and bottom in the next.
-        end_bit = beg_bit + pointer_delta(boundaries[bidx + 2], addr);
-        tmp_bit = beg_bit;
-        beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "beg bit set above top");
-        beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "end bit set above top");
-        bidx += 2;
-      } else if (bidx < bidx_max - 2) {
-        bidx += 2; // ???
-      } else {
-        tmp_bit = beg_bit;
-        beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, last_bit);
-        assert(beg_bit == last_bit, "beg bit set outside heap");
-        beg_bit = _mark_bitmap.find_obj_end(tmp_bit, last_bit);
-        assert(beg_bit == last_bit, "end bit set outside heap");
-      }
-    }
-  } while (beg_bit < last_bit);
-}
-// XXX end - verification code; only works while we also mark in object headers
-
 #ifndef PRODUCT
 const char* PSParallelCompact::space_names[] = {
   "perm", "old ", "eden", "from", "to  "
 };
 
-void PSParallelCompact::print_chunk_ranges()
+void PSParallelCompact::print_region_ranges()
 {
   tty->print_cr("space  bottom     top        end        new_top");
   tty->print_cr("------ ---------- ---------- ---------- ----------");
@@ -203,31 +104,31 @@
                   SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " "
                   SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " ",
                   id, space_names[id],
-                  summary_data().addr_to_chunk_idx(space->bottom()),
-                  summary_data().addr_to_chunk_idx(space->top()),
-                  summary_data().addr_to_chunk_idx(space->end()),
-                  summary_data().addr_to_chunk_idx(_space_info[id].new_top()));
+                  summary_data().addr_to_region_idx(space->bottom()),
+                  summary_data().addr_to_region_idx(space->top()),
+                  summary_data().addr_to_region_idx(space->end()),
+                  summary_data().addr_to_region_idx(_space_info[id].new_top()));
   }
 }
 
 void
-print_generic_summary_chunk(size_t i, const ParallelCompactData::ChunkData* c)
+print_generic_summary_region(size_t i, const ParallelCompactData::RegionData* c)
 {
-#define CHUNK_IDX_FORMAT        SIZE_FORMAT_W(7)
-#define CHUNK_DATA_FORMAT       SIZE_FORMAT_W(5)
+#define REGION_IDX_FORMAT        SIZE_FORMAT_W(7)
+#define REGION_DATA_FORMAT       SIZE_FORMAT_W(5)
 
   ParallelCompactData& sd = PSParallelCompact::summary_data();
-  size_t dci = c->destination() ? sd.addr_to_chunk_idx(c->destination()) : 0;
-  tty->print_cr(CHUNK_IDX_FORMAT " " PTR_FORMAT " "
-                CHUNK_IDX_FORMAT " " PTR_FORMAT " "
-                CHUNK_DATA_FORMAT " " CHUNK_DATA_FORMAT " "
-                CHUNK_DATA_FORMAT " " CHUNK_IDX_FORMAT " %d",
+  size_t dci = c->destination() ? sd.addr_to_region_idx(c->destination()) : 0;
+  tty->print_cr(REGION_IDX_FORMAT " " PTR_FORMAT " "
+                REGION_IDX_FORMAT " " PTR_FORMAT " "
+                REGION_DATA_FORMAT " " REGION_DATA_FORMAT " "
+                REGION_DATA_FORMAT " " REGION_IDX_FORMAT " %d",
                 i, c->data_location(), dci, c->destination(),
                 c->partial_obj_size(), c->live_obj_size(),
-                c->data_size(), c->source_chunk(), c->destination_count());
-
-#undef  CHUNK_IDX_FORMAT
-#undef  CHUNK_DATA_FORMAT
+                c->data_size(), c->source_region(), c->destination_count());
+
+#undef  REGION_IDX_FORMAT
+#undef  REGION_DATA_FORMAT
 }
 
 void
@@ -236,14 +137,14 @@
                            HeapWord* const end_addr)
 {
   size_t total_words = 0;
-  size_t i = summary_data.addr_to_chunk_idx(beg_addr);
-  const size_t last = summary_data.addr_to_chunk_idx(end_addr);
+  size_t i = summary_data.addr_to_region_idx(beg_addr);
+  const size_t last = summary_data.addr_to_region_idx(end_addr);
   HeapWord* pdest = 0;
 
   while (i <= last) {
-    ParallelCompactData::ChunkData* c = summary_data.chunk(i);
+    ParallelCompactData::RegionData* c = summary_data.region(i);
     if (c->data_size() != 0 || c->destination() != pdest) {
-      print_generic_summary_chunk(i, c);
+      print_generic_summary_region(i, c);
       total_words += c->data_size();
       pdest = c->destination();
     }
@@ -265,16 +166,16 @@
 }
 
 void
-print_initial_summary_chunk(size_t i,
-                            const ParallelCompactData::ChunkData* c,
-                            bool newline = true)
+print_initial_summary_region(size_t i,
+                             const ParallelCompactData::RegionData* c,
+                             bool newline = true)
 {
   tty->print(SIZE_FORMAT_W(5) " " PTR_FORMAT " "
              SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " "
              SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " %d",
              i, c->destination(),
              c->partial_obj_size(), c->live_obj_size(),
-             c->data_size(), c->source_chunk(), c->destination_count());
+             c->data_size(), c->source_region(), c->destination_count());
   if (newline) tty->cr();
 }
 
@@ -285,47 +186,48 @@
     return;
   }
 
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
-  HeapWord* const top_aligned_up = summary_data.chunk_align_up(space->top());
-  const size_t end_chunk = summary_data.addr_to_chunk_idx(top_aligned_up);
-  const ParallelCompactData::ChunkData* c = summary_data.chunk(end_chunk - 1);
+  const size_t region_size = ParallelCompactData::RegionSize;
+  typedef ParallelCompactData::RegionData RegionData;
+  HeapWord* const top_aligned_up = summary_data.region_align_up(space->top());
+  const size_t end_region = summary_data.addr_to_region_idx(top_aligned_up);
+  const RegionData* c = summary_data.region(end_region - 1);
   HeapWord* end_addr = c->destination() + c->data_size();
   const size_t live_in_space = pointer_delta(end_addr, space->bottom());
 
-  // Print (and count) the full chunks at the beginning of the space.
-  size_t full_chunk_count = 0;
-  size_t i = summary_data.addr_to_chunk_idx(space->bottom());
-  while (i < end_chunk && summary_data.chunk(i)->data_size() == chunk_size) {
-    print_initial_summary_chunk(i, summary_data.chunk(i));
-    ++full_chunk_count;
+  // Print (and count) the full regions at the beginning of the space.
+  size_t full_region_count = 0;
+  size_t i = summary_data.addr_to_region_idx(space->bottom());
+  while (i < end_region && summary_data.region(i)->data_size() == region_size) {
+    print_initial_summary_region(i, summary_data.region(i));
+    ++full_region_count;
     ++i;
   }
 
-  size_t live_to_right = live_in_space - full_chunk_count * chunk_size;
+  size_t live_to_right = live_in_space - full_region_count * region_size;
 
   double max_reclaimed_ratio = 0.0;
-  size_t max_reclaimed_ratio_chunk = 0;
+  size_t max_reclaimed_ratio_region = 0;
   size_t max_dead_to_right = 0;
   size_t max_live_to_right = 0;
 
-  // Print the 'reclaimed ratio' for chunks while there is something live in the
-  // chunk or to the right of it.  The remaining chunks are empty (and
+  // Print the 'reclaimed ratio' for regions while there is something live in
+  // the region or to the right of it.  The remaining regions are empty (and
   // uninteresting), and computing the ratio will result in division by 0.
-  while (i < end_chunk && live_to_right > 0) {
-    c = summary_data.chunk(i);
-    HeapWord* const chunk_addr = summary_data.chunk_to_addr(i);
-    const size_t used_to_right = pointer_delta(space->top(), chunk_addr);
+  while (i < end_region && live_to_right > 0) {
+    c = summary_data.region(i);
+    HeapWord* const region_addr = summary_data.region_to_addr(i);
+    const size_t used_to_right = pointer_delta(space->top(), region_addr);
     const size_t dead_to_right = used_to_right - live_to_right;
     const double reclaimed_ratio = double(dead_to_right) / live_to_right;
 
     if (reclaimed_ratio > max_reclaimed_ratio) {
             max_reclaimed_ratio = reclaimed_ratio;
-            max_reclaimed_ratio_chunk = i;
+            max_reclaimed_ratio_region = i;
             max_dead_to_right = dead_to_right;
             max_live_to_right = live_to_right;
     }
 
-    print_initial_summary_chunk(i, c, false);
+    print_initial_summary_region(i, c, false);
     tty->print_cr(" %12.10f " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10),
                   reclaimed_ratio, dead_to_right, live_to_right);
 
@@ -333,14 +235,14 @@
     ++i;
   }
 
-  // Any remaining chunks are empty.  Print one more if there is one.
-  if (i < end_chunk) {
-    print_initial_summary_chunk(i, summary_data.chunk(i));
+  // Any remaining regions are empty.  Print one more if there is one.
+  if (i < end_region) {
+    print_initial_summary_region(i, summary_data.region(i));
   }
 
   tty->print_cr("max:  " SIZE_FORMAT_W(4) " d2r=" SIZE_FORMAT_W(10) " "
                 "l2r=" SIZE_FORMAT_W(10) " max_ratio=%14.12f",
-                max_reclaimed_ratio_chunk, max_dead_to_right,
+                max_reclaimed_ratio_region, max_dead_to_right,
                 max_live_to_right, max_reclaimed_ratio);
 }
 
@@ -372,13 +274,9 @@
 {
   _region_start = 0;
 
-  _chunk_vspace = 0;
-  _chunk_data = 0;
-  _chunk_count = 0;
-
-  _block_vspace = 0;
-  _block_data = 0;
-  _block_count = 0;
+  _region_vspace = 0;
+  _region_data = 0;
+  _region_count = 0;
 }
 
 bool ParallelCompactData::initialize(MemRegion covered_region)
@@ -387,18 +285,12 @@
   const size_t region_size = covered_region.word_size();
   DEBUG_ONLY(_region_end = _region_start + region_size;)
 
-  assert(chunk_align_down(_region_start) == _region_start,
+  assert(region_align_down(_region_start) == _region_start,
          "region start not aligned");
-  assert((region_size & ChunkSizeOffsetMask) == 0,
-         "region size not a multiple of ChunkSize");
-
-  bool result = initialize_chunk_data(region_size);
-
-  // Initialize the block data if it will be used for updating pointers, or if
-  // this is a debug build.
-  if (!UseParallelOldGCChunkPointerCalc || trueInDebug) {
-    result = result && initialize_block_data(region_size);
-  }
+  assert((region_size & RegionSizeOffsetMask) == 0,
+         "region size not a multiple of RegionSize");
+
+  bool result = initialize_region_data(region_size);
 
   return result;
 }
@@ -429,25 +321,13 @@
   return 0;
 }
 
-bool ParallelCompactData::initialize_chunk_data(size_t region_size)
+bool ParallelCompactData::initialize_region_data(size_t region_size)
 {
-  const size_t count = (region_size + ChunkSizeOffsetMask) >> Log2ChunkSize;
-  _chunk_vspace = create_vspace(count, sizeof(ChunkData));
-  if (_chunk_vspace != 0) {
-    _chunk_data = (ChunkData*)_chunk_vspace->reserved_low_addr();
-    _chunk_count = count;
-    return true;
-  }
-  return false;
-}
-
-bool ParallelCompactData::initialize_block_data(size_t region_size)
-{
-  const size_t count = (region_size + BlockOffsetMask) >> Log2BlockSize;
-  _block_vspace = create_vspace(count, sizeof(BlockData));
-  if (_block_vspace != 0) {
-    _block_data = (BlockData*)_block_vspace->reserved_low_addr();
-    _block_count = count;
+  const size_t count = (region_size + RegionSizeOffsetMask) >> Log2RegionSize;
+  _region_vspace = create_vspace(count, sizeof(RegionData));
+  if (_region_vspace != 0) {
+    _region_data = (RegionData*)_region_vspace->reserved_low_addr();
+    _region_count = count;
     return true;
   }
   return false;
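For illustration, the count above is a ceiling division done with a mask and a shift. Assuming (hypothetically, for the example) Log2RegionSize = 9, so RegionSize = 512 words and RegionSizeOffsetMask = 511: a covered region of 1,000,000 words yields count = (1,000,000 + 511) >> 9 = 1954, so the trailing, partially covered region still gets its own RegionData entry (1953 regions would cover only 999,936 words).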
@@ -455,38 +335,27 @@
 
 void ParallelCompactData::clear()
 {
-  if (_block_data) {
-    memset(_block_data, 0, _block_vspace->committed_size());
-  }
-  memset(_chunk_data, 0, _chunk_vspace->committed_size());
+  memset(_region_data, 0, _region_vspace->committed_size());
 }
 
-void ParallelCompactData::clear_range(size_t beg_chunk, size_t end_chunk) {
-  assert(beg_chunk <= _chunk_count, "beg_chunk out of range");
-  assert(end_chunk <= _chunk_count, "end_chunk out of range");
-  assert(ChunkSize % BlockSize == 0, "ChunkSize not a multiple of BlockSize");
-
-  const size_t chunk_cnt = end_chunk - beg_chunk;
-
-  if (_block_data) {
-    const size_t blocks_per_chunk = ChunkSize / BlockSize;
-    const size_t beg_block = beg_chunk * blocks_per_chunk;
-    const size_t block_cnt = chunk_cnt * blocks_per_chunk;
-    memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
-  }
-  memset(_chunk_data + beg_chunk, 0, chunk_cnt * sizeof(ChunkData));
+void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
+  assert(beg_region <= _region_count, "beg_region out of range");
+  assert(end_region <= _region_count, "end_region out of range");
+
+  const size_t region_cnt = end_region - beg_region;
+  memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
 }
 
-HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const
+HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
 {
-  const ChunkData* cur_cp = chunk(chunk_idx);
-  const ChunkData* const end_cp = chunk(chunk_count() - 1);
-
-  HeapWord* result = chunk_to_addr(chunk_idx);
+  const RegionData* cur_cp = region(region_idx);
+  const RegionData* const end_cp = region(region_count() - 1);
+
+  HeapWord* result = region_to_addr(region_idx);
   if (cur_cp < end_cp) {
     do {
       result += cur_cp->partial_obj_size();
-    } while (cur_cp->partial_obj_size() == ChunkSize && ++cur_cp < end_cp);
+    } while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp);
   }
   return result;
 }
@@ -494,56 +363,56 @@
 void ParallelCompactData::add_obj(HeapWord* addr, size_t len)
 {
   const size_t obj_ofs = pointer_delta(addr, _region_start);
-  const size_t beg_chunk = obj_ofs >> Log2ChunkSize;
-  const size_t end_chunk = (obj_ofs + len - 1) >> Log2ChunkSize;
+  const size_t beg_region = obj_ofs >> Log2RegionSize;
+  const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
 
   DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);)
   DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);)
 
-  if (beg_chunk == end_chunk) {
-    // All in one chunk.
-    _chunk_data[beg_chunk].add_live_obj(len);
+  if (beg_region == end_region) {
+    // All in one region.
+    _region_data[beg_region].add_live_obj(len);
     return;
   }
 
-  // First chunk.
-  const size_t beg_ofs = chunk_offset(addr);
-  _chunk_data[beg_chunk].add_live_obj(ChunkSize - beg_ofs);
+  // First region.
+  const size_t beg_ofs = region_offset(addr);
+  _region_data[beg_region].add_live_obj(RegionSize - beg_ofs);
 
   klassOop klass = ((oop)addr)->klass();
-  // Middle chunks--completely spanned by this object.
-  for (size_t chunk = beg_chunk + 1; chunk < end_chunk; ++chunk) {
-    _chunk_data[chunk].set_partial_obj_size(ChunkSize);
-    _chunk_data[chunk].set_partial_obj_addr(addr);
+  // Middle regions--completely spanned by this object.
+  for (size_t region = beg_region + 1; region < end_region; ++region) {
+    _region_data[region].set_partial_obj_size(RegionSize);
+    _region_data[region].set_partial_obj_addr(addr);
   }
 
-  // Last chunk.
-  const size_t end_ofs = chunk_offset(addr + len - 1);
-  _chunk_data[end_chunk].set_partial_obj_size(end_ofs + 1);
-  _chunk_data[end_chunk].set_partial_obj_addr(addr);
+  // Last region.
+  const size_t end_ofs = region_offset(addr + len - 1);
+  _region_data[end_region].set_partial_obj_size(end_ofs + 1);
+  _region_data[end_region].set_partial_obj_addr(addr);
 }
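As a worked example of the split above, assuming a RegionSize of 512 words: an object of 1,300 words starting at offset 400 within region 7 adds 512 - 400 = 112 words of live_obj to region 7, sets partial_obj_size to 512 in the fully spanned regions 8 and 9, and sets partial_obj_size to 163 + 1 = 164 in region 10; 112 + 512 + 512 + 164 accounts for all 1,300 words.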
 
 void
 ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end)
 {
-  assert(chunk_offset(beg) == 0, "not ChunkSize aligned");
-  assert(chunk_offset(end) == 0, "not ChunkSize aligned");
-
-  size_t cur_chunk = addr_to_chunk_idx(beg);
-  const size_t end_chunk = addr_to_chunk_idx(end);
+  assert(region_offset(beg) == 0, "not RegionSize aligned");
+  assert(region_offset(end) == 0, "not RegionSize aligned");
+
+  size_t cur_region = addr_to_region_idx(beg);
+  const size_t end_region = addr_to_region_idx(end);
   HeapWord* addr = beg;
-  while (cur_chunk < end_chunk) {
-    _chunk_data[cur_chunk].set_destination(addr);
-    _chunk_data[cur_chunk].set_destination_count(0);
-    _chunk_data[cur_chunk].set_source_chunk(cur_chunk);
-    _chunk_data[cur_chunk].set_data_location(addr);
-
-    // Update live_obj_size so the chunk appears completely full.
-    size_t live_size = ChunkSize - _chunk_data[cur_chunk].partial_obj_size();
-    _chunk_data[cur_chunk].set_live_obj_size(live_size);
-
-    ++cur_chunk;
-    addr += ChunkSize;
+  while (cur_region < end_region) {
+    _region_data[cur_region].set_destination(addr);
+    _region_data[cur_region].set_destination_count(0);
+    _region_data[cur_region].set_source_region(cur_region);
+    _region_data[cur_region].set_data_location(addr);
+
+    // Update live_obj_size so the region appears completely full.
+    size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size();
+    _region_data[cur_region].set_live_obj_size(live_size);
+
+    ++cur_region;
+    addr += RegionSize;
   }
 }
 
@@ -552,7 +421,7 @@
                                     HeapWord** target_next,
                                     HeapWord** source_next) {
   // This is too strict.
-  // assert(chunk_offset(source_beg) == 0, "not ChunkSize aligned");
+  // assert(region_offset(source_beg) == 0, "not RegionSize aligned");
 
   if (TraceParallelOldGCSummaryPhase) {
     tty->print_cr("tb=" PTR_FORMAT " te=" PTR_FORMAT " "
@@ -564,125 +433,93 @@
                   source_next != 0 ? *source_next : (HeapWord*) 0);
   }
 
-  size_t cur_chunk = addr_to_chunk_idx(source_beg);
-  const size_t end_chunk = addr_to_chunk_idx(chunk_align_up(source_end));
+  size_t cur_region = addr_to_region_idx(source_beg);
+  const size_t end_region = addr_to_region_idx(region_align_up(source_end));
 
   HeapWord *dest_addr = target_beg;
-  while (cur_chunk < end_chunk) {
-    size_t words = _chunk_data[cur_chunk].data_size();
+  while (cur_region < end_region) {
+    size_t words = _region_data[cur_region].data_size();
 
 #if     1
     assert(pointer_delta(target_end, dest_addr) >= words,
            "source region does not fit into target region");
 #else
-    // XXX - need some work on the corner cases here.  If the chunk does not
-    // fit, then must either make sure any partial_obj from the chunk fits, or
-    // 'undo' the initial part of the partial_obj that is in the previous chunk.
+    // XXX - need some work on the corner cases here.  If the region does not
+    // fit, then must either make sure any partial_obj from the region fits, or
+    // "undo" the initial part of the partial_obj that is in the previous
+    // region.
     if (dest_addr + words >= target_end) {
       // Let the caller know where to continue.
       *target_next = dest_addr;
-      *source_next = chunk_to_addr(cur_chunk);
+      *source_next = region_to_addr(cur_region);
       return false;
     }
 #endif  // #if 1
 
-    _chunk_data[cur_chunk].set_destination(dest_addr);
-
-    // Set the destination_count for cur_chunk, and if necessary, update
-    // source_chunk for a destination chunk.  The source_chunk field is updated
-    // if cur_chunk is the first (left-most) chunk to be copied to a destination
-    // chunk.
+    _region_data[cur_region].set_destination(dest_addr);
+
+    // Set the destination_count for cur_region, and if necessary, update
+    // source_region for a destination region.  The source_region field is
+    // updated if cur_region is the first (left-most) region to be copied to a
+    // destination region.
     //
-    // The destination_count calculation is a bit subtle.  A chunk that has data
-    // that compacts into itself does not count itself as a destination.  This
-    // maintains the invariant that a zero count means the chunk is available
-    // and can be claimed and then filled.
+    // The destination_count calculation is a bit subtle.  A region that has
+    // data that compacts into itself does not count itself as a destination.
+    // This maintains the invariant that a zero count means the region is
+    // available and can be claimed and then filled.
     if (words > 0) {
       HeapWord* const last_addr = dest_addr + words - 1;
-      const size_t dest_chunk_1 = addr_to_chunk_idx(dest_addr);
-      const size_t dest_chunk_2 = addr_to_chunk_idx(last_addr);
+      const size_t dest_region_1 = addr_to_region_idx(dest_addr);
+      const size_t dest_region_2 = addr_to_region_idx(last_addr);
 #if     0
-      // Initially assume that the destination chunks will be the same and
+      // Initially assume that the destination regions will be the same and
       // adjust the value below if necessary.  Under this assumption, if
-      // cur_chunk == dest_chunk_2, then cur_chunk will be compacted completely
-      // into itself.
-      uint destination_count = cur_chunk == dest_chunk_2 ? 0 : 1;
-      if (dest_chunk_1 != dest_chunk_2) {
-        // Destination chunks differ; adjust destination_count.
+      // cur_region == dest_region_2, then cur_region will be compacted
+      // completely into itself.
+      uint destination_count = cur_region == dest_region_2 ? 0 : 1;
+      if (dest_region_1 != dest_region_2) {
+        // Destination regions differ; adjust destination_count.
         destination_count += 1;
-        // Data from cur_chunk will be copied to the start of dest_chunk_2.
-        _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
-      } else if (chunk_offset(dest_addr) == 0) {
-        // Data from cur_chunk will be copied to the start of the destination
-        // chunk.
-        _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
+        // Data from cur_region will be copied to the start of dest_region_2.
+        _region_data[dest_region_2].set_source_region(cur_region);
+      } else if (region_offset(dest_addr) == 0) {
+        // Data from cur_region will be copied to the start of the destination
+        // region.
+        _region_data[dest_region_1].set_source_region(cur_region);
       }
 #else
-      // Initially assume that the destination chunks will be different and
+      // Initially assume that the destination regions will be different and
       // adjust the value below if necessary.  Under this assumption, if
-      // cur_chunk == dest_chunk2, then cur_chunk will be compacted partially
-      // into dest_chunk_1 and partially into itself.
-      uint destination_count = cur_chunk == dest_chunk_2 ? 1 : 2;
-      if (dest_chunk_1 != dest_chunk_2) {
-        // Data from cur_chunk will be copied to the start of dest_chunk_2.
-        _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
+      // cur_region == dest_region_2, then cur_region will be compacted partially
+      // into dest_region_1 and partially into itself.
+      uint destination_count = cur_region == dest_region_2 ? 1 : 2;
+      if (dest_region_1 != dest_region_2) {
+        // Data from cur_region will be copied to the start of dest_region_2.
+        _region_data[dest_region_2].set_source_region(cur_region);
       } else {
-        // Destination chunks are the same; adjust destination_count.
+        // Destination regions are the same; adjust destination_count.
         destination_count -= 1;
-        if (chunk_offset(dest_addr) == 0) {
-          // Data from cur_chunk will be copied to the start of the destination
-          // chunk.
-          _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
+        if (region_offset(dest_addr) == 0) {
+          // Data from cur_region will be copied to the start of the destination
+          // region.
+          _region_data[dest_region_1].set_source_region(cur_region);
         }
       }
 #endif  // #if 0
 
-      _chunk_data[cur_chunk].set_destination_count(destination_count);
-      _chunk_data[cur_chunk].set_data_location(chunk_to_addr(cur_chunk));
+      _region_data[cur_region].set_destination_count(destination_count);
+      _region_data[cur_region].set_data_location(region_to_addr(cur_region));
       dest_addr += words;
     }
 
-    ++cur_chunk;
+    ++cur_region;
   }
 
   *target_next = dest_addr;
   return true;
 }
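To make the destination_count bookkeeping described above concrete, here is a minimal standalone C++ sketch; it uses plain word indices and a hypothetical RegionSize of 8 in place of HeapWord* arithmetic, and mirrors the second (#else) variant: assume two destination regions, then subtract one if they coincide.

#include <cstddef>
#include <cstdio>

static const size_t RegionSize = 8;  // hypothetical words per region

static size_t region_of(size_t word) { return word / RegionSize; }

// destination_count for a source region whose live data lands at
// [dest_addr, dest_addr + words): 0 means the region compacts entirely
// into itself and can be claimed immediately.
static unsigned destination_count(size_t cur_region, size_t dest_addr,
                                  size_t words) {
  if (words == 0) return 0;
  const size_t dest_region_1 = region_of(dest_addr);
  const size_t dest_region_2 = region_of(dest_addr + words - 1);
  unsigned count = (cur_region == dest_region_2) ? 1 : 2;
  if (dest_region_1 == dest_region_2) count -= 1;
  return count;
}

int main() {
  // Region 3 compacts entirely into itself: count 0, available to claim.
  printf("%u\n", destination_count(3, 3 * RegionSize, RegionSize));
  // Region 5 compacts into regions 2 and 3: both count as destinations.
  printf("%u\n", destination_count(5, 2 * RegionSize + 4, RegionSize));
  return 0;
}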
 
-bool ParallelCompactData::partial_obj_ends_in_block(size_t block_index) {
-  HeapWord* block_addr = block_to_addr(block_index);
-  HeapWord* block_end_addr = block_addr + BlockSize;
-  size_t chunk_index = addr_to_chunk_idx(block_addr);
-  HeapWord* partial_obj_end_addr = partial_obj_end(chunk_index);
-
-  // An object that ends at the end of the block, ends
-  // in the block (the last word of the object is to
-  // the left of the end).
-  if ((block_addr < partial_obj_end_addr) &&
-      (partial_obj_end_addr <= block_end_addr)) {
-    return true;
-  }
-
-  return false;
-}
-
 HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
-  HeapWord* result = NULL;
-  if (UseParallelOldGCChunkPointerCalc) {
-    result = chunk_calc_new_pointer(addr);
-  } else {
-    result = block_calc_new_pointer(addr);
-  }
-  return result;
-}
-
-// This method is overly complicated (expensive) to be called
-// for every reference.
-// Try to restructure this so that a NULL is returned if
-// the object is dead.  But don't wast the cycles to explicitly check
-// that it is dead since only live objects should be passed in.
-
-HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
   assert(addr != NULL, "Should detect NULL oop earlier");
   assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
 #ifdef ASSERT
@@ -692,30 +529,30 @@
 #endif
   assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
 
-  // Chunk covering the object.
-  size_t chunk_index = addr_to_chunk_idx(addr);
-  const ChunkData* const chunk_ptr = chunk(chunk_index);
-  HeapWord* const chunk_addr = chunk_align_down(addr);
-
-  assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
-  assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
-
-  HeapWord* result = chunk_ptr->destination();
-
-  // If all the data in the chunk is live, then the new location of the object
-  // can be calculated from the destination of the chunk plus the offset of the
-  // object in the chunk.
-  if (chunk_ptr->data_size() == ChunkSize) {
-    result += pointer_delta(addr, chunk_addr);
+  // Region covering the object.
+  size_t region_index = addr_to_region_idx(addr);
+  const RegionData* const region_ptr = region(region_index);
+  HeapWord* const region_addr = region_align_down(addr);
+
+  assert(addr < region_addr + RegionSize, "Region does not cover object");
+  assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
+
+  HeapWord* result = region_ptr->destination();
+
+  // If all the data in the region is live, then the new location of the object
+  // can be calculated from the destination of the region plus the offset of the
+  // object in the region.
+  if (region_ptr->data_size() == RegionSize) {
+    result += pointer_delta(addr, region_addr);
     return result;
   }
 
   // The new location of the object is
-  //    chunk destination +
-  //    size of the partial object extending onto the chunk +
-  //    sizes of the live objects in the Chunk that are to the left of addr
-  const size_t partial_obj_size = chunk_ptr->partial_obj_size();
-  HeapWord* const search_start = chunk_addr + partial_obj_size;
+  //    region destination +
+  //    size of the partial object extending onto the region +
+  //    sizes of the live objects in the Region that are to the left of addr
+  const size_t partial_obj_size = region_ptr->partial_obj_size();
+  HeapWord* const search_start = region_addr + partial_obj_size;
 
   const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
   size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
@@ -725,50 +562,6 @@
   return result;
 }
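A worked example of the formula in the comments above, with illustrative numbers only: if the region covering addr has destination() D and a partial_obj_size() of 96 words, and the mark bitmap reports 40 live words between the region start + 96 and addr, then addr relocates to D + 96 + 40: region destination, plus the partial object spilling onto the region, plus the live words to the left of addr.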
 
-HeapWord* ParallelCompactData::block_calc_new_pointer(HeapWord* addr) {
-  assert(addr != NULL, "Should detect NULL oop earlier");
-  assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
-#ifdef ASSERT
-  if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
-    gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
-  }
-#endif
-  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
-
-  // Chunk covering the object.
-  size_t chunk_index = addr_to_chunk_idx(addr);
-  const ChunkData* const chunk_ptr = chunk(chunk_index);
-  HeapWord* const chunk_addr = chunk_align_down(addr);
-
-  assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
-  assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
-
-  HeapWord* result = chunk_ptr->destination();
-
-  // If all the data in the chunk is live, then the new location of the object
-  // can be calculated from the destination of the chunk plus the offset of the
-  // object in the chunk.
-  if (chunk_ptr->data_size() == ChunkSize) {
-    result += pointer_delta(addr, chunk_addr);
-    return result;
-  }
-
-  // The new location of the object is
-  //    chunk destination +
-  //    block offset +
-  //    sizes of the live objects in the Block that are to the left of addr
-  const size_t block_offset = addr_to_block_ptr(addr)->offset();
-  HeapWord* const search_start = chunk_addr + block_offset;
-
-  const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
-  size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
-
-  result += block_offset + live_to_left;
-  assert(result <= addr, "object cannot move to the right");
-  assert(result == chunk_calc_new_pointer(addr), "Should match");
-  return result;
-}
-
 klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
   klassOop updated_klass;
   if (PSParallelCompact::should_update_klass(old_klass)) {
@@ -792,15 +585,14 @@
 
 void ParallelCompactData::verify_clear()
 {
-  verify_clear(_chunk_vspace);
-  verify_clear(_block_vspace);
+  verify_clear(_region_vspace);
 }
 #endif  // #ifdef ASSERT
 
 #ifdef NOT_PRODUCT
-ParallelCompactData::ChunkData* debug_chunk(size_t chunk_index) {
+ParallelCompactData::RegionData* debug_region(size_t region_index) {
   ParallelCompactData& sd = PSParallelCompact::summary_data();
-  return sd.chunk(chunk_index);
+  return sd.region(region_index);
 }
 #endif
 
@@ -953,10 +745,10 @@
   const idx_t end_bit = BitMap::word_align_up(_mark_bitmap.addr_to_bit(top));
   _mark_bitmap.clear_range(beg_bit, end_bit);
 
-  const size_t beg_chunk = _summary_data.addr_to_chunk_idx(bot);
-  const size_t end_chunk =
-    _summary_data.addr_to_chunk_idx(_summary_data.chunk_align_up(max_top));
-  _summary_data.clear_range(beg_chunk, end_chunk);
+  const size_t beg_region = _summary_data.addr_to_region_idx(bot);
+  const size_t end_region =
+    _summary_data.addr_to_region_idx(_summary_data.region_align_up(max_top));
+  _summary_data.clear_range(beg_region, end_region);
 }
 
 void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values)
@@ -1072,19 +864,19 @@
 PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
                                                     bool maximum_compaction)
 {
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
+  const size_t region_size = ParallelCompactData::RegionSize;
   const ParallelCompactData& sd = summary_data();
 
   const MutableSpace* const space = _space_info[id].space();
-  HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
-  const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(space->bottom());
-  const ChunkData* const end_cp = sd.addr_to_chunk_ptr(top_aligned_up);
-
-  // Skip full chunks at the beginning of the space--they are necessarily part
+  HeapWord* const top_aligned_up = sd.region_align_up(space->top());
+  const RegionData* const beg_cp = sd.addr_to_region_ptr(space->bottom());
+  const RegionData* const end_cp = sd.addr_to_region_ptr(top_aligned_up);
+
+  // Skip full regions at the beginning of the space--they are necessarily part
   // of the dense prefix.
   size_t full_count = 0;
-  const ChunkData* cp;
-  for (cp = beg_cp; cp < end_cp && cp->data_size() == chunk_size; ++cp) {
+  const RegionData* cp;
+  for (cp = beg_cp; cp < end_cp && cp->data_size() == region_size; ++cp) {
     ++full_count;
   }
 
@@ -1093,7 +885,7 @@
   const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval;
   if (maximum_compaction || cp == end_cp || interval_ended) {
     _maximum_compaction_gc_num = total_invocations();
-    return sd.chunk_to_addr(cp);
+    return sd.region_to_addr(cp);
   }
 
   HeapWord* const new_top = _space_info[id].new_top();
@@ -1116,52 +908,53 @@
   }
 
   // XXX - Use binary search?
-  HeapWord* dense_prefix = sd.chunk_to_addr(cp);
-  const ChunkData* full_cp = cp;
-  const ChunkData* const top_cp = sd.addr_to_chunk_ptr(space->top() - 1);
+  HeapWord* dense_prefix = sd.region_to_addr(cp);
+  const RegionData* full_cp = cp;
+  const RegionData* const top_cp = sd.addr_to_region_ptr(space->top() - 1);
   while (cp < end_cp) {
-    HeapWord* chunk_destination = cp->destination();
-    const size_t cur_deadwood = pointer_delta(dense_prefix, chunk_destination);
+    HeapWord* region_destination = cp->destination();
+    const size_t cur_deadwood = pointer_delta(dense_prefix, region_destination);
     if (TraceParallelOldGCDensePrefix && Verbose) {
       tty->print_cr("c#=" SIZE_FORMAT_W(4) " dst=" PTR_FORMAT " "
                     "dp=" SIZE_FORMAT_W(8) " " "cdw=" SIZE_FORMAT_W(8),
-                    sd.chunk(cp), chunk_destination,
+                    sd.region(cp), region_destination,
                     dense_prefix, cur_deadwood);
     }
 
     if (cur_deadwood >= deadwood_goal) {
-      // Found the chunk that has the correct amount of deadwood to the left.
-      // This typically occurs after crossing a fairly sparse set of chunks, so
-      // iterate backwards over those sparse chunks, looking for the chunk that
-      // has the lowest density of live objects 'to the right.'
-      size_t space_to_left = sd.chunk(cp) * chunk_size;
+      // Found the region that has the correct amount of deadwood to the left.
+      // This typically occurs after crossing a fairly sparse set of regions, so
+      // iterate backwards over those sparse regions, looking for the region
+      // that has the lowest density of live objects 'to the right.'
+      size_t space_to_left = sd.region(cp) * region_size;
       size_t live_to_left = space_to_left - cur_deadwood;
       size_t space_to_right = space_capacity - space_to_left;
       size_t live_to_right = space_live - live_to_left;
       double density_to_right = double(live_to_right) / space_to_right;
       while (cp > full_cp) {
         --cp;
-        const size_t prev_chunk_live_to_right = live_to_right - cp->data_size();
-        const size_t prev_chunk_space_to_right = space_to_right + chunk_size;
-        double prev_chunk_density_to_right =
-          double(prev_chunk_live_to_right) / prev_chunk_space_to_right;
-        if (density_to_right <= prev_chunk_density_to_right) {
+        const size_t prev_region_live_to_right = live_to_right -
+          cp->data_size();
+        const size_t prev_region_space_to_right = space_to_right + region_size;
+        double prev_region_density_to_right =
+          double(prev_region_live_to_right) / prev_region_space_to_right;
+        if (density_to_right <= prev_region_density_to_right) {
           return dense_prefix;
         }
         if (TraceParallelOldGCDensePrefix && Verbose) {
           tty->print_cr("backing up from c=" SIZE_FORMAT_W(4) " d2r=%10.8f "
-                        "pc_d2r=%10.8f", sd.chunk(cp), density_to_right,
-                        prev_chunk_density_to_right);
+                        "pc_d2r=%10.8f", sd.region(cp), density_to_right,
+                        prev_region_density_to_right);
         }
-        dense_prefix -= chunk_size;
-        live_to_right = prev_chunk_live_to_right;
-        space_to_right = prev_chunk_space_to_right;
-        density_to_right = prev_chunk_density_to_right;
+        dense_prefix -= region_size;
+        live_to_right = prev_region_live_to_right;
+        space_to_right = prev_region_space_to_right;
+        density_to_right = prev_region_density_to_right;
       }
       return dense_prefix;
     }
 
-    dense_prefix += chunk_size;
+    dense_prefix += region_size;
     ++cp;
   }
 
@@ -1174,8 +967,8 @@
                                                  const bool maximum_compaction,
                                                  HeapWord* const addr)
 {
-  const size_t chunk_idx = summary_data().addr_to_chunk_idx(addr);
-  ChunkData* const cp = summary_data().chunk(chunk_idx);
+  const size_t region_idx = summary_data().addr_to_region_idx(addr);
+  RegionData* const cp = summary_data().region(region_idx);
   const MutableSpace* const space = _space_info[id].space();
   HeapWord* const new_top = _space_info[id].new_top();
 
@@ -1191,7 +984,7 @@
                 "d2l=" SIZE_FORMAT " d2l%%=%6.4f "
                 "d2r=" SIZE_FORMAT " l2r=" SIZE_FORMAT
                 " ratio=%10.8f",
-                algorithm, addr, chunk_idx,
+                algorithm, addr, region_idx,
                 space_live,
                 dead_to_left, dead_to_left_pct,
                 dead_to_right, live_to_right,
@@ -1253,52 +1046,52 @@
   return MAX2(limit, 0.0);
 }
 
-ParallelCompactData::ChunkData*
-PSParallelCompact::first_dead_space_chunk(const ChunkData* beg,
-                                          const ChunkData* end)
+ParallelCompactData::RegionData*
+PSParallelCompact::first_dead_space_region(const RegionData* beg,
+                                           const RegionData* end)
 {
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
+  const size_t region_size = ParallelCompactData::RegionSize;
   ParallelCompactData& sd = summary_data();
-  size_t left = sd.chunk(beg);
-  size_t right = end > beg ? sd.chunk(end) - 1 : left;
+  size_t left = sd.region(beg);
+  size_t right = end > beg ? sd.region(end) - 1 : left;
 
   // Binary search.
   while (left < right) {
     // Equivalent to (left + right) / 2, but does not overflow.
     const size_t middle = left + (right - left) / 2;
-    ChunkData* const middle_ptr = sd.chunk(middle);
+    RegionData* const middle_ptr = sd.region(middle);
     HeapWord* const dest = middle_ptr->destination();
-    HeapWord* const addr = sd.chunk_to_addr(middle);
+    HeapWord* const addr = sd.region_to_addr(middle);
     assert(dest != NULL, "sanity");
     assert(dest <= addr, "must move left");
 
     if (middle > left && dest < addr) {
       right = middle - 1;
-    } else if (middle < right && middle_ptr->data_size() == chunk_size) {
+    } else if (middle < right && middle_ptr->data_size() == region_size) {
       left = middle + 1;
     } else {
       return middle_ptr;
     }
   }
-  return sd.chunk(left);
+  return sd.region(left);
 }
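A minimal standalone sketch of why the binary searches here compute the midpoint as left + (right - left) / 2; the bounds are hypothetical and deliberately placed near SIZE_MAX to show the wrap-around that (left + right) / 2 would suffer.

#include <cstddef>
#include <cstdio>
#include <limits>

int main() {
  const size_t left  = std::numeric_limits<size_t>::max() - 8;
  const size_t right = std::numeric_limits<size_t>::max() - 2;
  // left + right wraps around before the division, so (left + right) / 2
  // would land far outside [left, right].  The subtract-first form stays
  // in range.
  const size_t middle = left + (right - left) / 2;
  printf("middle is %zu words above left\n", middle - left);  // prints 3
  return 0;
}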
 
-ParallelCompactData::ChunkData*
-PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg,
-                                         const ChunkData* end,
-                                         size_t dead_words)
+ParallelCompactData::RegionData*
+PSParallelCompact::dead_wood_limit_region(const RegionData* beg,
+                                          const RegionData* end,
+                                          size_t dead_words)
 {
   ParallelCompactData& sd = summary_data();
-  size_t left = sd.chunk(beg);
-  size_t right = end > beg ? sd.chunk(end) - 1 : left;
+  size_t left = sd.region(beg);
+  size_t right = end > beg ? sd.region(end) - 1 : left;
 
   // Binary search.
   while (left < right) {
     // Equivalent to (left + right) / 2, but does not overflow.
     const size_t middle = left + (right - left) / 2;
-    ChunkData* const middle_ptr = sd.chunk(middle);
+    RegionData* const middle_ptr = sd.region(middle);
     HeapWord* const dest = middle_ptr->destination();
-    HeapWord* const addr = sd.chunk_to_addr(middle);
+    HeapWord* const addr = sd.region_to_addr(middle);
     assert(dest != NULL, "sanity");
     assert(dest <= addr, "must move left");
 
@@ -1311,13 +1104,13 @@
       return middle_ptr;
     }
   }
-  return sd.chunk(left);
+  return sd.region(left);
 }
 
 // The result is valid during the summary phase, after the initial summarization
 // of each space into itself, and before final summarization.
 inline double
-PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
+PSParallelCompact::reclaimed_ratio(const RegionData* const cp,
                                    HeapWord* const bottom,
                                    HeapWord* const top,
                                    HeapWord* const new_top)
@@ -1331,12 +1124,13 @@
   assert(top >= new_top, "summary data problem?");
   assert(new_top > bottom, "space is empty; should not be here");
   assert(new_top >= cp->destination(), "sanity");
-  assert(top >= sd.chunk_to_addr(cp), "sanity");
+  assert(top >= sd.region_to_addr(cp), "sanity");
 
   HeapWord* const destination = cp->destination();
   const size_t dense_prefix_live  = pointer_delta(destination, bottom);
   const size_t compacted_region_live = pointer_delta(new_top, destination);
-  const size_t compacted_region_used = pointer_delta(top, sd.chunk_to_addr(cp));
+  const size_t compacted_region_used = pointer_delta(top,
+                                                     sd.region_to_addr(cp));
   const size_t reclaimable = compacted_region_used - compacted_region_live;
 
   const double divisor = dense_prefix_live + 1.25 * compacted_region_live;
@@ -1344,39 +1138,40 @@
 }
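With illustrative numbers (not taken from any real heap): dense_prefix_live = 10,000 words, compacted_region_live = 2,000 and compacted_region_used = 8,000 give reclaimable = 6,000 and a divisor of 10,000 + 1.25 * 2,000 = 12,500, so the reclaimed ratio works out to 6,000 / 12,500 = 0.48; the 1.25 factor weights the live words that would have to be copied more heavily than the dead space recovered.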
 
 // Return the address of the end of the dense prefix, a.k.a. the start of the
-// compacted region.  The address is always on a chunk boundary.
+// compacted region.  The address is always on a region boundary.
 //
-// Completely full chunks at the left are skipped, since no compaction can occur
-// in those chunks.  Then the maximum amount of dead wood to allow is computed,
-// based on the density (amount live / capacity) of the generation; the chunk
-// with approximately that amount of dead space to the left is identified as the
-// limit chunk.  Chunks between the last completely full chunk and the limit
-// chunk are scanned and the one that has the best (maximum) reclaimed_ratio()
-// is selected.
+// Completely full regions at the left are skipped, since no compaction can
+// occur in those regions.  Then the maximum amount of dead wood to allow is
+// computed, based on the density (amount live / capacity) of the generation;
+// the region with approximately that amount of dead space to the left is
+// identified as the limit region.  Regions between the last completely full
+// region and the limit region are scanned and the one that has the best
+// (maximum) reclaimed_ratio() is selected.
 HeapWord*
 PSParallelCompact::compute_dense_prefix(const SpaceId id,
                                         bool maximum_compaction)
 {
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
+  const size_t region_size = ParallelCompactData::RegionSize;
   const ParallelCompactData& sd = summary_data();
 
   const MutableSpace* const space = _space_info[id].space();
   HeapWord* const top = space->top();
-  HeapWord* const top_aligned_up = sd.chunk_align_up(top);
+  HeapWord* const top_aligned_up = sd.region_align_up(top);
   HeapWord* const new_top = _space_info[id].new_top();
-  HeapWord* const new_top_aligned_up = sd.chunk_align_up(new_top);
+  HeapWord* const new_top_aligned_up = sd.region_align_up(new_top);
   HeapWord* const bottom = space->bottom();
-  const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(bottom);
-  const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
-  const ChunkData* const new_top_cp = sd.addr_to_chunk_ptr(new_top_aligned_up);
-
-  // Skip full chunks at the beginning of the space--they are necessarily part
+  const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom);
+  const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
+  const RegionData* const new_top_cp =
+    sd.addr_to_region_ptr(new_top_aligned_up);
+
+  // Skip full regions at the beginning of the space--they are necessarily part
   // of the dense prefix.
-  const ChunkData* const full_cp = first_dead_space_chunk(beg_cp, new_top_cp);
-  assert(full_cp->destination() == sd.chunk_to_addr(full_cp) ||
+  const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp);
+  assert(full_cp->destination() == sd.region_to_addr(full_cp) ||
          space->is_empty(), "no dead space allowed to the left");
-  assert(full_cp->data_size() < chunk_size || full_cp == new_top_cp - 1,
-         "chunk must have dead space");
+  assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1,
+         "region must have dead space");
 
   // The gc number is saved whenever a maximum compaction is done, and used to
   // determine when the maximum compaction interval has expired.  This avoids
@@ -1387,7 +1182,7 @@
     total_invocations() == HeapFirstMaximumCompactionCount;
   if (maximum_compaction || full_cp == top_cp || interval_ended) {
     _maximum_compaction_gc_num = total_invocations();
-    return sd.chunk_to_addr(full_cp);
+    return sd.region_to_addr(full_cp);
   }
 
   const size_t space_live = pointer_delta(new_top, bottom);
@@ -1413,15 +1208,15 @@
                   dead_wood_max, dead_wood_limit);
   }
 
-  // Locate the chunk with the desired amount of dead space to the left.
-  const ChunkData* const limit_cp =
-    dead_wood_limit_chunk(full_cp, top_cp, dead_wood_limit);
-
-  // Scan from the first chunk with dead space to the limit chunk and find the
+  // Locate the region with the desired amount of dead space to the left.
+  const RegionData* const limit_cp =
+    dead_wood_limit_region(full_cp, top_cp, dead_wood_limit);
+
+  // Scan from the first region with dead space to the limit region and find the
   // one with the best (largest) reclaimed ratio.
   double best_ratio = 0.0;
-  const ChunkData* best_cp = full_cp;
-  for (const ChunkData* cp = full_cp; cp < limit_cp; ++cp) {
+  const RegionData* best_cp = full_cp;
+  for (const RegionData* cp = full_cp; cp < limit_cp; ++cp) {
     double tmp_ratio = reclaimed_ratio(cp, bottom, top, new_top);
     if (tmp_ratio > best_ratio) {
       best_cp = cp;
@@ -1430,18 +1225,18 @@
   }
 
 #if     0
-  // Something to consider:  if the chunk with the best ratio is 'close to' the
-  // first chunk w/free space, choose the first chunk with free space
-  // ("first-free").  The first-free chunk is usually near the start of the
+  // Something to consider:  if the region with the best ratio is 'close to' the
+  // first region w/free space, choose the first region with free space
+  // ("first-free").  The first-free region is usually near the start of the
   // heap, which means we are copying most of the heap already, so copy a bit
   // more to get complete compaction.
-  if (pointer_delta(best_cp, full_cp, sizeof(ChunkData)) < 4) {
+  if (pointer_delta(best_cp, full_cp, sizeof(RegionData)) < 4) {
     _maximum_compaction_gc_num = total_invocations();
     best_cp = full_cp;
   }
 #endif  // #if 0
 
-  return sd.chunk_to_addr(best_cp);
+  return sd.region_to_addr(best_cp);
 }
 
 void PSParallelCompact::summarize_spaces_quick()
@@ -1459,9 +1254,9 @@
 void PSParallelCompact::fill_dense_prefix_end(SpaceId id)
 {
   HeapWord* const dense_prefix_end = dense_prefix(id);
-  const ChunkData* chunk = _summary_data.addr_to_chunk_ptr(dense_prefix_end);
+  const RegionData* region = _summary_data.addr_to_region_ptr(dense_prefix_end);
   const idx_t dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end);
-  if (dead_space_crosses_boundary(chunk, dense_prefix_bit)) {
+  if (dead_space_crosses_boundary(region, dense_prefix_bit)) {
     // Only enough dead space is filled so that any remaining dead space to the
     // left is larger than the minimum filler object.  (The remainder is filled
     // during the copy/update phase.)
@@ -1552,7 +1347,7 @@
       fill_dense_prefix_end(id);
     }
 
-    // Compute the destination of each Chunk, and thus each object.
+    // Compute the destination of each Region, and thus each object.
     _summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end);
     _summary_data.summarize(dense_prefix_end, space->end(),
                             dense_prefix_end, space->top(),
@@ -1560,19 +1355,19 @@
   }
 
   if (TraceParallelOldGCSummaryPhase) {
-    const size_t chunk_size = ParallelCompactData::ChunkSize;
+    const size_t region_size = ParallelCompactData::RegionSize;
     HeapWord* const dense_prefix_end = _space_info[id].dense_prefix();
-    const size_t dp_chunk = _summary_data.addr_to_chunk_idx(dense_prefix_end);
+    const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end);
     const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom());
     HeapWord* const new_top = _space_info[id].new_top();
-    const HeapWord* nt_aligned_up = _summary_data.chunk_align_up(new_top);
+    const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top);
     const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end);
     tty->print_cr("id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " "
-                  "dp_chunk=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
+                  "dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
                   "cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT,
                   id, space->capacity_in_words(), dense_prefix_end,
-                  dp_chunk, dp_words / chunk_size,
-                  cr_words / chunk_size, new_top);
+                  dp_region, dp_words / region_size,
+                  cr_words / region_size, new_top);
   }
 }
 
@@ -1584,11 +1379,6 @@
   // trace("2");
 
 #ifdef  ASSERT
-  if (VerifyParallelOldWithMarkSweep  &&
-      (PSParallelCompact::total_invocations() %
-         VerifyParallelOldWithMarkSweepInterval) == 0) {
-    verify_mark_bitmap(_mark_bitmap);
-  }
   if (TraceParallelOldGCMarkingPhase) {
     tty->print_cr("add_obj_count=" SIZE_FORMAT " "
                   "add_obj_bytes=" SIZE_FORMAT,
@@ -1605,7 +1395,7 @@
   if (TraceParallelOldGCSummaryPhase) {
     tty->print_cr("summary_phase:  after summarizing each space to self");
     Universe::print();
-    NOT_PRODUCT(print_chunk_ranges());
+    NOT_PRODUCT(print_region_ranges());
     if (Verbose) {
       NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info));
     }
@@ -1651,14 +1441,15 @@
                               space->bottom(), space->top(),
                               new_top_addr);
 
-      // Clear the source_chunk field for each chunk in the space.
+      // Clear the source_region field for each region in the space.
       HeapWord* const new_top = _space_info[id].new_top();
-      HeapWord* const clear_end = _summary_data.chunk_align_up(new_top);
-      ChunkData* beg_chunk = _summary_data.addr_to_chunk_ptr(space->bottom());
-      ChunkData* end_chunk = _summary_data.addr_to_chunk_ptr(clear_end);
-      while (beg_chunk < end_chunk) {
-        beg_chunk->set_source_chunk(0);
-        ++beg_chunk;
+      HeapWord* const clear_end = _summary_data.region_align_up(new_top);
+      RegionData* beg_region =
+        _summary_data.addr_to_region_ptr(space->bottom());
+      RegionData* end_region = _summary_data.addr_to_region_ptr(clear_end);
+      while (beg_region < end_region) {
+        beg_region->set_source_region(0);
+        ++beg_region;
       }
 
       // Reset the new_top value for the space.
@@ -1666,243 +1457,16 @@
     }
   }
 
-  // Fill in the block data after any changes to the chunks have
-  // been made.
-#ifdef  ASSERT
-  summarize_blocks(cm, perm_space_id);
-  summarize_blocks(cm, old_space_id);
-#else
-  if (!UseParallelOldGCChunkPointerCalc) {
-    summarize_blocks(cm, perm_space_id);
-    summarize_blocks(cm, old_space_id);
-  }
-#endif
-
   if (TraceParallelOldGCSummaryPhase) {
     tty->print_cr("summary_phase:  after final summarization");
     Universe::print();
-    NOT_PRODUCT(print_chunk_ranges());
+    NOT_PRODUCT(print_region_ranges());
     if (Verbose) {
       NOT_PRODUCT(print_generic_summary_data(_summary_data, _space_info));
     }
   }
 }
 
-// Fill in the BlockData.
-// Iterate over the spaces and within each space iterate over
-// the chunks and fill in the BlockData for each chunk.
-
-void PSParallelCompact::summarize_blocks(ParCompactionManager* cm,
-                                         SpaceId first_compaction_space_id) {
-#if     0
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(1);)
-  for (SpaceId cur_space_id = first_compaction_space_id;
-       cur_space_id != last_space_id;
-       cur_space_id = next_compaction_space_id(cur_space_id)) {
-    // Iterate over the chunks in the space
-    size_t start_chunk_index =
-      _summary_data.addr_to_chunk_idx(space(cur_space_id)->bottom());
-    BitBlockUpdateClosure bbu(mark_bitmap(),
-                              cm,
-                              start_chunk_index);
-    // Iterate over blocks.
-    for (size_t chunk_index =  start_chunk_index;
-         chunk_index < _summary_data.chunk_count() &&
-         _summary_data.chunk_to_addr(chunk_index) < space(cur_space_id)->top();
-         chunk_index++) {
-
-      // Reset the closure for the new chunk.  Note that the closure
-      // maintains some data that does not get reset for each chunk
-      // so a new instance of the closure is no appropriate.
-      bbu.reset_chunk(chunk_index);
-
-      // Start the iteration with the first live object.  This
-      // may return the end of the chunk.  That is acceptable since
-      // it will properly limit the iterations.
-      ParMarkBitMap::idx_t left_offset = mark_bitmap()->addr_to_bit(
-        _summary_data.first_live_or_end_in_chunk(chunk_index));
-
-      // End the iteration at the end of the chunk.
-      HeapWord* chunk_addr = _summary_data.chunk_to_addr(chunk_index);
-      HeapWord* chunk_end = chunk_addr + ParallelCompactData::ChunkSize;
-      ParMarkBitMap::idx_t right_offset =
-        mark_bitmap()->addr_to_bit(chunk_end);
-
-      // Blocks that have not objects starting in them can be
-      // skipped because their data will never be used.
-      if (left_offset < right_offset) {
-
-        // Iterate through the objects in the chunk.
-        ParMarkBitMap::idx_t last_offset =
-          mark_bitmap()->pair_iterate(&bbu, left_offset, right_offset);
-
-        // If last_offset is less than right_offset, then the iterations
-        // terminated while it was looking for an end bit.  "last_offset"
-        // is then the offset for the last start bit.  In this situation
-        // the "offset" field for the next block to the right (_cur_block + 1)
-        // will not have been update although there may be live data
-        // to the left of the chunk.
-
-        size_t cur_block_plus_1 = bbu.cur_block() + 1;
-        HeapWord* cur_block_plus_1_addr =
-        _summary_data.block_to_addr(bbu.cur_block()) +
-        ParallelCompactData::BlockSize;
-        HeapWord* last_offset_addr = mark_bitmap()->bit_to_addr(last_offset);
- #if 1  // This code works.  The else doesn't but should.  Why does it?
-        // The current block (cur_block()) has already been updated.
-        // The last block that may need to be updated is either the
-        // next block (current block + 1) or the block where the
-        // last object starts (which can be greater than the
-        // next block if there were no objects found in intervening
-        // blocks).
-        size_t last_block =
-          MAX2(bbu.cur_block() + 1,
-               _summary_data.addr_to_block_idx(last_offset_addr));
- #else
-        // The current block has already been updated.  The only block
-        // that remains to be updated is the block where the last
-        // object in the chunk starts.
-        size_t last_block = _summary_data.addr_to_block_idx(last_offset_addr);
- #endif
-        assert_bit_is_start(last_offset);
-        assert((last_block == _summary_data.block_count()) ||
-             (_summary_data.block(last_block)->raw_offset() == 0),
-          "Should not have been set");
-        // Is the last block still in the current chunk?  If still
-        // in this chunk, update the last block (the counting that
-        // included the current block is meant for the offset of the last
-        // block).  If not in this chunk, do nothing.  Should not
-        // update a block in the next chunk.
-        if (ParallelCompactData::chunk_contains_block(bbu.chunk_index(),
-                                                      last_block)) {
-          if (last_offset < right_offset) {
-            // The last object started in this chunk but ends beyond
-            // this chunk.  Update the block for this last object.
-            assert(mark_bitmap()->is_marked(last_offset), "Should be marked");
-            // No end bit was found.  The closure takes care of
-            // the cases where
-            //   an objects crosses over into the next block
-            //   an objects starts and ends in the next block
-            // It does not handle the case where an object is
-            // the first object in a later block and extends
-            // past the end of the chunk (i.e., the closure
-            // only handles complete objects that are in the range
-            // it is given).  That object is handed back here
-            // for any special consideration necessary.
-            //
-            // Is the first bit in the last block a start or end bit?
-            //
-            // If the partial object ends in the last block L,
-            // then the 1st bit in L may be an end bit.
-            //
-            // Else does the last object start in a block after the current
-            // block? A block AA will already have been updated if an
-            // object ends in the next block AA+1.  An object found to end in
-            // the AA+1 is the trigger that updates AA.  Objects are being
-            // counted in the current block for updaing a following
-            // block.  An object may start in later block
-            // block but may extend beyond the last block in the chunk.
-            // Updates are only done when the end of an object has been
-            // found. If the last object (covered by block L) starts
-            // beyond the current block, then no object ends in L (otherwise
-            // L would be the current block).  So the first bit in L is
-            // a start bit.
-            //
-            // Else the last objects start in the current block and ends
-            // beyond the chunk.  The current block has already been
-            // updated and there is no later block (with an object
-            // starting in it) that needs to be updated.
-            //
-            if (_summary_data.partial_obj_ends_in_block(last_block)) {
-              _summary_data.block(last_block)->set_end_bit_offset(
-                bbu.live_data_left());
-            } else if (last_offset_addr >= cur_block_plus_1_addr) {
-              //   The start of the object is on a later block
-              // (to the right of the current block and there are no
-              // complete live objects to the left of this last object
-              // within the chunk.
-              //   The first bit in the block is for the start of the
-              // last object.
-              _summary_data.block(last_block)->set_start_bit_offset(
-                bbu.live_data_left());
-            } else {
-              //   The start of the last object was found in
-              // the current chunk (which has already
-              // been updated).
-              assert(bbu.cur_block() ==
-                      _summary_data.addr_to_block_idx(last_offset_addr),
-                "Should be a block already processed");
-            }
-#ifdef ASSERT
-            // Is there enough block information to find this object?
-            // The destination of the chunk has not been set so the
-            // values returned by calc_new_pointer() and
-            // block_calc_new_pointer() will only be
-            // offsets.  But they should agree.
-            HeapWord* moved_obj_with_chunks =
-              _summary_data.chunk_calc_new_pointer(last_offset_addr);
-            HeapWord* moved_obj_with_blocks =
-              _summary_data.calc_new_pointer(last_offset_addr);
-            assert(moved_obj_with_chunks == moved_obj_with_blocks,
-              "Block calculation is wrong");
-#endif
-          } else if (last_block < _summary_data.block_count()) {
-            // Iterations ended looking for a start bit (but
-            // did not run off the end of the block table).
-            _summary_data.block(last_block)->set_start_bit_offset(
-              bbu.live_data_left());
-          }
-        }
-#ifdef ASSERT
-        // Is there enough block information to find this object?
-          HeapWord* left_offset_addr = mark_bitmap()->bit_to_addr(left_offset);
-        HeapWord* moved_obj_with_chunks =
-          _summary_data.calc_new_pointer(left_offset_addr);
-        HeapWord* moved_obj_with_blocks =
-          _summary_data.calc_new_pointer(left_offset_addr);
-          assert(moved_obj_with_chunks == moved_obj_with_blocks,
-          "Block calculation is wrong");
-#endif
-
-        // Is there another block after the end of this chunk?
-#ifdef ASSERT
-        if (last_block < _summary_data.block_count()) {
-        // No object may have been found in a block.  If that
-        // block is at the end of the chunk, the iteration will
-        // terminate without incrementing the current block so
-        // that the current block is not the last block in the
-        // chunk.  That situation precludes asserting that the
-        // current block is the last block in the chunk.  Assert
-        // the lesser condition that the current block does not
-        // exceed the chunk.
-          assert(_summary_data.block_to_addr(last_block) <=
-               (_summary_data.chunk_to_addr(chunk_index) +
-                 ParallelCompactData::ChunkSize),
-              "Chunk and block inconsistency");
-          assert(last_offset <= right_offset, "Iteration over ran end");
-        }
-#endif
-      }
-#ifdef ASSERT
-      if (PrintGCDetails && Verbose) {
-        if (_summary_data.chunk(chunk_index)->partial_obj_size() == 1) {
-          size_t first_block =
-            chunk_index / ParallelCompactData::BlocksPerChunk;
-          gclog_or_tty->print_cr("first_block " PTR_FORMAT
-            " _offset " PTR_FORMAT
-            "_first_is_start_bit %d",
-            first_block,
-            _summary_data.block(first_block)->raw_offset(),
-            _summary_data.block(first_block)->first_is_start_bit());
-        }
-      }
-#endif
-    }
-  }
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(16);)
-#endif  // #if 0
-}
-
 // This method should contain all heap-specific policy for invoking a full
 // collection.  invoke_no_policy() will only attempt to compact the heap; it
 // will do nothing further.  If we need to bail out for policy reasons, scavenge
@@ -1937,18 +1501,9 @@
   }
 }
 
-bool ParallelCompactData::chunk_contains(size_t chunk_index, HeapWord* addr) {
-  size_t addr_chunk_index = addr_to_chunk_idx(addr);
-  return chunk_index == addr_chunk_index;
-}
-
-bool ParallelCompactData::chunk_contains_block(size_t chunk_index,
-                                               size_t block_index) {
-  size_t first_block_in_chunk = chunk_index * BlocksPerChunk;
-  size_t last_block_in_chunk = (chunk_index + 1) * BlocksPerChunk - 1;
-
-  return (first_block_in_chunk <= block_index) &&
-         (block_index <= last_block_in_chunk);
+bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
+  size_t addr_region_index = addr_to_region_idx(addr);
+  return region_index == addr_region_index;
 }
 
 // This method contains no policy. You should probably
@@ -2038,39 +1593,9 @@
     }
 #endif  // #ifndef PRODUCT
 
-#ifdef ASSERT
-    if (VerifyParallelOldWithMarkSweep &&
-        (PSParallelCompact::total_invocations() %
-           VerifyParallelOldWithMarkSweepInterval) == 0) {
-      gclog_or_tty->print_cr("Verify marking with mark_sweep_phase1()");
-      if (PrintGCDetails && Verbose) {
-        gclog_or_tty->print_cr("mark_sweep_phase1:");
-      }
-      // Clear the discovered lists so that discovered objects
-      // don't look like they have been discovered twice.
-      ref_processor()->clear_discovered_references();
-
-      PSMarkSweep::allocate_stacks();
-      MemRegion mr = Universe::heap()->reserved_region();
-      PSMarkSweep::ref_processor()->enable_discovery();
-      PSMarkSweep::mark_sweep_phase1(maximum_heap_compaction);
-    }
-#endif
-
     bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc;
     summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc);
 
-#ifdef ASSERT
-    if (VerifyParallelOldWithMarkSweep &&
-        (PSParallelCompact::total_invocations() %
-           VerifyParallelOldWithMarkSweepInterval) == 0) {
-      if (PrintGCDetails && Verbose) {
-        gclog_or_tty->print_cr("mark_sweep_phase2:");
-      }
-      PSMarkSweep::mark_sweep_phase2();
-    }
-#endif
-
     COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity"));
     COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
 
@@ -2078,28 +1603,6 @@
     // needed by the compaction for filling holes in the dense prefix.
     adjust_roots();
 
-#ifdef ASSERT
-    if (VerifyParallelOldWithMarkSweep &&
-        (PSParallelCompact::total_invocations() %
-           VerifyParallelOldWithMarkSweepInterval) == 0) {
-      // Do a separate verify phase so that the verify
-      // code can use the the forwarding pointers to
-      // check the new pointer calculation.  The restore_marks()
-      // has to be done before the real compact.
-      vmthread_cm->set_action(ParCompactionManager::VerifyUpdate);
-      compact_perm(vmthread_cm);
-      compact_serial(vmthread_cm);
-      vmthread_cm->set_action(ParCompactionManager::ResetObjects);
-      compact_perm(vmthread_cm);
-      compact_serial(vmthread_cm);
-      vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy);
-
-      // For debugging only
-      PSMarkSweep::restore_marks();
-      PSMarkSweep::deallocate_stacks();
-    }
-#endif
-
     compaction_start.update();
     // Does the perm gen always have to be done serially because
     // klasses are used in the update of an object?
@@ -2349,7 +1852,7 @@
 
   ParallelScavengeHeap* heap = gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
+  TaskQueueSetSuper* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(parallel_gc_threads, qset);
 
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
@@ -2487,8 +1990,9 @@
   move_and_update(cm, perm_space_id);
 }
 
-void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
-                                                     uint parallel_gc_threads) {
+void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q,
+                                                      uint parallel_gc_threads)
+{
   TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty);
 
   const unsigned int task_count = MAX2(parallel_gc_threads, 1U);
@@ -2496,13 +2000,13 @@
     q->enqueue(new DrainStacksCompactionTask());
   }
 
-  // Find all chunks that are available (can be filled immediately) and
+  // Find all regions that are available (can be filled immediately) and
   // distribute them to the thread stacks.  The iteration is done in reverse
-  // order (high to low) so the chunks will be removed in ascending order.
+  // order (high to low) so the regions will be removed in ascending order.
 
   const ParallelCompactData& sd = PSParallelCompact::summary_data();
 
-  size_t fillable_chunks = 0;   // A count for diagnostic purposes.
+  size_t fillable_regions = 0;   // A count for diagnostic purposes.
   unsigned int which = 0;       // The worker thread number.
 
   for (unsigned int id = to_space_id; id > perm_space_id; --id) {
@@ -2510,25 +2014,26 @@
     MutableSpace* const space = space_info->space();
     HeapWord* const new_top = space_info->new_top();
 
-    const size_t beg_chunk = sd.addr_to_chunk_idx(space_info->dense_prefix());
-    const size_t end_chunk = sd.addr_to_chunk_idx(sd.chunk_align_up(new_top));
-    assert(end_chunk > 0, "perm gen cannot be empty");
-
-    for (size_t cur = end_chunk - 1; cur >= beg_chunk; --cur) {
-      if (sd.chunk(cur)->claim_unsafe()) {
+    const size_t beg_region = sd.addr_to_region_idx(space_info->dense_prefix());
+    const size_t end_region =
+      sd.addr_to_region_idx(sd.region_align_up(new_top));
+    assert(end_region > 0, "perm gen cannot be empty");
+
+    for (size_t cur = end_region - 1; cur >= beg_region; --cur) {
+      if (sd.region(cur)->claim_unsafe()) {
         ParCompactionManager* cm = ParCompactionManager::manager_array(which);
         cm->save_for_processing(cur);
 
         if (TraceParallelOldGCCompactionPhase && Verbose) {
-          const size_t count_mod_8 = fillable_chunks & 7;
+          const size_t count_mod_8 = fillable_regions & 7;
           if (count_mod_8 == 0) gclog_or_tty->print("fillable: ");
           gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur);
           if (count_mod_8 == 7) gclog_or_tty->cr();
         }
 
-        NOT_PRODUCT(++fillable_chunks;)
-
-        // Assign chunks to threads in round-robin fashion.
+        NOT_PRODUCT(++fillable_regions;)
+
+        // Assign regions to threads in round-robin fashion.
         if (++which == task_count) {
           which = 0;
         }
@@ -2537,8 +2042,8 @@
   }
 
   if (TraceParallelOldGCCompactionPhase) {
-    if (Verbose && (fillable_chunks & 7) != 0) gclog_or_tty->cr();
-    gclog_or_tty->print_cr("%u initially fillable chunks", fillable_chunks);
+    if (Verbose && (fillable_regions & 7) != 0) gclog_or_tty->cr();
+    gclog_or_tty->print_cr("%u initially fillable regions", fillable_regions);
   }
 }
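
The hunk above claims every immediately fillable region and deals the indices out to the worker stacks in round-robin order; a minimal standalone sketch of that distribution policy follows (plain C++ with illustrative names, not the HotSpot GCTaskQueue API).

#include <cstddef>
#include <vector>

// Deal claimable region indices out to worker queues in round-robin order,
// mirroring the "if (++which == task_count) which = 0;" rotation above.
static void distribute_round_robin(const std::vector<size_t>& fillable_regions,
                                   std::vector<std::vector<size_t> >& worker_queues) {
  const size_t task_count = worker_queues.size();
  if (task_count == 0) return;
  size_t which = 0;                      // The worker that receives the next region.
  for (size_t i = 0; i < fillable_regions.size(); ++i) {
    worker_queues[which].push_back(fillable_regions[i]);
    if (++which == task_count) {
      which = 0;                         // Wrap around to the first worker.
    }
  }
}

Because the real loop walks regions from high to low, each worker later pops its share in ascending order, as the comment in the hunk notes.
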
 
@@ -2551,7 +2056,7 @@
   ParallelCompactData& sd = PSParallelCompact::summary_data();
 
   // Iterate over all the spaces adding tasks for updating
-  // chunks in the dense prefix.  Assume that 1 gc thread
+  // regions in the dense prefix.  Assume that 1 gc thread
   // will work on opening the gaps and the remaining gc threads
   // will work on the dense prefix.
   SpaceId space_id = old_space_id;
@@ -2565,30 +2070,31 @@
       continue;
     }
 
-    // The dense prefix is before this chunk.
-    size_t chunk_index_end_dense_prefix =
-        sd.addr_to_chunk_idx(dense_prefix_end);
-    ChunkData* const dense_prefix_cp = sd.chunk(chunk_index_end_dense_prefix);
+    // The dense prefix is before this region.
+    size_t region_index_end_dense_prefix =
+        sd.addr_to_region_idx(dense_prefix_end);
+    RegionData* const dense_prefix_cp =
+      sd.region(region_index_end_dense_prefix);
     assert(dense_prefix_end == space->end() ||
            dense_prefix_cp->available() ||
            dense_prefix_cp->claimed(),
-           "The chunk after the dense prefix should always be ready to fill");
-
-    size_t chunk_index_start = sd.addr_to_chunk_idx(space->bottom());
+           "The region after the dense prefix should always be ready to fill");
+
+    size_t region_index_start = sd.addr_to_region_idx(space->bottom());
 
     // Is there dense prefix work?
-    size_t total_dense_prefix_chunks =
-      chunk_index_end_dense_prefix - chunk_index_start;
-    // How many chunks of the dense prefix should be given to
+    size_t total_dense_prefix_regions =
+      region_index_end_dense_prefix - region_index_start;
+    // How many regions of the dense prefix should be given to
     // each thread?
-    if (total_dense_prefix_chunks > 0) {
+    if (total_dense_prefix_regions > 0) {
       uint tasks_for_dense_prefix = 1;
       if (UseParallelDensePrefixUpdate) {
-        if (total_dense_prefix_chunks <=
+        if (total_dense_prefix_regions <=
             (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
           // Don't over partition.  This assumes that
           // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
-          // so there are not many chunks to process.
+          // so there are not many regions to process.
           tasks_for_dense_prefix = parallel_gc_threads;
         } else {
           // Over partition
@@ -2596,50 +2102,50 @@
             PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
         }
       }
-      size_t chunks_per_thread = total_dense_prefix_chunks /
+      size_t regions_per_thread = total_dense_prefix_regions /
         tasks_for_dense_prefix;
-      // Give each thread at least 1 chunk.
-      if (chunks_per_thread == 0) {
-        chunks_per_thread = 1;
+      // Give each thread at least 1 region.
+      if (regions_per_thread == 0) {
+        regions_per_thread = 1;
       }
 
       for (uint k = 0; k < tasks_for_dense_prefix; k++) {
-        if (chunk_index_start >= chunk_index_end_dense_prefix) {
+        if (region_index_start >= region_index_end_dense_prefix) {
           break;
         }
-        // chunk_index_end is not processed
-        size_t chunk_index_end = MIN2(chunk_index_start + chunks_per_thread,
-                                      chunk_index_end_dense_prefix);
+        // region_index_end is not processed
+        size_t region_index_end = MIN2(region_index_start + regions_per_thread,
+                                       region_index_end_dense_prefix);
         q->enqueue(new UpdateDensePrefixTask(
                                  space_id,
-                                 chunk_index_start,
-                                 chunk_index_end));
-        chunk_index_start = chunk_index_end;
+                                 region_index_start,
+                                 region_index_end));
+        region_index_start = region_index_end;
       }
     }
     // This gets any part of the dense prefix that did not
     // fit evenly.
-    if (chunk_index_start < chunk_index_end_dense_prefix) {
+    if (region_index_start < region_index_end_dense_prefix) {
       q->enqueue(new UpdateDensePrefixTask(
                                  space_id,
-                                 chunk_index_start,
-                                 chunk_index_end_dense_prefix));
+                                 region_index_start,
+                                 region_index_end_dense_prefix));
     }
     space_id = next_compaction_space_id(space_id);
   }  // End tasks for dense prefix
 }
 
-void PSParallelCompact::enqueue_chunk_stealing_tasks(
+void PSParallelCompact::enqueue_region_stealing_tasks(
                                      GCTaskQueue* q,
                                      ParallelTaskTerminator* terminator_ptr,
                                      uint parallel_gc_threads) {
   TraceTime tm("steal task setup", print_phases(), true, gclog_or_tty);
 
-  // Once a thread has drained it's stack, it should try to steal chunks from
+  // Once a thread has drained its stack, it should try to steal regions from
   // other threads.
   if (parallel_gc_threads > 1) {
     for (uint j = 0; j < parallel_gc_threads; j++) {
-      q->enqueue(new StealChunkCompactionTask(terminator_ptr));
+      q->enqueue(new StealRegionCompactionTask(terminator_ptr));
     }
   }
 }
@@ -2654,13 +2160,13 @@
   PSOldGen* old_gen = heap->old_gen();
   old_gen->start_array()->reset();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
+  TaskQueueSetSuper* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(parallel_gc_threads, qset);
 
   GCTaskQueue* q = GCTaskQueue::create();
-  enqueue_chunk_draining_tasks(q, parallel_gc_threads);
+  enqueue_region_draining_tasks(q, parallel_gc_threads);
   enqueue_dense_prefix_tasks(q, parallel_gc_threads);
-  enqueue_chunk_stealing_tasks(q, &terminator, parallel_gc_threads);
+  enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads);
 
   {
     TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty);
@@ -2676,9 +2182,9 @@
     WaitForBarrierGCTask::destroy(fin);
 
 #ifdef  ASSERT
-    // Verify that all chunks have been processed before the deferred updates.
+    // Verify that all regions have been processed before the deferred updates.
     // Note that perm_space_id is skipped; this type of verification is not
-    // valid until the perm gen is compacted by chunks.
+    // valid until the perm gen is compacted by regions.
     for (unsigned int id = old_space_id; id < last_space_id; ++id) {
       verify_complete(SpaceId(id));
     }
@@ -2697,42 +2203,42 @@
 
 #ifdef  ASSERT
 void PSParallelCompact::verify_complete(SpaceId space_id) {
-  // All Chunks between space bottom() to new_top() should be marked as filled
-  // and all Chunks between new_top() and top() should be available (i.e.,
+  // All Regions between space bottom() and new_top() should be marked as filled
+  // and all Regions between new_top() and top() should be available (i.e.,
   // should have been emptied).
   ParallelCompactData& sd = summary_data();
   SpaceInfo si = _space_info[space_id];
-  HeapWord* new_top_addr = sd.chunk_align_up(si.new_top());
-  HeapWord* old_top_addr = sd.chunk_align_up(si.space()->top());
-  const size_t beg_chunk = sd.addr_to_chunk_idx(si.space()->bottom());
-  const size_t new_top_chunk = sd.addr_to_chunk_idx(new_top_addr);
-  const size_t old_top_chunk = sd.addr_to_chunk_idx(old_top_addr);
+  HeapWord* new_top_addr = sd.region_align_up(si.new_top());
+  HeapWord* old_top_addr = sd.region_align_up(si.space()->top());
+  const size_t beg_region = sd.addr_to_region_idx(si.space()->bottom());
+  const size_t new_top_region = sd.addr_to_region_idx(new_top_addr);
+  const size_t old_top_region = sd.addr_to_region_idx(old_top_addr);
 
   bool issued_a_warning = false;
 
-  size_t cur_chunk;
-  for (cur_chunk = beg_chunk; cur_chunk < new_top_chunk; ++cur_chunk) {
-    const ChunkData* const c = sd.chunk(cur_chunk);
+  size_t cur_region;
+  for (cur_region = beg_region; cur_region < new_top_region; ++cur_region) {
+    const RegionData* const c = sd.region(cur_region);
     if (!c->completed()) {
-      warning("chunk " SIZE_FORMAT " not filled:  "
+      warning("region " SIZE_FORMAT " not filled:  "
               "destination_count=" SIZE_FORMAT,
-              cur_chunk, c->destination_count());
+              cur_region, c->destination_count());
       issued_a_warning = true;
     }
   }
 
-  for (cur_chunk = new_top_chunk; cur_chunk < old_top_chunk; ++cur_chunk) {
-    const ChunkData* const c = sd.chunk(cur_chunk);
+  for (cur_region = new_top_region; cur_region < old_top_region; ++cur_region) {
+    const RegionData* const c = sd.region(cur_region);
     if (!c->available()) {
-      warning("chunk " SIZE_FORMAT " not empty:   "
+      warning("region " SIZE_FORMAT " not empty:   "
               "destination_count=" SIZE_FORMAT,
-              cur_chunk, c->destination_count());
+              cur_region, c->destination_count());
       issued_a_warning = true;
     }
   }
 
   if (issued_a_warning) {
-    print_chunk_ranges();
+    print_region_ranges();
   }
 }
 #endif  // #ifdef ASSERT
@@ -2933,46 +2439,47 @@
 }
 #endif //VALIDATE_MARK_SWEEP
 
-// Update interior oops in the ranges of chunks [beg_chunk, end_chunk).
+// Update interior oops in the ranges of regions [beg_region, end_region).
 void
 PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
                                                        SpaceId space_id,
-                                                       size_t beg_chunk,
-                                                       size_t end_chunk) {
+                                                       size_t beg_region,
+                                                       size_t end_region) {
   ParallelCompactData& sd = summary_data();
   ParMarkBitMap* const mbm = mark_bitmap();
 
-  HeapWord* beg_addr = sd.chunk_to_addr(beg_chunk);
-  HeapWord* const end_addr = sd.chunk_to_addr(end_chunk);
-  assert(beg_chunk <= end_chunk, "bad chunk range");
+  HeapWord* beg_addr = sd.region_to_addr(beg_region);
+  HeapWord* const end_addr = sd.region_to_addr(end_region);
+  assert(beg_region <= end_region, "bad region range");
   assert(end_addr <= dense_prefix(space_id), "not in the dense prefix");
 
 #ifdef  ASSERT
-  // Claim the chunks to avoid triggering an assert when they are marked as
+  // Claim the regions to avoid triggering an assert when they are marked as
   // filled.
-  for (size_t claim_chunk = beg_chunk; claim_chunk < end_chunk; ++claim_chunk) {
-    assert(sd.chunk(claim_chunk)->claim_unsafe(), "claim() failed");
+  for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) {
+    assert(sd.region(claim_region)->claim_unsafe(), "claim() failed");
   }
 #endif  // #ifdef ASSERT
 
   if (beg_addr != space(space_id)->bottom()) {
     // Find the first live object or block of dead space that *starts* in this
-    // range of chunks.  If a partial object crosses onto the chunk, skip it; it
-    // will be marked for 'deferred update' when the object head is processed.
-    // If dead space crosses onto the chunk, it is also skipped; it will be
-    // filled when the prior chunk is processed.  If neither of those apply, the
-    // first word in the chunk is the start of a live object or dead space.
+    // range of regions.  If a partial object crosses onto the region, skip it;
+    // it will be marked for 'deferred update' when the object head is
+    // processed.  If dead space crosses onto the region, it is also skipped; it
+    // will be filled when the prior region is processed.  If neither of those
+    // apply, the first word in the region is the start of a live object or dead
+    // space.
     assert(beg_addr > space(space_id)->bottom(), "sanity");
-    const ChunkData* const cp = sd.chunk(beg_chunk);
+    const RegionData* const cp = sd.region(beg_region);
     if (cp->partial_obj_size() != 0) {
-      beg_addr = sd.partial_obj_end(beg_chunk);
+      beg_addr = sd.partial_obj_end(beg_region);
     } else if (dead_space_crosses_boundary(cp, mbm->addr_to_bit(beg_addr))) {
       beg_addr = mbm->find_obj_beg(beg_addr, end_addr);
     }
   }
 
   if (beg_addr < end_addr) {
-    // A live object or block of dead space starts in this range of Chunks.
+    // A live object or block of dead space starts in this range of Regions.
      HeapWord* const dense_prefix_end = dense_prefix(space_id);
 
     // Create closures and iterate.
@@ -2986,10 +2493,10 @@
     }
   }
 
-  // Mark the chunks as filled.
-  ChunkData* const beg_cp = sd.chunk(beg_chunk);
-  ChunkData* const end_cp = sd.chunk(end_chunk);
-  for (ChunkData* cp = beg_cp; cp < end_cp; ++cp) {
+  // Mark the regions as filled.
+  RegionData* const beg_cp = sd.region(beg_region);
+  RegionData* const end_cp = sd.region(end_region);
+  for (RegionData* cp = beg_cp; cp < end_cp; ++cp) {
     cp->set_completed();
   }
 }
@@ -3021,13 +2528,13 @@
   const MutableSpace* const space = space_info->space();
   assert(space_info->dense_prefix() >= space->bottom(), "dense_prefix not set");
   HeapWord* const beg_addr = space_info->dense_prefix();
-  HeapWord* const end_addr = sd.chunk_align_up(space_info->new_top());
-
-  const ChunkData* const beg_chunk = sd.addr_to_chunk_ptr(beg_addr);
-  const ChunkData* const end_chunk = sd.addr_to_chunk_ptr(end_addr);
-  const ChunkData* cur_chunk;
-  for (cur_chunk = beg_chunk; cur_chunk < end_chunk; ++cur_chunk) {
-    HeapWord* const addr = cur_chunk->deferred_obj_addr();
+  HeapWord* const end_addr = sd.region_align_up(space_info->new_top());
+
+  const RegionData* const beg_region = sd.addr_to_region_ptr(beg_addr);
+  const RegionData* const end_region = sd.addr_to_region_ptr(end_addr);
+  const RegionData* cur_region;
+  for (cur_region = beg_region; cur_region < end_region; ++cur_region) {
+    HeapWord* const addr = cur_region->deferred_obj_addr();
     if (addr != NULL) {
       if (start_array != NULL) {
         start_array->allocate_block(addr);
@@ -3073,45 +2580,45 @@
 
 HeapWord*
 PSParallelCompact::first_src_addr(HeapWord* const dest_addr,
-                                 size_t src_chunk_idx)
+                                 size_t src_region_idx)
 {
   ParMarkBitMap* const bitmap = mark_bitmap();
   const ParallelCompactData& sd = summary_data();
-  const size_t ChunkSize = ParallelCompactData::ChunkSize;
-
-  assert(sd.is_chunk_aligned(dest_addr), "not aligned");
-
-  const ChunkData* const src_chunk_ptr = sd.chunk(src_chunk_idx);
-  const size_t partial_obj_size = src_chunk_ptr->partial_obj_size();
-  HeapWord* const src_chunk_destination = src_chunk_ptr->destination();
-
-  assert(dest_addr >= src_chunk_destination, "wrong src chunk");
-  assert(src_chunk_ptr->data_size() > 0, "src chunk cannot be empty");
-
-  HeapWord* const src_chunk_beg = sd.chunk_to_addr(src_chunk_idx);
-  HeapWord* const src_chunk_end = src_chunk_beg + ChunkSize;
-
-  HeapWord* addr = src_chunk_beg;
-  if (dest_addr == src_chunk_destination) {
-    // Return the first live word in the source chunk.
+  const size_t RegionSize = ParallelCompactData::RegionSize;
+
+  assert(sd.is_region_aligned(dest_addr), "not aligned");
+
+  const RegionData* const src_region_ptr = sd.region(src_region_idx);
+  const size_t partial_obj_size = src_region_ptr->partial_obj_size();
+  HeapWord* const src_region_destination = src_region_ptr->destination();
+
+  assert(dest_addr >= src_region_destination, "wrong src region");
+  assert(src_region_ptr->data_size() > 0, "src region cannot be empty");
+
+  HeapWord* const src_region_beg = sd.region_to_addr(src_region_idx);
+  HeapWord* const src_region_end = src_region_beg + RegionSize;
+
+  HeapWord* addr = src_region_beg;
+  if (dest_addr == src_region_destination) {
+    // Return the first live word in the source region.
     if (partial_obj_size == 0) {
-      addr = bitmap->find_obj_beg(addr, src_chunk_end);
-      assert(addr < src_chunk_end, "no objects start in src chunk");
+      addr = bitmap->find_obj_beg(addr, src_region_end);
+      assert(addr < src_region_end, "no objects start in src region");
     }
     return addr;
   }
 
   // Must skip some live data.
-  size_t words_to_skip = dest_addr - src_chunk_destination;
-  assert(src_chunk_ptr->data_size() > words_to_skip, "wrong src chunk");
+  size_t words_to_skip = dest_addr - src_region_destination;
+  assert(src_region_ptr->data_size() > words_to_skip, "wrong src region");
 
   if (partial_obj_size >= words_to_skip) {
     // All the live words to skip are part of the partial object.
     addr += words_to_skip;
     if (partial_obj_size == words_to_skip) {
       // Find the first live word past the partial object.
-      addr = bitmap->find_obj_beg(addr, src_chunk_end);
-      assert(addr < src_chunk_end, "wrong src chunk");
+      addr = bitmap->find_obj_beg(addr, src_region_end);
+      assert(addr < src_region_end, "wrong src region");
     }
     return addr;
   }
@@ -3122,63 +2629,64 @@
     addr += partial_obj_size;
   }
 
-  // Skip over live words due to objects that start in the chunk.
-  addr = skip_live_words(addr, src_chunk_end, words_to_skip);
-  assert(addr < src_chunk_end, "wrong src chunk");
+  // Skip over live words due to objects that start in the region.
+  addr = skip_live_words(addr, src_region_end, words_to_skip);
+  assert(addr < src_region_end, "wrong src region");
   return addr;
 }
 
 void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm,
-                                                     size_t beg_chunk,
+                                                     size_t beg_region,
                                                      HeapWord* end_addr)
 {
   ParallelCompactData& sd = summary_data();
-  ChunkData* const beg = sd.chunk(beg_chunk);
-  HeapWord* const end_addr_aligned_up = sd.chunk_align_up(end_addr);
-  ChunkData* const end = sd.addr_to_chunk_ptr(end_addr_aligned_up);
-  size_t cur_idx = beg_chunk;
-  for (ChunkData* cur = beg; cur < end; ++cur, ++cur_idx) {
-    assert(cur->data_size() > 0, "chunk must have live data");
+  RegionData* const beg = sd.region(beg_region);
+  HeapWord* const end_addr_aligned_up = sd.region_align_up(end_addr);
+  RegionData* const end = sd.addr_to_region_ptr(end_addr_aligned_up);
+  size_t cur_idx = beg_region;
+  for (RegionData* cur = beg; cur < end; ++cur, ++cur_idx) {
+    assert(cur->data_size() > 0, "region must have live data");
     cur->decrement_destination_count();
-    if (cur_idx <= cur->source_chunk() && cur->available() && cur->claim()) {
+    if (cur_idx <= cur->source_region() && cur->available() && cur->claim()) {
       cm->save_for_processing(cur_idx);
     }
   }
 }
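
decrement_destination_counts() above lowers the destination count of each source region that has just been drained and enqueues any region that thereby becomes available and is successfully claimed (the real code additionally requires cur_idx <= cur->source_region()). A simplified sketch of that count-then-claim handshake using std::atomic; the two separate fields here are an illustration only, not the packed _dc_and_los encoding used by RegionData:

#include <atomic>
#include <cstddef>

// Toy model of a region's destination count: once no other region still
// needs this region's data, it becomes available and exactly one worker
// may claim it for filling.
struct ToyRegion {
  std::atomic<unsigned> destination_count;
  std::atomic<bool>     claimed;

  ToyRegion() : destination_count(0), claimed(false) {}

  bool available() const { return destination_count.load() == 0; }

  // Only the first caller wins the claim.
  bool claim() {
    bool expected = false;
    return claimed.compare_exchange_strong(expected, true);
  }
};

// Returns true if the caller should push the region onto its work stack,
// mirroring the "available() && claim()" test in the hunk above.
static bool decrement_and_try_claim(ToyRegion& r) {
  if (r.destination_count.load() > 0) {  // Guard the toy against underflow.
    r.destination_count.fetch_sub(1);
  }
  return r.available() && r.claim();
}
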
 
-size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
-                                         SpaceId& src_space_id,
-                                         HeapWord*& src_space_top,
-                                         HeapWord* end_addr)
+size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure,
+                                          SpaceId& src_space_id,
+                                          HeapWord*& src_space_top,
+                                          HeapWord* end_addr)
 {
-  typedef ParallelCompactData::ChunkData ChunkData;
+  typedef ParallelCompactData::RegionData RegionData;
 
   ParallelCompactData& sd = PSParallelCompact::summary_data();
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
-
-  size_t src_chunk_idx = 0;
-
-  // Skip empty chunks (if any) up to the top of the space.
-  HeapWord* const src_aligned_up = sd.chunk_align_up(end_addr);
-  ChunkData* src_chunk_ptr = sd.addr_to_chunk_ptr(src_aligned_up);
-  HeapWord* const top_aligned_up = sd.chunk_align_up(src_space_top);
-  const ChunkData* const top_chunk_ptr = sd.addr_to_chunk_ptr(top_aligned_up);
-  while (src_chunk_ptr < top_chunk_ptr && src_chunk_ptr->data_size() == 0) {
-    ++src_chunk_ptr;
+  const size_t region_size = ParallelCompactData::RegionSize;
+
+  size_t src_region_idx = 0;
+
+  // Skip empty regions (if any) up to the top of the space.
+  HeapWord* const src_aligned_up = sd.region_align_up(end_addr);
+  RegionData* src_region_ptr = sd.addr_to_region_ptr(src_aligned_up);
+  HeapWord* const top_aligned_up = sd.region_align_up(src_space_top);
+  const RegionData* const top_region_ptr =
+    sd.addr_to_region_ptr(top_aligned_up);
+  while (src_region_ptr < top_region_ptr && src_region_ptr->data_size() == 0) {
+    ++src_region_ptr;
   }
 
-  if (src_chunk_ptr < top_chunk_ptr) {
-    // The next source chunk is in the current space.  Update src_chunk_idx and
-    // the source address to match src_chunk_ptr.
-    src_chunk_idx = sd.chunk(src_chunk_ptr);
-    HeapWord* const src_chunk_addr = sd.chunk_to_addr(src_chunk_idx);
-    if (src_chunk_addr > closure.source()) {
-      closure.set_source(src_chunk_addr);
+  if (src_region_ptr < top_region_ptr) {
+    // The next source region is in the current space.  Update src_region_idx
+    // and the source address to match src_region_ptr.
+    src_region_idx = sd.region(src_region_ptr);
+    HeapWord* const src_region_addr = sd.region_to_addr(src_region_idx);
+    if (src_region_addr > closure.source()) {
+      closure.set_source(src_region_addr);
     }
-    return src_chunk_idx;
+    return src_region_idx;
   }
 
-  // Switch to a new source space and find the first non-empty chunk.
+  // Switch to a new source space and find the first non-empty region.
   unsigned int space_id = src_space_id + 1;
   assert(space_id < last_space_id, "not enough spaces");
 
@@ -3187,14 +2695,14 @@
   do {
     MutableSpace* space = _space_info[space_id].space();
     HeapWord* const bottom = space->bottom();
-    const ChunkData* const bottom_cp = sd.addr_to_chunk_ptr(bottom);
+    const RegionData* const bottom_cp = sd.addr_to_region_ptr(bottom);
 
     // Iterate over the spaces that do not compact into themselves.
     if (bottom_cp->destination() != bottom) {
-      HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
-      const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
-
-      for (const ChunkData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
+      HeapWord* const top_aligned_up = sd.region_align_up(space->top());
+      const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
+
+      for (const RegionData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
         if (src_cp->live_obj_size() > 0) {
           // Found it.
           assert(src_cp->destination() == destination,
@@ -3204,9 +2712,9 @@
 
           src_space_id = SpaceId(space_id);
           src_space_top = space->top();
-          const size_t src_chunk_idx = sd.chunk(src_cp);
-          closure.set_source(sd.chunk_to_addr(src_chunk_idx));
-          return src_chunk_idx;
+          const size_t src_region_idx = sd.region(src_cp);
+          closure.set_source(sd.region_to_addr(src_region_idx));
+          return src_region_idx;
         } else {
           assert(src_cp->data_size() == 0, "sanity");
         }
@@ -3214,38 +2722,38 @@
     }
   } while (++space_id < last_space_id);
 
-  assert(false, "no source chunk was found");
+  assert(false, "no source region was found");
   return 0;
 }
 
-void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
+void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
 {
   typedef ParMarkBitMap::IterationStatus IterationStatus;
-  const size_t ChunkSize = ParallelCompactData::ChunkSize;
+  const size_t RegionSize = ParallelCompactData::RegionSize;
   ParMarkBitMap* const bitmap = mark_bitmap();
   ParallelCompactData& sd = summary_data();
-  ChunkData* const chunk_ptr = sd.chunk(chunk_idx);
+  RegionData* const region_ptr = sd.region(region_idx);
 
   // Get the items needed to construct the closure.
-  HeapWord* dest_addr = sd.chunk_to_addr(chunk_idx);
+  HeapWord* dest_addr = sd.region_to_addr(region_idx);
   SpaceId dest_space_id = space_id(dest_addr);
   ObjectStartArray* start_array = _space_info[dest_space_id].start_array();
   HeapWord* new_top = _space_info[dest_space_id].new_top();
   assert(dest_addr < new_top, "sanity");
-  const size_t words = MIN2(pointer_delta(new_top, dest_addr), ChunkSize);
-
-  // Get the source chunk and related info.
-  size_t src_chunk_idx = chunk_ptr->source_chunk();
-  SpaceId src_space_id = space_id(sd.chunk_to_addr(src_chunk_idx));
+  const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize);
+
+  // Get the source region and related info.
+  size_t src_region_idx = region_ptr->source_region();
+  SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
   HeapWord* src_space_top = _space_info[src_space_id].space()->top();
 
   MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words);
-  closure.set_source(first_src_addr(dest_addr, src_chunk_idx));
-
-  // Adjust src_chunk_idx to prepare for decrementing destination counts (the
-  // destination count is not decremented when a chunk is copied to itself).
-  if (src_chunk_idx == chunk_idx) {
-    src_chunk_idx += 1;
+  closure.set_source(first_src_addr(dest_addr, src_region_idx));
+
+  // Adjust src_region_idx to prepare for decrementing destination counts (the
+  // destination count is not decremented when a region is copied to itself).
+  if (src_region_idx == region_idx) {
+    src_region_idx += 1;
   }
 
   if (bitmap->is_unmarked(closure.source())) {
@@ -3255,32 +2763,33 @@
     HeapWord* const old_src_addr = closure.source();
     closure.copy_partial_obj();
     if (closure.is_full()) {
-      decrement_destination_counts(cm, src_chunk_idx, closure.source());
-      chunk_ptr->set_deferred_obj_addr(NULL);
-      chunk_ptr->set_completed();
+      decrement_destination_counts(cm, src_region_idx, closure.source());
+      region_ptr->set_deferred_obj_addr(NULL);
+      region_ptr->set_completed();
       return;
     }
 
-    HeapWord* const end_addr = sd.chunk_align_down(closure.source());
-    if (sd.chunk_align_down(old_src_addr) != end_addr) {
-      // The partial object was copied from more than one source chunk.
-      decrement_destination_counts(cm, src_chunk_idx, end_addr);
-
-      // Move to the next source chunk, possibly switching spaces as well.  All
+    HeapWord* const end_addr = sd.region_align_down(closure.source());
+    if (sd.region_align_down(old_src_addr) != end_addr) {
+      // The partial object was copied from more than one source region.
+      decrement_destination_counts(cm, src_region_idx, end_addr);
+
+      // Move to the next source region, possibly switching spaces as well.  All
       // args except end_addr may be modified.
-      src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
-                                     end_addr);
+      src_region_idx = next_src_region(closure, src_space_id, src_space_top,
+                                       end_addr);
     }
   }
 
   do {
     HeapWord* const cur_addr = closure.source();
-    HeapWord* const end_addr = MIN2(sd.chunk_align_up(cur_addr + 1),
+    HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1),
                                     src_space_top);
     IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr);
 
     if (status == ParMarkBitMap::incomplete) {
-      // The last obj that starts in the source chunk does not end in the chunk.
+      // The last obj that starts in the source region does not end in the
+      // region.
       assert(closure.source() < end_addr, "sanity")
       HeapWord* const obj_beg = closure.source();
       HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(),
@@ -3299,28 +2808,28 @@
 
     if (status == ParMarkBitMap::would_overflow) {
       // The last object did not fit.  Note that interior oop updates were
-      // deferred, then copy enough of the object to fill the chunk.
-      chunk_ptr->set_deferred_obj_addr(closure.destination());
+      // deferred, then copy enough of the object to fill the region.
+      region_ptr->set_deferred_obj_addr(closure.destination());
       status = closure.copy_until_full(); // copies from closure.source()
 
-      decrement_destination_counts(cm, src_chunk_idx, closure.source());
-      chunk_ptr->set_completed();
+      decrement_destination_counts(cm, src_region_idx, closure.source());
+      region_ptr->set_completed();
       return;
     }
 
     if (status == ParMarkBitMap::full) {
-      decrement_destination_counts(cm, src_chunk_idx, closure.source());
-      chunk_ptr->set_deferred_obj_addr(NULL);
-      chunk_ptr->set_completed();
+      decrement_destination_counts(cm, src_region_idx, closure.source());
+      region_ptr->set_deferred_obj_addr(NULL);
+      region_ptr->set_completed();
       return;
     }
 
-    decrement_destination_counts(cm, src_chunk_idx, end_addr);
-
-    // Move to the next source chunk, possibly switching spaces as well.  All
+    decrement_destination_counts(cm, src_region_idx, end_addr);
+
+    // Move to the next source region, possibly switching spaces as well.  All
     // args except end_addr may be modified.
-    src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
-                                   end_addr);
+    src_region_idx = next_src_region(closure, src_space_id, src_space_top,
+                                     end_addr);
   } while (true);
 }
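
fill_region() pulls live words out of successive source regions until the destination region holds its RegionSize words, deferring the interior updates of an object that straddles the region boundary. The standalone model below keeps only the capacity bookkeeping, using a plain vector in place of the mark bitmap and closures, so it illustrates the control flow rather than the real copying code:

#include <cstddef>
#include <vector>

// Each source span is a run of live words waiting to be copied.
struct LiveSpan { size_t words; };

// Fill one destination region of 'capacity' words by pulling from source
// spans in order.  Returns the index of the first span that did not fit
// completely; in the real code its remainder is copied partially and its
// oop updates are deferred via set_deferred_obj_addr().
static size_t fill_one_region(const std::vector<LiveSpan>& sources,
                              size_t first_span,
                              size_t capacity) {
  size_t filled = 0;
  size_t i = first_span;
  while (i < sources.size() && filled + sources[i].words <= capacity) {
    filled += sources[i].words;          // Whole object fits; copy and advance.
    ++i;
  }
  return i;
}
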
 
@@ -3352,15 +2861,15 @@
   }
 #endif
 
-  const size_t beg_chunk = sd.addr_to_chunk_idx(beg_addr);
-  const size_t dp_chunk = sd.addr_to_chunk_idx(dp_addr);
-  if (beg_chunk < dp_chunk) {
-    update_and_deadwood_in_dense_prefix(cm, space_id, beg_chunk, dp_chunk);
+  const size_t beg_region = sd.addr_to_region_idx(beg_addr);
+  const size_t dp_region = sd.addr_to_region_idx(dp_addr);
+  if (beg_region < dp_region) {
+    update_and_deadwood_in_dense_prefix(cm, space_id, beg_region, dp_region);
   }
 
-  // The destination of the first live object that starts in the chunk is one
-  // past the end of the partial object entering the chunk (if any).
-  HeapWord* const dest_addr = sd.partial_obj_end(dp_chunk);
+  // The destination of the first live object that starts in the region is one
+  // past the end of the partial object entering the region (if any).
+  HeapWord* const dest_addr = sd.partial_obj_end(dp_region);
   HeapWord* const new_top = _space_info[space_id].new_top();
   assert(new_top >= dest_addr, "bad new_top value");
   const size_t words = pointer_delta(new_top, dest_addr);
@@ -3469,172 +2978,6 @@
   return ParMarkBitMap::incomplete;
 }
 
-BitBlockUpdateClosure::BitBlockUpdateClosure(ParMarkBitMap* mbm,
-                        ParCompactionManager* cm,
-                        size_t chunk_index) :
-                        ParMarkBitMapClosure(mbm, cm),
-                        _live_data_left(0),
-                        _cur_block(0) {
-  _chunk_start =
-    PSParallelCompact::summary_data().chunk_to_addr(chunk_index);
-  _chunk_end =
-    PSParallelCompact::summary_data().chunk_to_addr(chunk_index) +
-                 ParallelCompactData::ChunkSize;
-  _chunk_index = chunk_index;
-  _cur_block =
-    PSParallelCompact::summary_data().addr_to_block_idx(_chunk_start);
-}
-
-bool BitBlockUpdateClosure::chunk_contains_cur_block() {
-  return ParallelCompactData::chunk_contains_block(_chunk_index, _cur_block);
-}
-
-void BitBlockUpdateClosure::reset_chunk(size_t chunk_index) {
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(7);)
-  ParallelCompactData& sd = PSParallelCompact::summary_data();
-  _chunk_index = chunk_index;
-  _live_data_left = 0;
-  _chunk_start = sd.chunk_to_addr(chunk_index);
-  _chunk_end = sd.chunk_to_addr(chunk_index) + ParallelCompactData::ChunkSize;
-
-  // The first block in this chunk
-  size_t first_block =  sd.addr_to_block_idx(_chunk_start);
-  size_t partial_live_size = sd.chunk(chunk_index)->partial_obj_size();
-
-  // Set the offset to 0. By definition it should have that value
-  // but it may have been written while processing an earlier chunk.
-  if (partial_live_size == 0) {
-    // No live object extends onto the chunk.  The first bit
-    // in the bit map for the first chunk must be a start bit.
-    // Although there may not be any marked bits, it is safe
-    // to set it as a start bit.
-    sd.block(first_block)->set_start_bit_offset(0);
-    sd.block(first_block)->set_first_is_start_bit(true);
-  } else if (sd.partial_obj_ends_in_block(first_block)) {
-    sd.block(first_block)->set_end_bit_offset(0);
-    sd.block(first_block)->set_first_is_start_bit(false);
-  } else {
-    // The partial object extends beyond the first block.
-    // There is no object starting in the first block
-    // so the offset and bit parity are not needed.
-    // Set the the bit parity to start bit so assertions
-    // work when not bit is found.
-    sd.block(first_block)->set_end_bit_offset(0);
-    sd.block(first_block)->set_first_is_start_bit(false);
-  }
-  _cur_block = first_block;
-#ifdef ASSERT
-  if (sd.block(first_block)->first_is_start_bit()) {
-    assert(!sd.partial_obj_ends_in_block(first_block),
-      "Partial object cannot end in first block");
-  }
-
-  if (PrintGCDetails && Verbose) {
-    if (partial_live_size == 1) {
-    gclog_or_tty->print_cr("first_block " PTR_FORMAT
-      " _offset " PTR_FORMAT
-      " _first_is_start_bit %d",
-      first_block,
-      sd.block(first_block)->raw_offset(),
-      sd.block(first_block)->first_is_start_bit());
-    }
-  }
-#endif
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(17);)
-}
-
-// This method is called when a object has been found (both beginning
-// and end of the object) in the range of iteration.  This method is
-// calculating the words of live data to the left of a block.  That live
-// data includes any object starting to the left of the block (i.e.,
-// the live-data-to-the-left of block AAA will include the full size
-// of any object entering AAA).
-
-ParMarkBitMapClosure::IterationStatus
-BitBlockUpdateClosure::do_addr(HeapWord* addr, size_t words) {
-  // add the size to the block data.
-  HeapWord* obj = addr;
-  ParallelCompactData& sd = PSParallelCompact::summary_data();
-
-  assert(bitmap()->obj_size(obj) == words, "bad size");
-  assert(_chunk_start <= obj, "object is not in chunk");
-  assert(obj + words <= _chunk_end, "object is not in chunk");
-
-  // Update the live data to the left
-  size_t prev_live_data_left = _live_data_left;
-  _live_data_left = _live_data_left + words;
-
-  // Is this object in the current block.
-  size_t block_of_obj = sd.addr_to_block_idx(obj);
-  size_t block_of_obj_last = sd.addr_to_block_idx(obj + words - 1);
-  HeapWord* block_of_obj_last_addr = sd.block_to_addr(block_of_obj_last);
-  if (_cur_block < block_of_obj) {
-
-    //
-    // No object crossed the block boundary and this object was found
-    // on the other side of the block boundary.  Update the offset for
-    // the new block with the data size that does not include this object.
-    //
-    // The first bit in block_of_obj is a start bit except in the
-    // case where the partial object for the chunk extends into
-    // this block.
-    if (sd.partial_obj_ends_in_block(block_of_obj)) {
-      sd.block(block_of_obj)->set_end_bit_offset(prev_live_data_left);
-    } else {
-      sd.block(block_of_obj)->set_start_bit_offset(prev_live_data_left);
-    }
-
-    // Does this object pass beyond the its block?
-    if (block_of_obj < block_of_obj_last) {
-      // Object crosses block boundary.  Two blocks need to be udpated:
-      //        the current block where the object started
-      //        the block where the object ends
-      //
-      // The offset for blocks with no objects starting in them
-      // (e.g., blocks between _cur_block and  block_of_obj_last)
-      // should not be needed.
-      // Note that block_of_obj_last may be in another chunk.  If so,
-      // it should be overwritten later.  This is a problem (writting
-      // into a block in a later chunk) for parallel execution.
-      assert(obj < block_of_obj_last_addr,
-        "Object should start in previous block");
-
-      // obj is crossing into block_of_obj_last so the first bit
-      // is and end bit.
-      sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
-
-      _cur_block = block_of_obj_last;
-    } else {
-      // _first_is_start_bit has already been set correctly
-      // in the if-then-else above so don't reset it here.
-      _cur_block = block_of_obj;
-    }
-  } else {
-    // The current block only changes if the object extends beyound
-    // the block it starts in.
-    //
-    // The object starts in the current block.
-    // Does this object pass beyond the end of it?
-    if (block_of_obj < block_of_obj_last) {
-      // Object crosses block boundary.
-      // See note above on possible blocks between block_of_obj and
-      // block_of_obj_last
-      assert(obj < block_of_obj_last_addr,
-        "Object should start in previous block");
-
-      sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
-
-      _cur_block = block_of_obj_last;
-    }
-  }
-
-  // Return incomplete if there are more blocks to be done.
-  if (chunk_contains_cur_block()) {
-    return ParMarkBitMap::incomplete;
-  }
-  return ParMarkBitMap::complete;
-}
-
 // Verify the new location using the forwarding pointer
 // from MarkSweep::mark_sweep_phase2().  Set the mark_word
 // to the initial value.
@@ -3707,12 +3050,3 @@
       return last_space_id;
   }
 }
-
-// Here temporarily for debugging
-#ifdef ASSERT
-  size_t ParallelCompactData::block_idx(BlockData* block) {
-    size_t index = pointer_delta(block,
-      PSParallelCompact::summary_data()._block_data, sizeof(BlockData));
-    return index;
-  }
-#endif
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -76,87 +76,80 @@
 {
 public:
   // Sizes are in HeapWords, unless indicated otherwise.
-  static const size_t Log2ChunkSize;
-  static const size_t ChunkSize;
-  static const size_t ChunkSizeBytes;
+  static const size_t Log2RegionSize;
+  static const size_t RegionSize;
+  static const size_t RegionSizeBytes;
 
-  // Mask for the bits in a size_t to get an offset within a chunk.
-  static const size_t ChunkSizeOffsetMask;
-  // Mask for the bits in a pointer to get an offset within a chunk.
-  static const size_t ChunkAddrOffsetMask;
-  // Mask for the bits in a pointer to get the address of the start of a chunk.
-  static const size_t ChunkAddrMask;
+  // Mask for the bits in a size_t to get an offset within a region.
+  static const size_t RegionSizeOffsetMask;
+  // Mask for the bits in a pointer to get an offset within a region.
+  static const size_t RegionAddrOffsetMask;
+  // Mask for the bits in a pointer to get the address of the start of a region.
+  static const size_t RegionAddrMask;
 
-  static const size_t Log2BlockSize;
-  static const size_t BlockSize;
-  static const size_t BlockOffsetMask;
-  static const size_t BlockMask;
-
-  static const size_t BlocksPerChunk;
-
-  class ChunkData
+  class RegionData
   {
   public:
-    // Destination address of the chunk.
+    // Destination address of the region.
     HeapWord* destination() const { return _destination; }
 
-    // The first chunk containing data destined for this chunk.
-    size_t source_chunk() const { return _source_chunk; }
+    // The first region containing data destined for this region.
+    size_t source_region() const { return _source_region; }
 
-    // The object (if any) starting in this chunk and ending in a different
-    // chunk that could not be updated during the main (parallel) compaction
+    // The object (if any) starting in this region and ending in a different
+    // region that could not be updated during the main (parallel) compaction
     // phase.  This is different from _partial_obj_addr, which is an object that
-    // extends onto a source chunk.  However, the two uses do not overlap in
+    // extends onto a source region.  However, the two uses do not overlap in
     // time, so the same field is used to save space.
     HeapWord* deferred_obj_addr() const { return _partial_obj_addr; }
 
-    // The starting address of the partial object extending onto the chunk.
+    // The starting address of the partial object extending onto the region.
     HeapWord* partial_obj_addr() const { return _partial_obj_addr; }
 
-    // Size of the partial object extending onto the chunk (words).
+    // Size of the partial object extending onto the region (words).
     size_t partial_obj_size() const { return _partial_obj_size; }
 
-    // Size of live data that lies within this chunk due to objects that start
-    // in this chunk (words).  This does not include the partial object
-    // extending onto the chunk (if any), or the part of an object that extends
-    // onto the next chunk (if any).
+    // Size of live data that lies within this region due to objects that start
+    // in this region (words).  This does not include the partial object
+    // extending onto the region (if any), or the part of an object that extends
+    // onto the next region (if any).
     size_t live_obj_size() const { return _dc_and_los & los_mask; }
 
-    // Total live data that lies within the chunk (words).
+    // Total live data that lies within the region (words).
     size_t data_size() const { return partial_obj_size() + live_obj_size(); }
 
-    // The destination_count is the number of other chunks to which data from
-    // this chunk will be copied.  At the end of the summary phase, the valid
+    // The destination_count is the number of other regions to which data from
+    // this region will be copied.  At the end of the summary phase, the valid
     // values of destination_count are
     //
-    // 0 - data from the chunk will be compacted completely into itself, or the
-    //     chunk is empty.  The chunk can be claimed and then filled.
-    // 1 - data from the chunk will be compacted into 1 other chunk; some
-    //     data from the chunk may also be compacted into the chunk itself.
-    // 2 - data from the chunk will be copied to 2 other chunks.
+    // 0 - data from the region will be compacted completely into itself, or the
+    //     region is empty.  The region can be claimed and then filled.
+    // 1 - data from the region will be compacted into 1 other region; some
+    //     data from the region may also be compacted into the region itself.
+    // 2 - data from the region will be copied to 2 other regions.
     //
-    // During compaction as chunks are emptied, the destination_count is
+    // During compaction as regions are emptied, the destination_count is
     // decremented (atomically) and when it reaches 0, it can be claimed and
     // then filled.
     //
-    // A chunk is claimed for processing by atomically changing the
-    // destination_count to the claimed value (dc_claimed).  After a chunk has
+    // A region is claimed for processing by atomically changing the
+    // destination_count to the claimed value (dc_claimed).  After a region has
     // been filled, the destination_count should be set to the completed value
     // (dc_completed).
     inline uint destination_count() const;
     inline uint destination_count_raw() const;
 
-    // The location of the java heap data that corresponds to this chunk.
+    // The location of the java heap data that corresponds to this region.
     inline HeapWord* data_location() const;
 
-    // The highest address referenced by objects in this chunk.
+    // The highest address referenced by objects in this region.
     inline HeapWord* highest_ref() const;
 
-    // Whether this chunk is available to be claimed, has been claimed, or has
+    // Whether this region is available to be claimed, has been claimed, or has
     // been completed.
     //
-    // Minor subtlety:  claimed() returns true if the chunk is marked
-    // completed(), which is desirable since a chunk must be claimed before it
+    // Minor subtlety:  claimed() returns true if the region is marked
+    // completed(), which is desirable since a region must be claimed before it
     // can be completed.
     bool available() const { return _dc_and_los < dc_one; }
     bool claimed() const   { return _dc_and_los >= dc_claimed; }
@@ -164,11 +157,11 @@
 
     // These are not atomic.
     void set_destination(HeapWord* addr)       { _destination = addr; }
-    void set_source_chunk(size_t chunk)        { _source_chunk = chunk; }
+    void set_source_region(size_t region)      { _source_region = region; }
     void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
     void set_partial_obj_addr(HeapWord* addr)  { _partial_obj_addr = addr; }
     void set_partial_obj_size(size_t words)    {
-      _partial_obj_size = (chunk_sz_t) words;
+      _partial_obj_size = (region_sz_t) words;
     }
 
     inline void set_destination_count(uint count);
@@ -184,129 +177,57 @@
     inline bool claim();
 
   private:
-    // The type used to represent object sizes within a chunk.
-    typedef uint chunk_sz_t;
+    // The type used to represent object sizes within a region.
+    typedef uint region_sz_t;
 
     // Constants for manipulating the _dc_and_los field, which holds both the
     // destination count and live obj size.  The live obj size lives at the
     // least significant end so no masking is necessary when adding.
-    static const chunk_sz_t dc_shift;           // Shift amount.
-    static const chunk_sz_t dc_mask;            // Mask for destination count.
-    static const chunk_sz_t dc_one;             // 1, shifted appropriately.
-    static const chunk_sz_t dc_claimed;         // Chunk has been claimed.
-    static const chunk_sz_t dc_completed;       // Chunk has been completed.
-    static const chunk_sz_t los_mask;           // Mask for live obj size.
+    static const region_sz_t dc_shift;           // Shift amount.
+    static const region_sz_t dc_mask;            // Mask for destination count.
+    static const region_sz_t dc_one;             // 1, shifted appropriately.
+    static const region_sz_t dc_claimed;         // Region has been claimed.
+    static const region_sz_t dc_completed;       // Region has been completed.
+    static const region_sz_t los_mask;           // Mask for live obj size.
 
-    HeapWord*           _destination;
-    size_t              _source_chunk;
-    HeapWord*           _partial_obj_addr;
-    chunk_sz_t          _partial_obj_size;
-    chunk_sz_t volatile _dc_and_los;
+    HeapWord*            _destination;
+    size_t               _source_region;
+    HeapWord*            _partial_obj_addr;
+    region_sz_t          _partial_obj_size;
+    region_sz_t volatile _dc_and_los;
 #ifdef ASSERT
     // These enable optimizations that are only partially implemented.  Use
     // debug builds to prevent the code fragments from breaking.
-    HeapWord*           _data_location;
-    HeapWord*           _highest_ref;
+    HeapWord*            _data_location;
+    HeapWord*            _highest_ref;
 #endif  // #ifdef ASSERT
 
 #ifdef ASSERT
    public:
-    uint            _pushed;    // 0 until chunk is pushed onto a worker's stack
+    uint            _pushed;   // 0 until region is pushed onto a worker's stack
    private:
 #endif
   };
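
RegionData packs the destination count and the live-object size into the single _dc_and_los word, count in the high bits, so that available()/claimed()/completed() reduce to one unsigned comparison and adding live words needs no masking. A standalone sketch of that encoding follows; the 8-bit count width and the claimed sentinel chosen here are assumptions for illustration, since the real dc_* constants are defined in psParallelCompact.cpp:

#include <cstdint>
#include <cassert>

// Illustrative packing of a destination count and a live-obj size into one
// 32-bit word: count in the high bits, size in the low bits.
typedef uint32_t region_word_t;

static const region_word_t dc_shift     = 24;                              // assumed count width: 8 bits
static const region_word_t dc_mask      = ~region_word_t(0) << dc_shift;
static const region_word_t dc_one       = region_word_t(1) << dc_shift;
static const region_word_t dc_claimed   = region_word_t(250) << dc_shift;  // assumed sentinel
static const region_word_t dc_completed = dc_mask;                         // all count bits set
static const region_word_t los_mask     = ~dc_mask;

static inline unsigned destination_count(region_word_t w) { return (w & dc_mask) >> dc_shift; }
static inline size_t   live_obj_size(region_word_t w)     { return w & los_mask; }

static inline bool available(region_word_t w) { return w <  dc_one; }      // count == 0
static inline bool claimed(region_word_t w)   { return w >= dc_claimed; }  // also true once completed
static inline bool completed(region_word_t w) { return w >= dc_completed; }

static inline region_word_t add_live_words(region_word_t w, size_t words) {
  assert(live_obj_size(w) + words <= los_mask && "live obj size overflow");
  return w + (region_word_t)words;       // size lives in the low bits, so no masking
}

Keeping the size at the least-significant end is what lets the summary phase bump the live size with a plain add, exactly as the comment above the dc_* constants states.
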
 
-  // 'Blocks' allow shorter sections of the bitmap to be searched.  Each Block
-  // holds an offset, which is the amount of live data in the Chunk to the left
-  // of the first live object in the Block.  This amount of live data will
-  // include any object extending into the block. The first block in
-  // a chunk does not include any partial object extending into the
-  // the chunk.
-  //
-  // The offset also encodes the
-  // 'parity' of the first 1 bit in the Block:  a positive offset means the
-  // first 1 bit marks the start of an object, a negative offset means the first
-  // 1 bit marks the end of an object.
-  class BlockData
-  {
-   public:
-    typedef short int blk_ofs_t;
-
-    blk_ofs_t offset() const { return _offset >= 0 ? _offset : -_offset; }
-    blk_ofs_t raw_offset() const { return _offset; }
-    void set_first_is_start_bit(bool v) { _first_is_start_bit = v; }
-
-#if 0
-    // The need for this method was anticipated but it is
-    // never actually used.  Do not include it for now.  If
-    // it is needed, consider the problem of what is passed
-    // as "v".  To avoid warning errors the method set_start_bit_offset()
-    // was changed to take a size_t as the parameter and to do the
-    // check for the possible overflow.  Doing the cast in these
-    // methods better limits the potential problems because of
-    // the size of the field to this class.
-    void set_raw_offset(blk_ofs_t v) { _offset = v; }
-#endif
-    void set_start_bit_offset(size_t val) {
-      assert(val >= 0, "sanity");
-      _offset = (blk_ofs_t) val;
-      assert(val == (size_t) _offset, "Value is too large");
-      _first_is_start_bit = true;
-    }
-    void set_end_bit_offset(size_t val) {
-      assert(val >= 0, "sanity");
-      _offset = (blk_ofs_t) val;
-      assert(val == (size_t) _offset, "Value is too large");
-      _offset = - _offset;
-      _first_is_start_bit = false;
-    }
-    bool first_is_start_bit() {
-      assert(_set_phase > 0, "Not initialized");
-      return _first_is_start_bit;
-    }
-    bool first_is_end_bit() {
-      assert(_set_phase > 0, "Not initialized");
-      return !_first_is_start_bit;
-    }
-
-   private:
-    blk_ofs_t _offset;
-    // This is temporary until the mark_bitmap is separated into
-    // a start bit array and an end bit array.
-    bool      _first_is_start_bit;
-#ifdef ASSERT
-    short     _set_phase;
-    static short _cur_phase;
-   public:
-    static void set_cur_phase(short v) { _cur_phase = v; }
-#endif
-  };
-
 public:
   ParallelCompactData();
   bool initialize(MemRegion covered_region);
 
-  size_t chunk_count() const { return _chunk_count; }
-
-  // Convert chunk indices to/from ChunkData pointers.
-  inline ChunkData* chunk(size_t chunk_idx) const;
-  inline size_t     chunk(const ChunkData* const chunk_ptr) const;
+  size_t region_count() const { return _region_count; }
 
-  // Returns true if the given address is contained within the chunk
-  bool chunk_contains(size_t chunk_index, HeapWord* addr);
+  // Convert region indices to/from RegionData pointers.
+  inline RegionData* region(size_t region_idx) const;
+  inline size_t     region(const RegionData* const region_ptr) const;
 
-  size_t block_count() const { return _block_count; }
-  inline BlockData* block(size_t n) const;
-
-  // Returns true if the given block is in the given chunk.
-  static bool chunk_contains_block(size_t chunk_index, size_t block_index);
+  // Returns true if the given address is contained within the region
+  bool region_contains(size_t region_index, HeapWord* addr);
 
   void add_obj(HeapWord* addr, size_t len);
   void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
 
-  // Fill in the chunks covering [beg, end) so that no data moves; i.e., the
-  // destination of chunk n is simply the start of chunk n.  The argument beg
-  // must be chunk-aligned; end need not be.
+  // Fill in the regions covering [beg, end) so that no data moves; i.e., the
+  // destination of region n is simply the start of region n.  The argument beg
+  // must be region-aligned; end need not be.
   void summarize_dense_prefix(HeapWord* beg, HeapWord* end);
 
   bool summarize(HeapWord* target_beg, HeapWord* target_end,
@@ -314,48 +235,33 @@
                  HeapWord** target_next, HeapWord** source_next = 0);
 
   void clear();
-  void clear_range(size_t beg_chunk, size_t end_chunk);
+  void clear_range(size_t beg_region, size_t end_region);
   void clear_range(HeapWord* beg, HeapWord* end) {
-    clear_range(addr_to_chunk_idx(beg), addr_to_chunk_idx(end));
+    clear_range(addr_to_region_idx(beg), addr_to_region_idx(end));
   }
 
-  // Return the number of words between addr and the start of the chunk
+  // Return the number of words between addr and the start of the region
   // containing addr.
-  inline size_t     chunk_offset(const HeapWord* addr) const;
-
-  // Convert addresses to/from a chunk index or chunk pointer.
-  inline size_t     addr_to_chunk_idx(const HeapWord* addr) const;
-  inline ChunkData* addr_to_chunk_ptr(const HeapWord* addr) const;
-  inline HeapWord*  chunk_to_addr(size_t chunk) const;
-  inline HeapWord*  chunk_to_addr(size_t chunk, size_t offset) const;
-  inline HeapWord*  chunk_to_addr(const ChunkData* chunk) const;
+  inline size_t     region_offset(const HeapWord* addr) const;
 
-  inline HeapWord*  chunk_align_down(HeapWord* addr) const;
-  inline HeapWord*  chunk_align_up(HeapWord* addr) const;
-  inline bool       is_chunk_aligned(HeapWord* addr) const;
+  // Convert addresses to/from a region index or region pointer.
+  inline size_t     addr_to_region_idx(const HeapWord* addr) const;
+  inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
+  inline HeapWord*  region_to_addr(size_t region) const;
+  inline HeapWord*  region_to_addr(size_t region, size_t offset) const;
+  inline HeapWord*  region_to_addr(const RegionData* region) const;
 
-  // Analogous to chunk_offset() for blocks.
-  size_t     block_offset(const HeapWord* addr) const;
-  size_t     addr_to_block_idx(const HeapWord* addr) const;
-  size_t     addr_to_block_idx(const oop obj) const {
-    return addr_to_block_idx((HeapWord*) obj);
-  }
-  inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
-  inline HeapWord*  block_to_addr(size_t block) const;
+  inline HeapWord*  region_align_down(HeapWord* addr) const;
+  inline HeapWord*  region_align_up(HeapWord* addr) const;
+  inline bool       is_region_aligned(HeapWord* addr) const;
 
   // Return the address one past the end of the partial object.
-  HeapWord* partial_obj_end(size_t chunk_idx) const;
+  HeapWord* partial_obj_end(size_t region_idx) const;
 
   // Return the new location of the object p after the
   // compaction.
   HeapWord* calc_new_pointer(HeapWord* addr);
 
-  // Same as calc_new_pointer() using blocks.
-  HeapWord* block_calc_new_pointer(HeapWord* addr);
-
-  // Same as calc_new_pointer() using chunks.
-  HeapWord* chunk_calc_new_pointer(HeapWord* addr);
-
   HeapWord* calc_new_pointer(oop p) {
     return calc_new_pointer((HeapWord*) p);
   }
@@ -363,22 +269,13 @@
   // Return the updated address for the given klass
   klassOop calc_new_klass(klassOop);
 
-  // Given a block returns true if the partial object for the
-  // corresponding chunk ends in the block.  Returns false, otherwise
-  // If there is no partial object, returns false.
-  bool partial_obj_ends_in_block(size_t block_index);
-
-  // Returns the block index for the block
-  static size_t block_idx(BlockData* block);
-
 #ifdef  ASSERT
   void verify_clear(const PSVirtualSpace* vspace);
   void verify_clear();
 #endif  // #ifdef ASSERT
 
 private:
-  bool initialize_block_data(size_t region_size);
-  bool initialize_chunk_data(size_t region_size);
+  bool initialize_region_data(size_t region_size);
   PSVirtualSpace* create_vspace(size_t count, size_t element_size);
 
 private:
@@ -387,74 +284,70 @@
   HeapWord*       _region_end;
 #endif  // #ifdef ASSERT
 
-  PSVirtualSpace* _chunk_vspace;
-  ChunkData*      _chunk_data;
-  size_t          _chunk_count;
-
-  PSVirtualSpace* _block_vspace;
-  BlockData*      _block_data;
-  size_t          _block_count;
+  PSVirtualSpace* _region_vspace;
+  RegionData*     _region_data;
+  size_t          _region_count;
 };
 
 inline uint
-ParallelCompactData::ChunkData::destination_count_raw() const
+ParallelCompactData::RegionData::destination_count_raw() const
 {
   return _dc_and_los & dc_mask;
 }
 
 inline uint
-ParallelCompactData::ChunkData::destination_count() const
+ParallelCompactData::RegionData::destination_count() const
 {
   return destination_count_raw() >> dc_shift;
 }
 
 inline void
-ParallelCompactData::ChunkData::set_destination_count(uint count)
+ParallelCompactData::RegionData::set_destination_count(uint count)
 {
   assert(count <= (dc_completed >> dc_shift), "count too large");
-  const chunk_sz_t live_sz = (chunk_sz_t) live_obj_size();
+  const region_sz_t live_sz = (region_sz_t) live_obj_size();
   _dc_and_los = (count << dc_shift) | live_sz;
 }
 
-inline void ParallelCompactData::ChunkData::set_live_obj_size(size_t words)
+inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
 {
   assert(words <= los_mask, "would overflow");
-  _dc_and_los = destination_count_raw() | (chunk_sz_t)words;
+  _dc_and_los = destination_count_raw() | (region_sz_t)words;
 }
 
-inline void ParallelCompactData::ChunkData::decrement_destination_count()
+inline void ParallelCompactData::RegionData::decrement_destination_count()
 {
   assert(_dc_and_los < dc_claimed, "already claimed");
   assert(_dc_and_los >= dc_one, "count would go negative");
   Atomic::add((int)dc_mask, (volatile int*)&_dc_and_los);
 }
 
-inline HeapWord* ParallelCompactData::ChunkData::data_location() const
+inline HeapWord* ParallelCompactData::RegionData::data_location() const
 {
   DEBUG_ONLY(return _data_location;)
   NOT_DEBUG(return NULL;)
 }
 
-inline HeapWord* ParallelCompactData::ChunkData::highest_ref() const
+inline HeapWord* ParallelCompactData::RegionData::highest_ref() const
 {
   DEBUG_ONLY(return _highest_ref;)
   NOT_DEBUG(return NULL;)
 }
 
-inline void ParallelCompactData::ChunkData::set_data_location(HeapWord* addr)
+inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr)
 {
   DEBUG_ONLY(_data_location = addr;)
 }
 
-inline void ParallelCompactData::ChunkData::set_completed()
+inline void ParallelCompactData::RegionData::set_completed()
 {
   assert(claimed(), "must be claimed first");
-  _dc_and_los = dc_completed | (chunk_sz_t) live_obj_size();
+  _dc_and_los = dc_completed | (region_sz_t) live_obj_size();
 }
 
-// MT-unsafe claiming of a chunk.  Should only be used during single threaded
+// MT-unsafe claiming of a region.  Should only be used during single threaded
 // execution.
-inline bool ParallelCompactData::ChunkData::claim_unsafe()
+inline bool ParallelCompactData::RegionData::claim_unsafe()
 {
   if (available()) {
     _dc_and_los |= dc_claimed;
@@ -463,13 +356,13 @@
   return false;
 }
 
-inline void ParallelCompactData::ChunkData::add_live_obj(size_t words)
+inline void ParallelCompactData::RegionData::add_live_obj(size_t words)
 {
   assert(words <= (size_t)los_mask - live_obj_size(), "overflow");
   Atomic::add((int) words, (volatile int*) &_dc_and_los);
 }
 
-inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr)
+inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr)
 {
 #ifdef ASSERT
   HeapWord* tmp = _highest_ref;
@@ -479,7 +372,7 @@
 #endif  // #ifdef ASSERT
 }
 
-inline bool ParallelCompactData::ChunkData::claim()
+inline bool ParallelCompactData::RegionData::claim()
 {
   const int los = (int) live_obj_size();
   const int old = Atomic::cmpxchg(dc_claimed | los,
@@ -487,119 +380,85 @@
   return old == los;
 }
 
-inline ParallelCompactData::ChunkData*
-ParallelCompactData::chunk(size_t chunk_idx) const
+inline ParallelCompactData::RegionData*
+ParallelCompactData::region(size_t region_idx) const
 {
-  assert(chunk_idx <= chunk_count(), "bad arg");
-  return _chunk_data + chunk_idx;
+  assert(region_idx <= region_count(), "bad arg");
+  return _region_data + region_idx;
 }
 
 inline size_t
-ParallelCompactData::chunk(const ChunkData* const chunk_ptr) const
+ParallelCompactData::region(const RegionData* const region_ptr) const
 {
-  assert(chunk_ptr >= _chunk_data, "bad arg");
-  assert(chunk_ptr <= _chunk_data + chunk_count(), "bad arg");
-  return pointer_delta(chunk_ptr, _chunk_data, sizeof(ChunkData));
-}
-
-inline ParallelCompactData::BlockData*
-ParallelCompactData::block(size_t n) const {
-  assert(n < block_count(), "bad arg");
-  return _block_data + n;
+  assert(region_ptr >= _region_data, "bad arg");
+  assert(region_ptr <= _region_data + region_count(), "bad arg");
+  return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
 }
 
 inline size_t
-ParallelCompactData::chunk_offset(const HeapWord* addr) const
+ParallelCompactData::region_offset(const HeapWord* addr) const
 {
   assert(addr >= _region_start, "bad addr");
   assert(addr <= _region_end, "bad addr");
-  return (size_t(addr) & ChunkAddrOffsetMask) >> LogHeapWordSize;
+  return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
 }
 
 inline size_t
-ParallelCompactData::addr_to_chunk_idx(const HeapWord* addr) const
+ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
 {
   assert(addr >= _region_start, "bad addr");
   assert(addr <= _region_end, "bad addr");
-  return pointer_delta(addr, _region_start) >> Log2ChunkSize;
-}
-
-inline ParallelCompactData::ChunkData*
-ParallelCompactData::addr_to_chunk_ptr(const HeapWord* addr) const
-{
-  return chunk(addr_to_chunk_idx(addr));
+  return pointer_delta(addr, _region_start) >> Log2RegionSize;
 }
 
-inline HeapWord*
-ParallelCompactData::chunk_to_addr(size_t chunk) const
+inline ParallelCompactData::RegionData*
+ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const
 {
-  assert(chunk <= _chunk_count, "chunk out of range");
-  return _region_start + (chunk << Log2ChunkSize);
-}
-
-inline HeapWord*
-ParallelCompactData::chunk_to_addr(const ChunkData* chunk) const
-{
-  return chunk_to_addr(pointer_delta(chunk, _chunk_data, sizeof(ChunkData)));
+  return region(addr_to_region_idx(addr));
 }
 
 inline HeapWord*
-ParallelCompactData::chunk_to_addr(size_t chunk, size_t offset) const
+ParallelCompactData::region_to_addr(size_t region) const
 {
-  assert(chunk <= _chunk_count, "chunk out of range");
-  assert(offset < ChunkSize, "offset too big");  // This may be too strict.
-  return chunk_to_addr(chunk) + offset;
+  assert(region <= _region_count, "region out of range");
+  return _region_start + (region << Log2RegionSize);
+}
+
+inline HeapWord*
+ParallelCompactData::region_to_addr(const RegionData* region) const
+{
+  return region_to_addr(pointer_delta(region, _region_data,
+                                      sizeof(RegionData)));
 }
 
 inline HeapWord*
-ParallelCompactData::chunk_align_down(HeapWord* addr) const
+ParallelCompactData::region_to_addr(size_t region, size_t offset) const
 {
-  assert(addr >= _region_start, "bad addr");
-  assert(addr < _region_end + ChunkSize, "bad addr");
-  return (HeapWord*)(size_t(addr) & ChunkAddrMask);
+  assert(region <= _region_count, "region out of range");
+  assert(offset < RegionSize, "offset too big");  // This may be too strict.
+  return region_to_addr(region) + offset;
 }
 
 inline HeapWord*
-ParallelCompactData::chunk_align_up(HeapWord* addr) const
+ParallelCompactData::region_align_down(HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr < _region_end + RegionSize, "bad addr");
+  return (HeapWord*)(size_t(addr) & RegionAddrMask);
+}
+
+inline HeapWord*
+ParallelCompactData::region_align_up(HeapWord* addr) const
 {
   assert(addr >= _region_start, "bad addr");
   assert(addr <= _region_end, "bad addr");
-  return chunk_align_down(addr + ChunkSizeOffsetMask);
+  return region_align_down(addr + RegionSizeOffsetMask);
 }
 
 inline bool
-ParallelCompactData::is_chunk_aligned(HeapWord* addr) const
-{
-  return chunk_offset(addr) == 0;
-}
-
-inline size_t
-ParallelCompactData::block_offset(const HeapWord* addr) const
-{
-  assert(addr >= _region_start, "bad addr");
-  assert(addr <= _region_end, "bad addr");
-  return pointer_delta(addr, _region_start) & BlockOffsetMask;
-}
-
-inline size_t
-ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
+ParallelCompactData::is_region_aligned(HeapWord* addr) const
 {
-  assert(addr >= _region_start, "bad addr");
-  assert(addr <= _region_end, "bad addr");
-  return pointer_delta(addr, _region_start) >> Log2BlockSize;
-}
-
-inline ParallelCompactData::BlockData*
-ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
-{
-  return block(addr_to_block_idx(addr));
-}
-
-inline HeapWord*
-ParallelCompactData::block_to_addr(size_t block) const
-{
-  assert(block < _block_count, "block out of range");
-  return _region_start + (block << Log2BlockSize);
+  return region_offset(addr) == 0;
 }
 
 // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
@@ -687,45 +546,15 @@
   _words_remaining -= words;
 }
 
-// Closure for updating the block data during the summary phase.
-class BitBlockUpdateClosure: public ParMarkBitMapClosure {
-  // ParallelCompactData::BlockData::blk_ofs_t _live_data_left;
-  size_t    _live_data_left;
-  size_t    _cur_block;
-  HeapWord* _chunk_start;
-  HeapWord* _chunk_end;
-  size_t    _chunk_index;
-
- public:
-  BitBlockUpdateClosure(ParMarkBitMap* mbm,
-                        ParCompactionManager* cm,
-                        size_t chunk_index);
-
-  size_t cur_block() { return _cur_block; }
-  size_t chunk_index() { return _chunk_index; }
-  size_t live_data_left() { return _live_data_left; }
-  // Returns true the first bit in the current block (cur_block) is
-  // a start bit.
-  // Returns true if the current block is within the chunk for the closure;
-  bool chunk_contains_cur_block();
-
-  // Set the chunk index and related chunk values for
-  // a new chunk.
-  void reset_chunk(size_t chunk_index);
-
-  virtual IterationStatus do_addr(HeapWord* addr, size_t words);
-};
-
-// The UseParallelOldGC collector is a stop-the-world garbage
-// collector that does parts of the collection using parallel threads.
-// The collection includes the tenured generation and the young
-// generation.  The permanent generation is collected at the same
-// time as the other two generations but the permanent generation
-// is collect by a single GC thread.  The permanent generation is
-// collected serially because of the requirement that during the
-// processing of a klass AAA, any objects reference by AAA must
-// already have been processed.  This requirement is enforced by
-// a left (lower address) to right (higher address) sliding compaction.
+// The UseParallelOldGC collector is a stop-the-world garbage collector that
+// does parts of the collection using parallel threads.  The collection includes
+// the tenured generation and the young generation.  The permanent generation is
+// collected at the same time as the other two generations but the permanent
+// generation is collect by a single GC thread.  The permanent generation is
+// collected serially because of the requirement that during the processing of a
+// klass AAA, any objects reference by AAA must already have been processed.
+// This requirement is enforced by a left (lower address) to right (higher
+// address) sliding compaction.
 //
 // There are four phases of the collection.
 //
@@ -740,81 +569,75 @@
 //      - move the objects to their destination
 //      - update some references and reinitialize some variables
 //
-// These three phases are invoked in PSParallelCompact::invoke_no_policy().
-// The marking phase is implemented in PSParallelCompact::marking_phase()
-// and does a complete marking of the heap.
-// The summary phase is implemented in PSParallelCompact::summary_phase().
-// The move and update phase is implemented in PSParallelCompact::compact().
+// These three phases are invoked in PSParallelCompact::invoke_no_policy().  The
+// marking phase is implemented in PSParallelCompact::marking_phase() and does a
+// complete marking of the heap.  The summary phase is implemented in
+// PSParallelCompact::summary_phase().  The move and update phase is implemented
+// in PSParallelCompact::compact().
 //
-// A space that is being collected is divided into chunks and with
-// each chunk is associated an object of type ParallelCompactData.
-// Each chunk is of a fixed size and typically will contain more than
-// 1 object and may have parts of objects at the front and back of the
-// chunk.
+// A space that is being collected is divided into regions and with each region
+// is associated an object of type ParallelCompactData.  Each region is of a
+// fixed size and typically will contain more than 1 object and may have parts
+// of objects at the front and back of the region.
 //
-// chunk            -----+---------------------+----------
+// region            -----+---------------------+----------
 // objects covered   [ AAA  )[ BBB )[ CCC   )[ DDD     )
 //
-// The marking phase does a complete marking of all live objects in the
-// heap.  The marking also compiles the size of the data for
-// all live objects covered by the chunk.  This size includes the
-// part of any live object spanning onto the chunk (part of AAA
-// if it is live) from the front, all live objects contained in the chunk
-// (BBB and/or CCC if they are live), and the part of any live objects
-// covered by the chunk that extends off the chunk (part of DDD if it is
-// live).  The marking phase uses multiple GC threads and marking is
-// done in a bit array of type ParMarkBitMap.  The marking of the
-// bit map is done atomically as is the accumulation of the size of the
-// live objects covered by a chunk.
+// The marking phase does a complete marking of all live objects in the heap.
+// The marking also compiles the size of the data for all live objects covered
+// by the region.  This size includes the part of any live object spanning onto
+// the region (part of AAA if it is live) from the front, all live objects
+// contained in the region (BBB and/or CCC if they are live), and the part of
+// any live objects covered by the region that extends off the region (part of
+// DDD if it is live).  The marking phase uses multiple GC threads and marking
+// is done in a bit array of type ParMarkBitMap.  The marking of the bit map is
+// done atomically as is the accumulation of the size of the live objects
+// covered by a region.
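As a rough illustration of the bookkeeping described above (a simplified, single-threaded toy, not ParallelCompactData::add_obj; it uses plain word indices and an assumed RegionSizeWords constant instead of HeapWord* arithmetic and atomics), each marked object credits its words to every region it overlaps:

    #include <cstddef>
    #include <vector>

    static const std::size_t RegionSizeWords = 512;   // assumed region size, in words

    // Credit a marked object's words to every region it overlaps, so each
    // region accumulates the total live data it covers, including the parts
    // of objects that span its boundaries.
    static void add_live_object(std::vector<std::size_t>& live_words_per_region,
                                std::size_t obj_beg, std::size_t obj_len) {
      const std::size_t obj_end = obj_beg + obj_len;                 // one past the end
      const std::size_t first   = obj_beg / RegionSizeWords;
      const std::size_t last    = (obj_end - 1) / RegionSizeWords;
      for (std::size_t r = first; r <= last; ++r) {
        const std::size_t r_beg = r * RegionSizeWords;
        const std::size_t r_end = r_beg + RegionSizeWords;
        const std::size_t beg   = obj_beg > r_beg ? obj_beg : r_beg;
        const std::size_t end   = obj_end < r_end ? obj_end : r_end;
        live_words_per_region[r] += end - beg;                       // slice inside region r
      }
    }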
 //
-// The summary phase calculates the total live data to the left of
-// each chunk XXX.  Based on that total and the bottom of the space,
-// it can calculate the starting location of the live data in XXX.
-// The summary phase calculates for each chunk XXX quantites such as
+// The summary phase calculates the total live data to the left of each region
+// XXX.  Based on that total and the bottom of the space, it can calculate the
+// starting location of the live data in XXX.  The summary phase calculates for
+// each region XXX quantities such as
 //
-//      - the amount of live data at the beginning of a chunk from an object
-//      entering the chunk.
-//      - the location of the first live data on the chunk
-//      - a count of the number of chunks receiving live data from XXX.
+//      - the amount of live data at the beginning of a region from an object
+//        entering the region.
+//      - the location of the first live data on the region
+//      - a count of the number of regions receiving live data from XXX.
 //
 // See ParallelCompactData for precise details.  The summary phase also
-// calculates the dense prefix for the compaction.  The dense prefix
-// is a portion at the beginning of the space that is not moved.  The
-// objects in the dense prefix do need to have their object references
-// updated.  See method summarize_dense_prefix().
+// calculates the dense prefix for the compaction.  The dense prefix is a
+// portion at the beginning of the space that is not moved.  The objects in the
+// dense prefix do need to have their object references updated.  See method
+// summarize_dense_prefix().
 //
 // The summary phase is done using 1 GC thread.
 //
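A minimal sketch of the destination calculation described above (again a toy rather than the HotSpot summary phase; live_words_per_region would come from the marking phase and the space bottom is given as a word index): the live data of region i lands at the space bottom plus all live data to its left, which is a simple prefix sum.

    #include <cstddef>
    #include <vector>

    // Returns, for each region, the word index its live data compacts to.
    static std::vector<std::size_t>
    region_destinations(std::size_t space_bottom,
                        const std::vector<std::size_t>& live_words_per_region) {
      std::vector<std::size_t> dest(live_words_per_region.size());
      std::size_t cursor = space_bottom;
      for (std::size_t i = 0; i < live_words_per_region.size(); ++i) {
        dest[i] = cursor;                      // where region i's live data starts
        cursor += live_words_per_region[i];    // live data to the left grows by region i's share
      }
      return dest;                             // 'cursor' is the new top of the space
    }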
-// The compaction phase moves objects to their new location and updates
-// all references in the object.
+// The compaction phase moves objects to their new location and updates all
+// references in the object.
 //
-// A current exception is that objects that cross a chunk boundary
-// are moved but do not have their references updated.  References are
-// not updated because it cannot easily be determined if the klass
-// pointer KKK for the object AAA has been updated.  KKK likely resides
-// in a chunk to the left of the chunk containing AAA.  These AAA's
-// have there references updated at the end in a clean up phase.
-// See the method PSParallelCompact::update_deferred_objects().  An
-// alternate strategy is being investigated for this deferral of updating.
+// A current exception is that objects that cross a region boundary are moved
+// but do not have their references updated.  References are not updated because
+// it cannot easily be determined if the klass pointer KKK for the object AAA
+// has been updated.  KKK likely resides in a region to the left of the region
+// containing AAA.  These AAA's have their references updated at the end in a
+// clean up phase.  See the method PSParallelCompact::update_deferred_objects().
+// An alternate strategy is being investigated for this deferral of updating.
 //
-// Compaction is done on a chunk basis.  A chunk that is ready to be
-// filled is put on a ready list and GC threads take chunk off the list
-// and fill them.  A chunk is ready to be filled if it
-// empty of live objects.  Such a chunk may have been initially
-// empty (only contained
-// dead objects) or may have had all its live objects copied out already.
-// A chunk that compacts into itself is also ready for filling.  The
-// ready list is initially filled with empty chunks and chunks compacting
-// into themselves.  There is always at least 1 chunk that can be put on
-// the ready list.  The chunks are atomically added and removed from
-// the ready list.
-//
+// Compaction is done on a region basis.  A region that is ready to be filled is
+// put on a ready list and GC threads take regions off the list and fill them.  A
+// region is ready to be filled if it is empty of live objects.  Such a region may
+// have been initially empty (only contained dead objects) or may have had all
+// its live objects copied out already.  A region that compacts into itself is
+// also ready for filling.  The ready list is initially filled with empty
+// regions and regions compacting into themselves.  There is always at least 1
+// region that can be put on the ready list.  The regions are atomically added
+// and removed from the ready list.
+
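The ready-list scheme above can be pictured with a small single-threaded toy (not the HotSpot task code, which claims regions atomically through ParCompactionManager and the GC task queue): a region becomes fillable once its destination count reaches zero, i.e. no other region still needs to copy live data out of it.

    #include <cstddef>
    #include <vector>

    struct ToyRegion {
      unsigned destination_count;   // regions that still read live data from this one
      bool     filled;
    };

    // 'sources[i]' lists the regions that region i copies its live data from.
    // 'ready' is seeded with initially empty regions and regions that compact
    // into themselves; draining it fills the whole space.
    static void drain(std::vector<ToyRegion>& regions,
                      std::vector<std::size_t>& ready,
                      const std::vector<std::vector<std::size_t>>& sources) {
      while (!ready.empty()) {
        const std::size_t idx = ready.back();
        ready.pop_back();
        regions[idx].filled = true;                  // "copy" the live data into region idx
        for (std::size_t src : sources[idx]) {       // each source has now been drained further
          if (--regions[src].destination_count == 0 && !regions[src].filled) {
            ready.push_back(src);                    // empty of live data: ready for filling
          }
        }
      }
    }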
 class PSParallelCompact : AllStatic {
  public:
   // Convenient access to type names.
   typedef ParMarkBitMap::idx_t idx_t;
-  typedef ParallelCompactData::ChunkData ChunkData;
-  typedef ParallelCompactData::BlockData BlockData;
+  typedef ParallelCompactData::RegionData RegionData;
 
   typedef enum {
     perm_space_id, old_space_id, eden_space_id,
@@ -977,26 +800,26 @@
   // not reclaimed).
   static double dead_wood_limiter(double density, size_t min_percent);
 
-  // Find the first (left-most) chunk in the range [beg, end) that has at least
+  // Find the first (left-most) region in the range [beg, end) that has at least
   // dead_words of dead space to the left.  The argument beg must be the first
-  // chunk in the space that is not completely live.
-  static ChunkData* dead_wood_limit_chunk(const ChunkData* beg,
-                                          const ChunkData* end,
-                                          size_t dead_words);
+  // region in the space that is not completely live.
+  static RegionData* dead_wood_limit_region(const RegionData* beg,
+                                            const RegionData* end,
+                                            size_t dead_words);
 
-  // Return a pointer to the first chunk in the range [beg, end) that is not
+  // Return a pointer to the first region in the range [beg, end) that is not
   // completely full.
-  static ChunkData* first_dead_space_chunk(const ChunkData* beg,
-                                           const ChunkData* end);
+  static RegionData* first_dead_space_region(const RegionData* beg,
+                                             const RegionData* end);
 
   // Return a value indicating the benefit or 'yield' if the compacted region
   // were to start (or equivalently if the dense prefix were to end) at the
-  // candidate chunk.  Higher values are better.
+  // candidate region.  Higher values are better.
   //
   // The value is based on the amount of space reclaimed vs. the costs of (a)
   // updating references in the dense prefix plus (b) copying objects and
   // updating references in the compacted region.
-  static inline double reclaimed_ratio(const ChunkData* const candidate,
+  static inline double reclaimed_ratio(const RegionData* const candidate,
                                        HeapWord* const bottom,
                                        HeapWord* const top,
                                        HeapWord* const new_top);
@@ -1005,9 +828,9 @@
   static HeapWord* compute_dense_prefix(const SpaceId id,
                                         bool maximum_compaction);
 
-  // Return true if dead space crosses onto the specified Chunk; bit must be the
-  // bit index corresponding to the first word of the Chunk.
-  static inline bool dead_space_crosses_boundary(const ChunkData* chunk,
+  // Return true if dead space crosses onto the specified Region; bit must be
+  // the bit index corresponding to the first word of the Region.
+  static inline bool dead_space_crosses_boundary(const RegionData* region,
                                                  idx_t bit);
 
   // Summary phase utility routine to fill dead space (if any) at the dense
@@ -1019,12 +842,6 @@
   static void summarize_space(SpaceId id, bool maximum_compaction);
   static void summary_phase(ParCompactionManager* cm, bool maximum_compaction);
 
-  static bool block_first_offset(size_t block_index, idx_t* block_offset_ptr);
-
-  // Fill in the BlockData
-  static void summarize_blocks(ParCompactionManager* cm,
-                               SpaceId first_compaction_space_id);
-
   // The space that is compacted after space_id.
   static SpaceId next_compaction_space_id(SpaceId space_id);
 
@@ -1038,16 +855,16 @@
   static void compact_perm(ParCompactionManager* cm);
   static void compact();
 
-  // Add available chunks to the stack and draining tasks to the task queue.
-  static void enqueue_chunk_draining_tasks(GCTaskQueue* q,
-                                           uint parallel_gc_threads);
+  // Add available regions to the stack and draining tasks to the task queue.
+  static void enqueue_region_draining_tasks(GCTaskQueue* q,
+                                            uint parallel_gc_threads);
 
   // Add dense prefix update tasks to the task queue.
   static void enqueue_dense_prefix_tasks(GCTaskQueue* q,
                                          uint parallel_gc_threads);
 
-  // Add chunk stealing tasks to the task queue.
-  static void enqueue_chunk_stealing_tasks(
+  // Add region stealing tasks to the task queue.
+  static void enqueue_region_stealing_tasks(
                                        GCTaskQueue* q,
                                        ParallelTaskTerminator* terminator_ptr,
                                        uint parallel_gc_threads);
@@ -1154,56 +971,56 @@
   // Move and update the live objects in the specified space.
   static void move_and_update(ParCompactionManager* cm, SpaceId space_id);
 
-  // Process the end of the given chunk range in the dense prefix.
+  // Process the end of the given region range in the dense prefix.
   // This includes saving any object not updated.
-  static void dense_prefix_chunks_epilogue(ParCompactionManager* cm,
-                                           size_t chunk_start_index,
-                                           size_t chunk_end_index,
-                                           idx_t exiting_object_offset,
-                                           idx_t chunk_offset_start,
-                                           idx_t chunk_offset_end);
+  static void dense_prefix_regions_epilogue(ParCompactionManager* cm,
+                                            size_t region_start_index,
+                                            size_t region_end_index,
+                                            idx_t exiting_object_offset,
+                                            idx_t region_offset_start,
+                                            idx_t region_offset_end);
 
-  // Update a chunk in the dense prefix.  For each live object
-  // in the chunk, update it's interior references.  For each
+  // Update a region in the dense prefix.  For each live object
+  // in the region, update its interior references.  For each
   // dead object, fill it with deadwood. Dead space at the end
-  // of a chunk range will be filled to the start of the next
-  // live object regardless of the chunk_index_end.  None of the
+  // of a region range will be filled to the start of the next
+  // live object regardless of the region_index_end.  None of the
   // objects in the dense prefix move and dead space is dead
   // (holds only dead objects that don't need any processing), so
   // dead space can be filled in any order.
   static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
                                                   SpaceId space_id,
-                                                  size_t chunk_index_start,
-                                                  size_t chunk_index_end);
+                                                  size_t region_index_start,
+                                                  size_t region_index_end);
 
   // Return the address of the count + 1st live word in the range [beg, end).
   static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);
 
   // Return the address of the word to be copied to dest_addr, which must be
-  // aligned to a chunk boundary.
+  // aligned to a region boundary.
   static HeapWord* first_src_addr(HeapWord* const dest_addr,
-                                  size_t src_chunk_idx);
+                                  size_t src_region_idx);
 
-  // Determine the next source chunk, set closure.source() to the start of the
-  // new chunk return the chunk index.  Parameter end_addr is the address one
+  // Determine the next source region, set closure.source() to the start of the
+  // new region and return the region index.  Parameter end_addr is the address one
   // beyond the end of source range just processed.  If necessary, switch to a
   // new source space and set src_space_id (in-out parameter) and src_space_top
   // (out parameter) accordingly.
-  static size_t next_src_chunk(MoveAndUpdateClosure& closure,
-                               SpaceId& src_space_id,
-                               HeapWord*& src_space_top,
-                               HeapWord* end_addr);
+  static size_t next_src_region(MoveAndUpdateClosure& closure,
+                                SpaceId& src_space_id,
+                                HeapWord*& src_space_top,
+                                HeapWord* end_addr);
 
-  // Decrement the destination count for each non-empty source chunk in the
-  // range [beg_chunk, chunk(chunk_align_up(end_addr))).
+  // Decrement the destination count for each non-empty source region in the
+  // range [beg_region, region(region_align_up(end_addr))).
   static void decrement_destination_counts(ParCompactionManager* cm,
-                                           size_t beg_chunk,
+                                           size_t beg_region,
                                            HeapWord* end_addr);
 
-  // Fill a chunk, copying objects from one or more source chunks.
-  static void fill_chunk(ParCompactionManager* cm, size_t chunk_idx);
-  static void fill_and_update_chunk(ParCompactionManager* cm, size_t chunk) {
-    fill_chunk(cm, chunk);
+  // Fill a region, copying objects from one or more source regions.
+  static void fill_region(ParCompactionManager* cm, size_t region_idx);
+  static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
+    fill_region(cm, region);
   }
 
   // Update the deferred objects in the space.
@@ -1259,7 +1076,7 @@
 #ifndef PRODUCT
   // Debugging support.
   static const char* space_names[last_space_id];
-  static void print_chunk_ranges();
+  static void print_region_ranges();
   static void print_dense_prefix_stats(const char* const algorithm,
                                        const SpaceId id,
                                        const bool maximum_compaction,
@@ -1267,7 +1084,7 @@
 #endif  // #ifndef PRODUCT
 
 #ifdef  ASSERT
-  // Verify that all the chunks have been emptied.
+  // Verify that all the regions have been emptied.
   static void verify_complete(SpaceId space_id);
 #endif  // #ifdef ASSERT
 };
@@ -1376,17 +1193,17 @@
 }
 
 inline bool
-PSParallelCompact::dead_space_crosses_boundary(const ChunkData* chunk,
+PSParallelCompact::dead_space_crosses_boundary(const RegionData* region,
                                                idx_t bit)
 {
-  assert(bit > 0, "cannot call this for the first bit/chunk");
-  assert(_summary_data.chunk_to_addr(chunk) == _mark_bitmap.bit_to_addr(bit),
+  assert(bit > 0, "cannot call this for the first bit/region");
+  assert(_summary_data.region_to_addr(region) == _mark_bitmap.bit_to_addr(bit),
          "sanity check");
 
   // Dead space crosses the boundary if (1) a partial object does not extend
-  // onto the chunk, (2) an object does not start at the beginning of the chunk,
-  // and (3) an object does not end at the end of the prior chunk.
-  return chunk->partial_obj_size() == 0 &&
+  // onto the region, (2) an object does not start at the beginning of the
+  // region, and (3) an object does not end at the end of the prior region.
+  return region->partial_obj_size() == 0 &&
     !_mark_bitmap.is_obj_beg(bit) &&
     !_mark_bitmap.is_obj_end(bit - 1);
 }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -123,8 +123,6 @@
 
 void PSPermGen::precompact() {
   // Reset start array first.
-  debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
   _start_array.reset();
-  debug_only(})
   object_mark_sweep()->precompact();
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/coTracker.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_coTracker.cpp.incl"
+
+COTracker* COTracker::_head = NULL;
+double COTracker::_cpu_number = -1.0;
+
+void
+COTracker::resetPeriod(double now_sec, double vnow_sec) {
+  guarantee( _enabled, "invariant" );
+  _period_start_time_sec  = now_sec;
+  _period_start_vtime_sec = vnow_sec;
+}
+
+void
+COTracker::setConcOverhead(double time_stamp_sec,
+                           double conc_overhead) {
+  guarantee( _enabled, "invariant" );
+  _conc_overhead  = conc_overhead;
+  _time_stamp_sec = time_stamp_sec;
+  if (conc_overhead > 0.001)
+    _conc_overhead_seq.add(conc_overhead);
+}
+
+void
+COTracker::reset(double starting_conc_overhead) {
+  guarantee( _enabled, "invariant" );
+  double now_sec = os::elapsedTime();
+  setConcOverhead(now_sec, starting_conc_overhead);
+}
+
+void
+COTracker::start() {
+  guarantee( _enabled, "invariant" );
+  resetPeriod(os::elapsedTime(), os::elapsedVTime());
+}
+
+void
+COTracker::update(bool force_end) {
+  assert( _enabled, "invariant" );
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_sec = end_time_sec - _period_start_time_sec;
+  if (force_end || elapsed_time_sec > _update_period_sec) {
+    // reached the end of the period
+    double end_vtime_sec = os::elapsedVTime();
+    double elapsed_vtime_sec = end_vtime_sec - _period_start_vtime_sec;
+
+    double conc_overhead = elapsed_vtime_sec / elapsed_time_sec;
+
+    setConcOverhead(end_time_sec, conc_overhead);
+    resetPeriod(end_time_sec, end_vtime_sec);
+  }
+}
+
+void
+COTracker::updateForSTW(double start_sec, double end_sec) {
+  if (!_enabled)
+    return;
+
+  // During a STW pause, no concurrent GC thread has done any
+  // work. So, we can safely adjust the start of the current period by
+  // adding the duration of the STW pause to it, so that the STW pause
+  // doesn't affect the reading of the concurrent overhead (it's
+  // basically like excluding the time of the STW pause from the
+  // concurrent overhead calculation).
+
+  double stw_duration_sec = end_sec - start_sec;
+  guarantee( stw_duration_sec > 0.0, "invariant" );
+
+  if (outOfDate(start_sec))
+    _conc_overhead = 0.0;
+  else
+    _time_stamp_sec = end_sec;
+  _period_start_time_sec += stw_duration_sec;
+  _conc_overhead_seq = NumberSeq();
+
+  guarantee( os::elapsedTime() > _period_start_time_sec, "invariant" );
+}
+
+double
+COTracker::predConcOverhead() {
+  if (_enabled) {
+    // tty->print(" %1.2lf", _conc_overhead_seq.maximum());
+    return _conc_overhead_seq.maximum();
+  } else {
+    // tty->print(" DD");
+    return 0.0;
+  }
+}
+
+void
+COTracker::resetPred() {
+  _conc_overhead_seq = NumberSeq();
+}
+
+COTracker::COTracker(int group)
+    : _enabled(false),
+      _group(group),
+      _period_start_time_sec(-1.0),
+      _period_start_vtime_sec(-1.0),
+      _conc_overhead(-1.0),
+      _time_stamp_sec(-1.0),
+      _next(NULL) {
+  // GCOverheadReportingPeriodMS indicates how frequently the
+  // concurrent overhead will be recorded by the GC Overhead
+  // Reporter. We want to take readings less often than that. If we
+  // took readings more often, some of them might be lost.
+  _update_period_sec = ((double) GCOverheadReportingPeriodMS) / 1000.0 * 1.25;
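+  // For illustration only: if GCOverheadReportingPeriodMS were 100, this
+  // works out to 0.100 * 1.25 = 0.125 s, i.e. a fresh reading at most every
+  // 125 ms, slightly less often than the reporting period.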
+  _next = _head;
+  _head = this;
+
+  if (_cpu_number < 0.0)
+    _cpu_number = (double) os::processor_count();
+}
+
+// statics
+
+void
+COTracker::updateAllForSTW(double start_sec, double end_sec) {
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    curr->updateForSTW(start_sec, end_sec);
+  }
+}
+
+double
+COTracker::totalConcOverhead(double now_sec) {
+  double total_conc_overhead = 0.0;
+
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    double conc_overhead = curr->concOverhead(now_sec);
+    total_conc_overhead += conc_overhead;
+  }
+
+  return total_conc_overhead;
+}
+
+double
+COTracker::totalConcOverhead(double now_sec,
+                             size_t group_num,
+                             double* co_per_group) {
+  double total_conc_overhead = 0.0;
+
+  for (size_t i = 0; i < group_num; ++i)
+    co_per_group[i] = 0.0;
+
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    size_t group = curr->_group;
+    assert( 0 <= group && group < group_num, "invariant" );
+    double conc_overhead = curr->concOverhead(now_sec);
+
+    co_per_group[group] += conc_overhead;
+    total_conc_overhead += conc_overhead;
+  }
+
+  return total_conc_overhead;
+}
+
+double
+COTracker::totalPredConcOverhead() {
+  double total_pred_conc_overhead = 0.0;
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    total_pred_conc_overhead += curr->predConcOverhead();
+    curr->resetPred();
+  }
+  return total_pred_conc_overhead / _cpu_number;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/coTracker.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// COTracker keeps track of the concurrent overhead of a GC thread.
+
+// A thread that needs to be tracked must, itself, start up its
+// tracker with the start() method and then call the update() method
+// at regular intervals. What the tracker does is to calculate the
+// concurrent overhead of a process at a given update period. The
+// tracker starts and when it detects that it has exceeded the given
+// period, it calculates the duration of the period in wall-clock time
+// and the duration of the period in vtime (i.e. how much time the
+// concurrent processes really took up during this period). The ratio
+// of the latter over the former is the concurrent overhead of that
+// process for that period over a single CPU. This overhead is stored
+// on the tracker, "timestamped" with the wall-clock time of the end
+// of the period. When the concurrent overhead of this process needs
+// to be queried, this last "reading" provides a good approximation
+// (we assume that the concurrent overhead of a particular thread
+// stays largely constant over time). The timestamp is necessary to
+// detect when the process has stopped working and the recorded
+// reading hasn't been updated for some time.
+
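To make the reading concrete (the numbers here are purely illustrative): if over a 0.25 s wall-clock period the tracked thread consumed 0.10 s of CPU time, its overhead over a single CPU is 0.10 / 0.25 = 0.4, and on an 8-CPU machine that is 0.4 / 8 = 0.05 of the whole machine. A minimal sketch of that arithmetic:

    // Overhead of one concurrent thread over a single CPU for one period.
    static double conc_overhead_single_cpu(double wall_elapsed_sec,
                                           double vtime_elapsed_sec) {
      return vtime_elapsed_sec / wall_elapsed_sec;
    }

    // The same reading expressed as a fraction of the whole machine.
    static double conc_overhead_machine(double wall_elapsed_sec,
                                        double vtime_elapsed_sec,
                                        double cpu_count) {
      return conc_overhead_single_cpu(wall_elapsed_sec, vtime_elapsed_sec) / cpu_count;
    }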
+// Each concurrent GC thread is considered to be part of a "group"
+// (i.e. any available concurrent marking threads are part of the
+// "concurrent marking thread group"). A COTracker is associated with
+// a single group at construction-time. It's up to each collector to
+// decide how groups will be mapped to such an id (ids should start
+// from 0 and be consecutive; there's a hardcoded max group num
+// defined on the GCOverheadTracker class). The notion of a group has
+// been introduced to be able to identify how much overhead was
+// imposed by each group, instead of getting a single value that
+// covers all concurrent overhead.
+
+class COTracker {
+private:
+  // It indicates whether this tracker is enabled or not. When the
+  // tracker is disabled, then it returns 0.0 as the latest concurrent
+  // overhead and several methods (reset, start, and update) are not
+  // supposed to be called on it. This enabling / disabling facility
+  // is really provided to make it a bit more explicit in the code when a
+  // particular tracker of a process that doesn't run all the time
+  // (e.g. concurrent marking) is supposed to be used and when it's not.
+  bool               _enabled;
+
+  // The ID of the group associated with this tracker.
+  int                _group;
+
+  // The update period of the tracker. A new value for the concurrent
+  // overhead of the associated process will be made at intervals no
+  // smaller than this.
+  double             _update_period_sec;
+
+  // The start times (both wall-clock time and vtime) of the current
+  // interval.
+  double             _period_start_time_sec;
+  double             _period_start_vtime_sec;
+
+  // Number seq of the concurrent overhead readings within a period
+  NumberSeq          _conc_overhead_seq;
+
+  // The latest reading of the concurrent overhead (over a single CPU)
+  // imposed by the associated concurrent thread, made available at
+  // the indicated wall-clock time.
+  double             _conc_overhead;
+  double             _time_stamp_sec;
+
+  // The number of CPUs that the host machine has (for convenience
+  // really, as we'd have to keep translating it into a double)
+  static double      _cpu_number;
+
+  // Fields that keep a list of all trackers created. This is useful,
+  // since it allows us to sum up the concurrent overhead without
+  // having to write code for a specific collector to broadcast a
+  // request to all its concurrent processes.
+  COTracker*         _next;
+  static COTracker*  _head;
+
+  // It indicates that a new period is starting by updating the
+  // _period_start_time_sec and _period_start_vtime_sec fields.
+  void resetPeriod(double now_sec, double vnow_sec);
+  // It updates the latest concurrent overhead reading, taken at a
+  // given wall-clock time.
+  void setConcOverhead(double time_stamp_sec, double conc_overhead);
+
+  // It determines whether the time stamp of the latest concurrent
+  // overhead reading is out of date or not.
+  bool outOfDate(double now_sec) {
+    // The latest reading is considered out of date, if it was taken
+    // 1.2x the update period.
+    return (now_sec - _time_stamp_sec) > 1.2 * _update_period_sec;
+  }
+
+public:
+  // The constructor which associates the tracker with a group ID.
+  COTracker(int group);
+
+  // Methods to enable / disable the tracker and query whether it is enabled.
+  void enable()  { _enabled = true;  }
+  void disable() { _enabled = false; }
+  bool enabled() { return _enabled;  }
+
+  // It resets the tracker and sets the concurrent overhead reading to be
+  // the given parameter and the associated time stamp to be now.
+  void reset(double starting_conc_overhead = 0.0);
+  // The tracker starts tracking. It should only be called from the
+  // concurrent thread that is tracked by this tracker.
+  void start();
+  // It updates the tracker and, if the current period is longer than
+  // the update period, the concurrent overhead reading will be
+  // updated. force_end being true indicates that it's the last call
+  // to update() by this process before the tracker is disabled (the
+  // tracker can be re-enabled later if necessary).  It should only be
+  // called from the concurrent thread that is tracked by this tracker
+  // and while the thread has joined the STS.
+  void update(bool force_end = false);
+  // It adjusts the contents of the tracker to take into account a STW
+  // pause.
+  void updateForSTW(double start_sec, double end_sec);
+
+  // It returns the last concurrent overhead reading over a single
+  // CPU. If the reading is out of date, or the tracker is disabled,
+  // it returns 0.0.
+  double concCPUOverhead(double now_sec) {
+    if (!_enabled || outOfDate(now_sec))
+      return 0.0;
+    else
+      return _conc_overhead;
+  }
+
+  // It returns the last concurrent overhead reading over all CPUs
+  // that the host machine has. If the reading is out of date, or the
+  // tracker is disabled, it returns 0.0.
+  double concOverhead(double now_sec) {
+    return concCPUOverhead(now_sec) / _cpu_number;
+  }
+
+  double predConcOverhead();
+
+  void resetPred();
+
+  // statics
+
+  // It notifies all trackers about a STW pause.
+  static void updateAllForSTW(double start_sec, double end_sec);
+
+  // It returns the sum of the concurrent overhead readings of all
+  // available (and enabled) trackers for the given time stamp. The
+  // overhead is over all the CPUs of the host machine.
+
+  static double totalConcOverhead(double now_sec);
+  // Like the previous method, but it also sums up the overheads per
+  // group number. The length of the co_per_group array must be at
+  // least as large as group_num
+  static double totalConcOverhead(double now_sec,
+                                  size_t group_num,
+                                  double* co_per_group);
+
+  static double totalPredConcOverhead();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,314 @@
+/*
+ * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// CopyrightVersion 1.2
+
+# include "incls/_precompiled.incl"
+# include "incls/_concurrentGCThread.cpp.incl"
+
+bool ConcurrentGCThread::_should_terminate    = false;
+bool ConcurrentGCThread::_has_terminated      = false;
+int  ConcurrentGCThread::_CGC_flag            = CGC_nil;
+
+SuspendibleThreadSet ConcurrentGCThread::_sts;
+
+ConcurrentGCThread::ConcurrentGCThread() {
+  _sts.initialize();
+};
+
+void ConcurrentGCThread::stopWorldAndDo(VoidClosure* op) {
+  MutexLockerEx x(Heap_lock,
+                  Mutex::_no_safepoint_check_flag);
+  // warning("CGC: about to try stopping world");
+  SafepointSynchronize::begin();
+  // warning("CGC: successfully stopped world");
+  op->do_void();
+  SafepointSynchronize::end();
+  // warning("CGC: successfully restarted world");
+}
+
+void ConcurrentGCThread::safepoint_synchronize() {
+  _sts.suspend_all();
+}
+
+void ConcurrentGCThread::safepoint_desynchronize() {
+  _sts.resume_all();
+}
+
+void ConcurrentGCThread::create_and_start() {
+  if (os::create_thread(this, os::cgc_thread)) {
+    // XXX: need to set this to low priority
+    // unless "agressive mode" set; priority
+    // should be just less than that of VMThread.
+    os::set_priority(this, NearMaxPriority);
+    if (!_should_terminate && !DisableStartThread) {
+      os::start_thread(this);
+    }
+  }
+}
+
+void ConcurrentGCThread::initialize_in_thread() {
+  this->record_stack_base_and_size();
+  this->initialize_thread_local_storage();
+  this->set_active_handles(JNIHandleBlock::allocate_block());
+  // From this time Thread::current() should be working.
+  assert(this == Thread::current(), "just checking");
+}
+
+void ConcurrentGCThread::wait_for_universe_init() {
+  MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+  while (!is_init_completed() && !_should_terminate) {
+    CGC_lock->wait(Mutex::_no_safepoint_check_flag, 200);
+  }
+}
+
+void ConcurrentGCThread::terminate() {
+  // Signal that it is terminated
+  {
+    MutexLockerEx mu(Terminator_lock,
+                     Mutex::_no_safepoint_check_flag);
+    _has_terminated = true;
+    Terminator_lock->notify();
+  }
+
+  // Thread destructor usually does this..
+  ThreadLocalStorage::set_thread(NULL);
+}
+
+
+void SuspendibleThreadSet::initialize_work() {
+  MutexLocker x(STS_init_lock);
+  if (!_initialized) {
+    _m             = new Monitor(Mutex::leaf,
+                                 "SuspendibleThreadSetLock", true);
+    _async         = 0;
+    _async_stop    = false;
+    _async_stopped = 0;
+    _initialized   = true;
+  }
+}
+
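+// A concurrent GC thread enters the suspendible set.  If a suspend_all() is
+// in progress, block until it finishes so the safepoint does not start with
+// this thread unaccounted for.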
+void SuspendibleThreadSet::join() {
+  initialize();
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag);
+  _async++;
+  assert(_async > 0, "Huh.");
+}
+
+void SuspendibleThreadSet::leave() {
+  assert(_initialized, "Must be initialized.");
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  _async--;
+  assert(_async >= 0, "Huh.");
+  if (_async_stop) _m->notify_all();
+}
+
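+// Called periodically by a joined thread.  If a suspend_all() is in progress
+// (_async_stop is set), the thread counts itself as stopped, wakes the
+// suspender, and blocks here until resume_all() clears the flag.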
+void SuspendibleThreadSet::yield(const char* id) {
+  assert(_initialized, "Must be initialized.");
+  if (_async_stop) {
+    MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+    if (_async_stop) {
+      _async_stopped++;
+      assert(_async_stopped > 0, "Huh.");
+      if (_async_stopped == _async) {
+        if (ConcGCYieldTimeout > 0) {
+          double now = os::elapsedTime();
+          guarantee((now - _suspend_all_start) * 1000.0 <
+                    (double)ConcGCYieldTimeout,
+                    "Long delay; whodunit?");
+        }
+      }
+      _m->notify_all();
+      while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag);
+      _async_stopped--;
+      assert(_async >= 0, "Huh");
+      _m->notify_all();
+    }
+  }
+}
+
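+// Stop-the-world side of the handshake: set _async_stop and wait until every
+// thread currently in the set has parked itself in yield().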
+void SuspendibleThreadSet::suspend_all() {
+  initialize();  // If necessary.
+  if (ConcGCYieldTimeout > 0) {
+    _suspend_all_start = os::elapsedTime();
+  }
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  assert(!_async_stop, "Only one at a time.");
+  _async_stop = true;
+  while (_async_stopped < _async) _m->wait(Mutex::_no_safepoint_check_flag);
+}
+
+void SuspendibleThreadSet::resume_all() {
+  assert(_initialized, "Must be initialized.");
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  assert(_async_stopped == _async, "Huh.");
+  _async_stop = false;
+  _m->notify_all();
+}
+
+static void _sltLoop(JavaThread* thread, TRAPS) {
+  SurrogateLockerThread* slt = (SurrogateLockerThread*)thread;
+  slt->loop();
+}
+
+SurrogateLockerThread::SurrogateLockerThread() :
+  JavaThread(&_sltLoop),
+  _monitor(Mutex::nonleaf, "SLTMonitor"),
+  _buffer(empty)
+{}
+
+SurrogateLockerThread* SurrogateLockerThread::make(TRAPS) {
+  klassOop k =
+    SystemDictionary::resolve_or_fail(vmSymbolHandles::java_lang_Thread(),
+                                      true, CHECK_NULL);
+  instanceKlassHandle klass (THREAD, k);
+  instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_NULL);
+
+  const char thread_name[] = "Surrogate Locker Thread (CMS)";
+  Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL);
+
+  // Initialize thread_oop to put it into the system threadGroup
+  Handle thread_group (THREAD, Universe::system_thread_group());
+  JavaValue result(T_VOID);
+  JavaCalls::call_special(&result, thread_oop,
+                          klass,
+                          vmSymbolHandles::object_initializer_name(),
+                          vmSymbolHandles::threadgroup_string_void_signature(),
+                          thread_group,
+                          string,
+                          CHECK_NULL);
+
+  SurrogateLockerThread* res;
+  {
+    MutexLocker mu(Threads_lock);
+    res = new SurrogateLockerThread();
+
+    // At this point it may be possible that no osthread was created for the
+    // JavaThread due to lack of memory. We would have to throw an exception
+    // in that case. However, since this must work and we do not allow
+    // exceptions anyway, check and abort if this fails.
+    if (res == NULL || res->osthread() == NULL) {
+      vm_exit_during_initialization("java.lang.OutOfMemoryError",
+                                    "unable to create new native thread");
+    }
+    java_lang_Thread::set_thread(thread_oop(), res);
+    java_lang_Thread::set_priority(thread_oop(), NearMaxPriority);
+    java_lang_Thread::set_daemon(thread_oop());
+
+    res->set_threadObj(thread_oop());
+    Threads::add(res);
+    Thread::start(res);
+  }
+  os::yield(); // This seems to help with initial start-up of SLT
+  return res;
+}
+
+void SurrogateLockerThread::manipulatePLL(SLT_msg_type msg) {
+  MutexLockerEx x(&_monitor, Mutex::_no_safepoint_check_flag);
+  assert(_buffer == empty, "Should be empty");
+  assert(msg != empty, "empty message");
+  _buffer = msg;
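+  // Wait until the SLT has consumed the message; loop() resets _buffer to
+  // empty when it is done.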
+  while (_buffer != empty) {
+    _monitor.notify();
+    _monitor.wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+// ======= Surrogate Locker Thread =============
+
+void SurrogateLockerThread::loop() {
+  BasicLock pll_basic_lock;
+  SLT_msg_type msg;
+  debug_only(unsigned int owned = 0;)
+
+  while (/* !isTerminated() */ 1) {
+    {
+      MutexLocker x(&_monitor);
+      // Since we are a JavaThread, we can't be here at a safepoint.
+      assert(!SafepointSynchronize::is_at_safepoint(),
+             "SLT is a JavaThread");
+      // wait for msg buffer to become non-empty
+      while (_buffer == empty) {
+        _monitor.notify();
+        _monitor.wait();
+      }
+      msg = _buffer;
+    }
+    switch(msg) {
+      case acquirePLL: {
+        instanceRefKlass::acquire_pending_list_lock(&pll_basic_lock);
+        debug_only(owned++;)
+        break;
+      }
+      case releaseAndNotifyPLL: {
+        assert(owned > 0, "Don't have PLL");
+        instanceRefKlass::release_and_notify_pending_list_lock(&pll_basic_lock);
+        debug_only(owned--;)
+        break;
+      }
+      case empty:
+      default: {
+        guarantee(false,"Unexpected message in _buffer");
+        break;
+      }
+    }
+    {
+      MutexLocker x(&_monitor);
+      // Since we are a JavaThread, we can't be here at a safepoint.
+      assert(!SafepointSynchronize::is_at_safepoint(),
+             "SLT is a JavaThread");
+      _buffer = empty;
+      _monitor.notify();
+    }
+  }
+  assert(!_monitor.owned_by_self(), "Should unlock before exit.");
+}
+
+
+// ===== STS Access From Outside CGCT =====
+
+void ConcurrentGCThread::stsYield(const char* id) {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  _sts.yield(id);
+}
+
+bool ConcurrentGCThread::stsShouldYield() {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  return _sts.should_yield();
+}
+
+void ConcurrentGCThread::stsJoin() {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  _sts.join();
+}
+
+void ConcurrentGCThread::stsLeave() {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  _sts.leave();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class VoidClosure;
+
+// A SuspendibleThreadSet is (obviously) a set of threads that can be
+// suspended.  A thread can join and later leave the set, and periodically
+// yield.  If some thread (not in the set) requests, via suspend_all, that
+// the threads be suspended, then the requesting thread is blocked until
+// all the threads in the set have yielded or left the set.  (Threads may
+// not enter the set when an attempted suspension is in progress.)  The
+// suspending thread later calls resume_all, allowing the suspended threads
+// to continue.
+
+class SuspendibleThreadSet {
+  Monitor* _m;
+  int      _async;
+  bool     _async_stop;
+  int      _async_stopped;
+  bool     _initialized;
+  double   _suspend_all_start;
+
+  void initialize_work();
+
+ public:
+  SuspendibleThreadSet() : _initialized(false) {}
+
+  // Add the current thread to the set.  May block if a suspension
+  // is in progress.
+  void join();
+  // Removes the current thread from the set.
+  void leave();
+  // Returns "true" iff an suspension is in progress.
+  bool should_yield() { return _async_stop; }
+  // Suspends the current thread if a suspension is in progress (for
+  // the duration of the suspension.)
+  void yield(const char* id);
+  // Return when all threads in the set are suspended.
+  void suspend_all();
+  // Allow suspended threads to resume.
+  void resume_all();
+  // Redundant initializations okay.
+  void initialize() {
+    // Double-check dirty read idiom.
+    if (!_initialized) initialize_work();
+  }
+};
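+
+// Typical use by a worker thread in the set (illustrative only; 'sts' and the
+// loop condition are placeholders):
+//
+//   sts.join();                 // enter the set; blocks while a suspension is pending
+//   while (more_work_to_do()) {
+//     ... do a bounded chunk of concurrent work ...
+//     sts.yield("checkpoint");  // parks here if suspend_all() has been requested
+//   }
+//   sts.leave();
+//
+// The suspending thread brackets its critical section with suspend_all() and
+// resume_all().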
+
+
+class ConcurrentGCThread: public NamedThread {
+  friend class VMStructs;
+
+protected:
+  static bool _should_terminate;
+  static bool _has_terminated;
+
+  enum CGC_flag_type {
+    CGC_nil           = 0x0,
+    CGC_dont_suspend  = 0x1,
+    CGC_CGC_safepoint = 0x2,
+    CGC_VM_safepoint  = 0x4
+  };
+
+  static int _CGC_flag;
+
+  static bool CGC_flag_is_set(int b)       { return (_CGC_flag & b) != 0; }
+  static int set_CGC_flag(int b)           { return _CGC_flag |= b; }
+  static int reset_CGC_flag(int b)         { return _CGC_flag &= ~b; }
+
+  void stopWorldAndDo(VoidClosure* op);
+
+  // All instances share this one set.
+  static SuspendibleThreadSet _sts;
+
+  // Create and start the thread (setting its priority high).
+  void create_and_start();
+
+  // Do initialization steps in the thread: record stack base and size,
+  // init thread local storage, set JNI handle block.
+  void initialize_in_thread();
+
+  // Wait until Universe::is_fully_initialized();
+  void wait_for_universe_init();
+
+  // Record that the current thread is terminating, and will do no more
+  // concurrent work.
+  void terminate();
+
+public:
+  // Constructor
+
+  ConcurrentGCThread();
+  ~ConcurrentGCThread() {} // Exists to call NamedThread destructor.
+
+  // Tester
+  bool is_ConcurrentGC_thread() const          { return true;       }
+
+  static void safepoint_synchronize();
+  static void safepoint_desynchronize();
+
+  // All overrides should probably do _sts::yield, but we allow
+  // overriding for distinguished debugging messages.  The default is to do
+  // nothing.
+  virtual void yield() {}
+
+  bool should_yield() { return _sts.should_yield(); }
+
+  // These are prefixed with sts because this class already has (non-static)
+  // yield() and should_yield() methods, and the prefix is an easy way to
+  // differentiate the two sets.
+  static void stsYield(const char* id);
+  static bool stsShouldYield();
+  static void stsJoin();
+  static void stsLeave();
+
+};
+
+// The SurrogateLockerThread is used by concurrent GC threads for
+// manipulating Java monitors, in particular, currently for
+// manipulating the pending_list_lock. XXX
+class SurrogateLockerThread: public JavaThread {
+  friend class VMStructs;
+ public:
+  enum SLT_msg_type {
+    empty = 0,           // no message
+    acquirePLL,          // acquire pending list lock
+    releaseAndNotifyPLL  // notify and release pending list lock
+  };
+ private:
+  // the following are shared with the CMSThread
+  SLT_msg_type  _buffer;  // communication buffer
+  Monitor       _monitor; // monitor controlling buffer
+  BasicLock     _basicLock; // used for PLL locking
+
+ public:
+  static SurrogateLockerThread* make(TRAPS);
+
+  SurrogateLockerThread();
+
+  bool is_hidden_from_external_view() const     { return true; }
+
+  void loop(); // main method
+
+  void manipulatePLL(SLT_msg_type msg);
+
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_gcOverheadReporter.cpp.incl"
+
+class COReportingThread : public ConcurrentGCThread {
+private:
+  GCOverheadReporter* _reporter;
+
+public:
+  COReportingThread(GCOverheadReporter* reporter) : _reporter(reporter) {
+    guarantee( _reporter != NULL, "precondition" );
+    create_and_start();
+  }
+
+  virtual void run() {
+    initialize_in_thread();
+    wait_for_universe_init();
+
+    int period_ms = GCOverheadReportingPeriodMS;
+
+    while ( true ) {
+      os::sleep(Thread::current(), period_ms, false);
+
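+      // Do the sampling inside the suspendible thread set so that it cannot
+      // overlap with a suspend_all() request.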
+      _sts.join();
+      double now_sec = os::elapsedTime();
+      _reporter->collect_and_record_conc_overhead(now_sec);
+      _sts.leave();
+    }
+
+    terminate();
+  }
+};
+
+GCOverheadReporter* GCOverheadReporter::_reporter = NULL;
+
+GCOverheadReporter::GCOverheadReporter(size_t group_num,
+                                       const char* group_names[],
+                                       size_t length)
+    : _group_num(group_num), _prev_end_sec(0.0) {
+  guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum,
+             "precondition" );
+
+  _base = NEW_C_HEAP_ARRAY(GCOverheadReporterEntry, length);
+  _top  = _base + length;
+  _curr = _base;
+
+  for (size_t i = 0; i < group_num; ++i) {
+    guarantee( group_names[i] != NULL, "precondition" );
+    _group_names[i] = group_names[i];
+  }
+}
+
+void
+GCOverheadReporter::add(double start_sec, double end_sec,
+                        double* conc_overhead,
+                        double stw_overhead) {
+  assert( _curr <= _top, "invariant" );
+
+  if (_curr == _top) {
+    guarantee( false, "trace full" );
+    return;
+  }
+
+  _curr->_start_sec       = start_sec;
+  _curr->_end_sec         = end_sec;
+  for (size_t i = 0; i < _group_num; ++i) {
+    _curr->_conc_overhead[i] =
+      (conc_overhead != NULL) ? conc_overhead[i] : 0.0;
+  }
+  _curr->_stw_overhead    = stw_overhead;
+
+  ++_curr;
+}
+
+void
+GCOverheadReporter::collect_and_record_conc_overhead(double end_sec) {
+  double start_sec = _prev_end_sec;
+  guarantee( end_sec > start_sec, "invariant" );
+
+  double conc_overhead[MaxGCOverheadGroupNum];
+  COTracker::totalConcOverhead(end_sec, _group_num, conc_overhead);
+  add_conc_overhead(start_sec, end_sec, conc_overhead);
+  _prev_end_sec = end_sec;
+}
+
+void
+GCOverheadReporter::record_stw_start(double start_sec) {
+  guarantee( start_sec > _prev_end_sec, "invariant" );
+  collect_and_record_conc_overhead(start_sec);
+}
+
+void
+GCOverheadReporter::record_stw_end(double end_sec) {
+  double start_sec = _prev_end_sec;
+  COTracker::updateAllForSTW(start_sec, end_sec);
+  add_stw_overhead(start_sec, end_sec, 1.0);
+
+  _prev_end_sec = end_sec;
+}
+
+void
+GCOverheadReporter::print() const {
+  tty->print_cr("");
+  tty->print_cr("GC Overhead (%d entries)", _curr - _base);
+  tty->print_cr("");
+  GCOverheadReporterEntry* curr = _base;
+  while (curr < _curr) {
+    double total = curr->_stw_overhead;
+    for (size_t i = 0; i < _group_num; ++i)
+      total += curr->_conc_overhead[i];
+
+    tty->print("OVERHEAD %12.8lf %12.8lf ",
+               curr->_start_sec, curr->_end_sec);
+
+    for (size_t i = 0; i < _group_num; ++i)
+      tty->print("%s %12.8lf ", _group_names[i], curr->_conc_overhead[i]);
+
+    tty->print_cr("STW %12.8lf TOT %12.8lf", curr->_stw_overhead, total);
+    ++curr;
+  }
+  tty->print_cr("");
+}
+
+// statics
+
+void
+GCOverheadReporter::initGCOverheadReporter(size_t group_num,
+                                           const char* group_names[]) {
+  guarantee( _reporter == NULL, "should only be called once" );
+  guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum,
+             "precondition" );
+  guarantee( group_names != NULL, "pre-condition" );
+
+  if (GCOverheadReporting) {
+    _reporter = new GCOverheadReporter(group_num, group_names);
+    new COReportingThread(_reporter);
+  }
+}
+
+void
+GCOverheadReporter::recordSTWStart(double start_sec) {
+  if (_reporter != NULL)
+    _reporter->record_stw_start(start_sec);
+}
+
+void
+GCOverheadReporter::recordSTWEnd(double end_sec) {
+  if (_reporter != NULL)
+    _reporter->record_stw_end(end_sec);
+}
+
+void
+GCOverheadReporter::printGCOverhead() {
+  if (_reporter != NULL)
+    _reporter->print();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Keeps track of the GC overhead (both concurrent and STW). It stores
+// it in a large array and then prints it to tty at the end of the
+// execution.
+
+// See coTracker.hpp for an explanation of what groups are.
+
+// Let's set a maximum number of concurrent overhead groups, to
+// statically allocate any arrays we need and not to have to
+// malloc/free them. This is just a bit more convenient.
+enum {
+  MaxGCOverheadGroupNum = 4
+};
+
+typedef struct {
+  double _start_sec;
+  double _end_sec;
+
+  double _conc_overhead[MaxGCOverheadGroupNum];
+  double _stw_overhead;
+} GCOverheadReporterEntry;
+
+class GCOverheadReporter {
+  friend class COReportingThread;
+
+private:
+  enum PrivateConstants {
+    DefaultReporterLength = 128 * 1024
+  };
+
+  // Reference to the single instance of this class.
+  static GCOverheadReporter* _reporter;
+
+  // These three references point to the array that contains the GC
+  // overhead entries (_base is the base of the array, _top is the
+  // address just past the last entry of the array, _curr is the next
+  // entry to be used).
+  GCOverheadReporterEntry* _base;
+  GCOverheadReporterEntry* _top;
+  GCOverheadReporterEntry* _curr;
+
+  // The number of concurrent overhead groups.
+  size_t _group_num;
+
+  // The wall-clock time of the end of the last recorded period of GC
+  // overhead.
+  double _prev_end_sec;
+
+  // Names for the concurrent overhead groups.
+  const char* _group_names[MaxGCOverheadGroupNum];
+
+  // Add a new entry to the large array. conc_overhead being NULL is
+  // equivalent to an array full of 0.0s. conc_overhead should have a
+  // length of at least _group_num.
+  void add(double start_sec, double end_sec,
+           double* conc_overhead,
+           double stw_overhead);
+
+  // Add an entry that represents concurrent GC overhead.
+  // conc_overhead must be at least of length _group_num.
+  // conc_overhead being NULL is equivalent to an array full of 0.0s.
+  void add_conc_overhead(double start_sec, double end_sec,
+                         double* conc_overhead) {
+    add(start_sec, end_sec, conc_overhead, 0.0);
+  }
+
+  // Add an entry that represents STW GC overhead.
+  void add_stw_overhead(double start_sec, double end_sec,
+                        double stw_overhead) {
+    add(start_sec, end_sec, NULL, stw_overhead);
+  }
+
+  // It records the start of a STW pause (i.e. it records the
+  // concurrent overhead up to that point)
+  void record_stw_start(double start_sec);
+
+  // It records the end of a STW pause (i.e. it records the overhead
+  // associated with the pause and adjusts all the trackers to reflect
+  // the pause)
+  void record_stw_end(double end_sec);
+
+  // It queries all the trackers for their concurrent overhead and
+  // records it.
+  void collect_and_record_conc_overhead(double end_sec);
+
+  // It prints the contents of the GC overhead array
+  void print() const;
+
+
+  // Constructor. The same preconditions for group_num and group_names
+  // from initGCOverheadReporter apply here too.
+  GCOverheadReporter(size_t group_num,
+                     const char* group_names[],
+                     size_t length = DefaultReporterLength);
+
+public:
+
+  // statics
+
+  // It initialises the GCOverheadReporter and launches the concurrent
+  // overhead reporting thread. Both actions happen only if the
+  // GCOverheadReporting parameter is set. The length of the
+  // group_names array should be >= group_num and group_num should be
+  // <= MaxGCOverheadGroupNum. Entries group_names[0..group_num-1]
+  // should not be NULL.
+  static void initGCOverheadReporter(size_t group_num,
+                                     const char* group_names[]);
+
+  // The following three are provided for convenience and they are
+  // wrappers around record_stw_start(start_sec), record_stw_end(end_sec),
+  // and print(). Each of these checks whether GC overhead reporting
+  // is on (i.e. _reporter != NULL) and, if it is, calls the
+  // corresponding method. This saves repeating the check at every
+  // call site.
+  static void recordSTWStart(double start_sec);
+  static void recordSTWEnd(double end_sec);
+  static void printGCOverhead();
+};
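+
+// Typical use (illustrative only; the group names below are placeholders):
+//
+//   const char* names[] = { "group-0", "group-1" };
+//   GCOverheadReporter::initGCOverheadReporter(2, names);
+//   ...
+//   GCOverheadReporter::recordSTWStart(os::elapsedTime());
+//   ... do the stop-the-world work ...
+//   GCOverheadReporter::recordSTWEnd(os::elapsedTime());
+//   ...
+//   GCOverheadReporter::printGCOverhead();  // e.g. at VM exit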
--- a/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -50,7 +50,8 @@
   size_t capacity_in_bytes() const            { return capacity_in_words() * HeapWordSize; }
 
   // Size computations.  Sizes are in heapwords.
-  size_t capacity_in_words() const            { return pointer_delta(end(), bottom()); }
+  size_t capacity_in_words() const                { return pointer_delta(end(), bottom()); }
+  virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); }
 
   // Iteration.
   virtual void oop_iterate(OopClosure* cl);
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -23,13 +23,6 @@
  */
 
 inline void MarkSweep::mark_object(oop obj) {
-#ifndef SERIALGC
-  if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) {
-    assert(PSParallelCompact::mark_bitmap()->is_marked(obj),
-           "Should be marked in the marking bitmap");
-  }
-#endif // SERIALGC
-
   // some marks may contain information we need to preserve so we store them away
   // and overwrite the mark.  We'll restore it at the end of markSweep.
   markOop mark = obj->mark();
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -181,6 +181,25 @@
   return lgrp_spaces()->at(i)->space()->free_in_bytes();
 }
 
+
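+// Per-thread capacity: an even share of the total if the thread is not yet
+// bound to a locality group, otherwise the capacity of its group's space
+// (or 0 if no space exists for that group).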
+size_t MutableNUMASpace::capacity_in_words(Thread* thr) const {
+  guarantee(thr != NULL, "No thread");
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1) {
+    if (lgrp_spaces()->length() > 0) {
+      return capacity_in_words() / lgrp_spaces()->length();
+    } else {
+      assert(false, "There should be at least one locality group");
+      return 0;
+    }
+  }
+  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
+  if (i == -1) {
+    return 0;
+  }
+  return lgrp_spaces()->at(i)->space()->capacity_in_words();
+}
+
 // Check if the NUMA topology has changed. Add and remove spaces if needed.
 // The update can be forced by setting the force parameter equal to true.
 bool MutableNUMASpace::update_layout(bool force) {
@@ -372,6 +391,8 @@
 }
 
 // Produce a new chunk size. page_size() aligned.
+// This function is expected to be called on a sequence of i values from 0 to
+// lgrp_spaces()->length().
 size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) {
   size_t pages_available = base_space_size();
   for (int j = 0; j < i; j++) {
@@ -386,7 +407,7 @@
   size_t chunk_size = 0;
   if (alloc_rate > 0) {
     LGRPSpace *ls = lgrp_spaces()->at(i);
-    chunk_size = (size_t)(ls->alloc_rate()->average() * pages_available / alloc_rate) * page_size();
+    chunk_size = (size_t)(ls->alloc_rate()->average() / alloc_rate * pages_available) * page_size();
   }
   chunk_size = MAX2(chunk_size, page_size());
 
@@ -722,7 +743,8 @@
     i = os::random() % lgrp_spaces()->length();
   }
 
-  MutableSpace *s = lgrp_spaces()->at(i)->space();
+  LGRPSpace* ls = lgrp_spaces()->at(i);
+  MutableSpace *s = ls->space();
   HeapWord *p = s->allocate(size);
 
   if (p != NULL) {
@@ -743,6 +765,9 @@
       *(int*)i = 0;
     }
   }
+  if (p == NULL) {
+    ls->set_allocation_failed();
+  }
   return p;
 }
 
@@ -761,7 +786,8 @@
   if (i == -1) {
     i = os::random() % lgrp_spaces()->length();
   }
-  MutableSpace *s = lgrp_spaces()->at(i)->space();
+  LGRPSpace *ls = lgrp_spaces()->at(i);
+  MutableSpace *s = ls->space();
   HeapWord *p = s->cas_allocate(size);
   if (p != NULL) {
     size_t remainder = pointer_delta(s->end(), p + size);
@@ -790,6 +816,9 @@
       *(int*)i = 0;
     }
   }
+  if (p == NULL) {
+    ls->set_allocation_failed();
+  }
   return p;
 }
 
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -60,6 +60,7 @@
     MutableSpace* _space;
     MemRegion _invalid_region;
     AdaptiveWeightedAverage *_alloc_rate;
+    bool _allocation_failed;
 
     struct SpaceStats {
       size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
@@ -81,7 +82,7 @@
     char* last_page_scanned()            { return _last_page_scanned; }
     void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
    public:
-    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) {
+    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
       _space = new MutableSpace();
       _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
     }
@@ -103,8 +104,21 @@
       return *(int*)lgrp_id_value == p->lgrp_id();
     }
 
+    // Report a failed allocation.
+    void set_allocation_failed() { _allocation_failed = true;  }
+
     void sample() {
-      alloc_rate()->sample(space()->used_in_bytes());
+      // If there was a failed allocation, make the allocation rate equal
+      // to the size of the whole chunk. This ensures that the adaptation
+      // process makes progress.
+      size_t alloc_rate_sample;
+      if (_allocation_failed) {
+        alloc_rate_sample = space()->capacity_in_bytes();
+        _allocation_failed = false;
+      } else {
+        alloc_rate_sample = space()->used_in_bytes();
+      }
+      alloc_rate()->sample(alloc_rate_sample);
     }
 
     MemRegion invalid_region() const                { return _invalid_region;      }
@@ -190,6 +204,9 @@
   virtual void ensure_parsability();
   virtual size_t used_in_words() const;
   virtual size_t free_in_words() const;
+
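+  // Keep the base class's no-argument capacity_in_words() visible alongside
+  // the per-thread overload declared below.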
+  using MutableSpace::capacity_in_words;
+  virtual size_t capacity_in_words(Thread* thr) const;
   virtual size_t tlab_capacity(Thread* thr) const;
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
 
--- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -74,6 +74,7 @@
   // If the GC count has changed someone beat us to the collection
   // Get the Heap_lock after the pending_list_lock.
   Heap_lock->lock();
+
   // Check invocations
   if (skip_operation()) {
     // skip collection
@@ -82,6 +83,8 @@
     _prologue_succeeded = false;
   } else {
     _prologue_succeeded = true;
+    SharedHeap* sh = SharedHeap::heap();
+    if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = true;
   }
   return _prologue_succeeded;
 }
@@ -90,6 +93,8 @@
 void VM_GC_Operation::doit_epilogue() {
   assert(Thread::current()->is_Java_thread(), "just checking");
   // Release the Heap_lock first.
+  SharedHeap* sh = SharedHeap::heap();
+  if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = false;
   Heap_lock->unlock();
   release_and_notify_pending_list_lock();
 }
@@ -148,12 +153,27 @@
 void VM_GenCollectForPermanentAllocation::doit() {
   JvmtiGCForAllocationMarker jgcm;
   notify_gc_begin(true);
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  GCCauseSetter gccs(gch, _gc_cause);
-  gch->do_full_collection(gch->must_clear_all_soft_refs(),
-                          gch->n_gens() - 1);
-  _res = gch->perm_gen()->allocate(_size, false);
-  assert(gch->is_in_reserved_or_null(_res), "result not in heap");
+  SharedHeap* heap = (SharedHeap*)Universe::heap();
+  GCCauseSetter gccs(heap, _gc_cause);
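+  // Dispatch on the concrete heap kind: GenCollectedHeap and G1CollectedHeap
+  // use different full-collection entry points.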
+  switch (heap->kind()) {
+    case (CollectedHeap::GenCollectedHeap): {
+      GenCollectedHeap* gch = (GenCollectedHeap*)heap;
+      gch->do_full_collection(gch->must_clear_all_soft_refs(),
+                              gch->n_gens() - 1);
+      break;
+    }
+#ifndef SERIALGC
+    case (CollectedHeap::G1CollectedHeap): {
+      G1CollectedHeap* g1h = (G1CollectedHeap*)heap;
+      g1h->do_full_collection(_gc_cause == GCCause::_last_ditch_collection);
+      break;
+    }
+#endif // SERIALGC
+    default:
+      ShouldNotReachHere();
+  }
+  _res = heap->perm_gen()->allocate(_size, false);
+  assert(heap->is_in_reserved_or_null(_res), "result not in heap");
   if (_res == NULL && GC_locker::is_active_and_needs_gc()) {
     set_gc_locked();
   }
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -138,13 +138,6 @@
   return new_obj;
 }
 
-bool CollectedHeap::can_elide_permanent_oop_store_barriers() const {
-  // %%% This needs refactoring.  (It was gating logic from the server compiler.)
-  guarantee(kind() < CollectedHeap::G1CollectedHeap, "");
-  return !UseConcMarkSweepGC;
-}
-
-
 HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
   guarantee(false, "thread-local allocation buffers not supported");
   return NULL;
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -364,10 +364,8 @@
   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
   // via a TLAB up to the first subsequent safepoint.
-  virtual bool can_elide_tlab_store_barriers() const {
-    guarantee(kind() < CollectedHeap::G1CollectedHeap, "else change or refactor this");
-    return true;
-  }
+  virtual bool can_elide_tlab_store_barriers() const = 0;
+
   // If a compiler is eliding store barriers for TLAB-allocated objects,
   // there is probably a corresponding slow path which can produce
   // an object allocated anywhere.  The compiler's runtime support
@@ -379,12 +377,10 @@
   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap?  Applies when the compiler
   // is storing x to the heap, where x->is_perm() is true.
-  virtual bool can_elide_permanent_oop_store_barriers() const;
+  virtual bool can_elide_permanent_oop_store_barriers() const = 0;
 
   // Does this heap support heap inspection (+PrintClassHistogram?)
-  virtual bool supports_heap_inspection() const {
-    return false;   // Until RFE 5023697 is implemented
-  }
+  virtual bool supports_heap_inspection() const = 0;
 
   // Perform a collection of the heap; intended for use in implementing
   // "System.gc".  This probably implies as full a collection as the
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -122,7 +122,7 @@
       return result;
     }
   }
-  bool gc_overhead_limit_was_exceeded;
+  bool gc_overhead_limit_was_exceeded = false;
   result = Universe::heap()->mem_allocate(size,
                                           is_noref,
                                           false,
--- a/hotspot/src/share/vm/gc_interface/gcCause.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/gc_interface/gcCause.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -60,6 +60,8 @@
     _old_generation_too_full_to_scavenge,
     _adaptive_size_policy,
 
+    _g1_inc_collection_pause, _g1_pop_region_collection_pause,
+
     _last_ditch_collection,
     _last_gc_cause
   };
@@ -68,12 +70,14 @@
     return (cause == GCCause::_java_lang_system_gc ||
             cause == GCCause::_jvmti_force_gc);
   }
+
   inline static bool is_serviceability_requested_gc(GCCause::Cause
                                                              cause) {
     return (cause == GCCause::_jvmti_force_gc ||
             cause == GCCause::_heap_inspection ||
             cause == GCCause::_heap_dump);
   }
+
   // Return a string describing the GCCause.
   static const char* to_string(GCCause::Cause cause);
   // Return true if the GCCause is for a full collection.
--- a/hotspot/src/share/vm/includeDB_compiler1	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_compiler1	Wed Jul 05 16:43:15 2017 +0200
@@ -36,6 +36,9 @@
 c1_CFGPrinter.hpp                       c1_Compilation.hpp
 c1_CFGPrinter.hpp                       c1_Instruction.hpp
 
+cardTableModRefBS.cpp                   c1_LIR.hpp
+cardTableModRefBS.cpp                   c1_LIRGenerator.hpp
+
 c1_Canonicalizer.cpp                    c1_Canonicalizer.hpp
 c1_Canonicalizer.cpp                    c1_InstructionPrinter.hpp
 c1_Canonicalizer.cpp                    ciArray.hpp
@@ -55,6 +58,7 @@
 c1_CodeStubs_<arch>.cpp                 c1_LIRAssembler.hpp
 c1_CodeStubs_<arch>.cpp                 c1_MacroAssembler.hpp
 c1_CodeStubs_<arch>.cpp                 c1_Runtime1.hpp
+c1_CodeStubs_<arch>.cpp                 g1SATBCardTableModRefBS.hpp
 c1_CodeStubs_<arch>.cpp                 nativeInst_<arch>.hpp
 c1_CodeStubs_<arch>.cpp                 sharedRuntime.hpp
 c1_CodeStubs_<arch>.cpp                 vmreg_<arch>.inline.hpp
@@ -141,6 +145,7 @@
 c1_globals_<os_family>.hpp              globalDefinitions.hpp
 c1_globals_<os_family>.hpp              macros.hpp
 
+c1_GraphBuilder.cpp                     bitMap.inline.hpp
 c1_GraphBuilder.cpp                     bytecode.hpp
 c1_GraphBuilder.cpp                     c1_CFGPrinter.hpp
 c1_GraphBuilder.cpp                     c1_Canonicalizer.hpp
@@ -158,6 +163,7 @@
 c1_GraphBuilder.hpp                     ciMethodData.hpp
 c1_GraphBuilder.hpp                     ciStreams.hpp
 
+c1_IR.cpp                               bitMap.inline.hpp
 c1_IR.cpp                               c1_Compilation.hpp
 c1_IR.cpp                               c1_FrameMap.hpp
 c1_IR.cpp                               c1_GraphBuilder.hpp
@@ -232,20 +238,22 @@
 
 c1_LIRAssembler_<arch>.hpp              generate_platform_dependent_include
 
-c1_LIRGenerator.cpp                    c1_Compilation.hpp
-c1_LIRGenerator.cpp                    c1_FrameMap.hpp
-c1_LIRGenerator.cpp                    c1_Instruction.hpp
-c1_LIRGenerator.cpp                    c1_LIRAssembler.hpp
-c1_LIRGenerator.cpp                    c1_LIRGenerator.hpp
-c1_LIRGenerator.cpp                    c1_ValueStack.hpp
-c1_LIRGenerator.cpp                    ciArrayKlass.hpp
-c1_LIRGenerator.cpp                    ciInstance.hpp
-c1_LIRGenerator.cpp                    sharedRuntime.hpp
+c1_LIRGenerator.cpp                     bitMap.inline.hpp
+c1_LIRGenerator.cpp                     c1_Compilation.hpp
+c1_LIRGenerator.cpp                     c1_FrameMap.hpp
+c1_LIRGenerator.cpp                     c1_Instruction.hpp
+c1_LIRGenerator.cpp                     c1_LIRAssembler.hpp
+c1_LIRGenerator.cpp                     c1_LIRGenerator.hpp
+c1_LIRGenerator.cpp                     c1_ValueStack.hpp
+c1_LIRGenerator.cpp                     ciArrayKlass.hpp
+c1_LIRGenerator.cpp                     ciInstance.hpp
+c1_LIRGenerator.cpp                     heapRegion.hpp
+c1_LIRGenerator.cpp                     sharedRuntime.hpp
 
-c1_LIRGenerator.hpp                    c1_Instruction.hpp
-c1_LIRGenerator.hpp                    c1_LIR.hpp
-c1_LIRGenerator.hpp                    ciMethodData.hpp
-c1_LIRGenerator.hpp                    sizes.hpp
+c1_LIRGenerator.hpp                     c1_Instruction.hpp
+c1_LIRGenerator.hpp                     c1_LIR.hpp
+c1_LIRGenerator.hpp                     ciMethodData.hpp
+c1_LIRGenerator.hpp                     sizes.hpp
 
 c1_LIRGenerator_<arch>.cpp             c1_Compilation.hpp
 c1_LIRGenerator_<arch>.cpp             c1_FrameMap.hpp
@@ -260,6 +268,7 @@
 c1_LIRGenerator_<arch>.cpp             sharedRuntime.hpp
 c1_LIRGenerator_<arch>.cpp             vmreg_<arch>.inline.hpp
 
+c1_LinearScan.cpp                       bitMap.inline.hpp
 c1_LinearScan.cpp                       c1_CFGPrinter.hpp
 c1_LinearScan.cpp                       c1_Compilation.hpp
 c1_LinearScan.cpp                       c1_FrameMap.hpp
@@ -276,6 +285,7 @@
 c1_LinearScan.hpp                       c1_LIR.hpp
 c1_LinearScan.hpp                       c1_LIRGenerator.hpp
 
+c1_LinearScan_<arch>.cpp                bitMap.inline.hpp
 c1_LinearScan_<arch>.cpp                c1_Instruction.hpp
 c1_LinearScan_<arch>.cpp                c1_LinearScan.hpp
 
@@ -298,6 +308,7 @@
 
 c1_MacroAssembler_<arch>.hpp            generate_platform_dependent_include
 
+c1_Optimizer.cpp                        bitMap.inline.hpp
 c1_Optimizer.cpp                        c1_Canonicalizer.hpp
 c1_Optimizer.cpp                        c1_Optimizer.hpp
 c1_Optimizer.cpp                        c1_ValueMap.hpp
@@ -363,6 +374,7 @@
 c1_Runtime1_<arch>.cpp                  vframeArray.hpp
 c1_Runtime1_<arch>.cpp                  vmreg_<arch>.inline.hpp
 
+c1_ValueMap.cpp                         bitMap.inline.hpp
 c1_ValueMap.cpp                         c1_Canonicalizer.hpp
 c1_ValueMap.cpp                         c1_IR.hpp
 c1_ValueMap.cpp                         c1_ValueMap.hpp
@@ -433,4 +445,3 @@
 top.hpp                                 c1_globals.hpp
 
 vmStructs.hpp                           c1_Runtime1.hpp
-
--- a/hotspot/src/share/vm/includeDB_compiler2	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_compiler2	Wed Jul 05 16:43:15 2017 +0200
@@ -461,10 +461,13 @@
 graphKit.cpp                            addnode.hpp
 graphKit.cpp                            barrierSet.hpp
 graphKit.cpp                            cardTableModRefBS.hpp
+graphKit.cpp                            g1SATBCardTableModRefBS.hpp
 graphKit.cpp                            collectedHeap.hpp
 graphKit.cpp                            compileLog.hpp
 graphKit.cpp                            deoptimization.hpp
 graphKit.cpp                            graphKit.hpp
+graphKit.cpp                            heapRegion.hpp
+graphKit.cpp                            idealKit.hpp
 graphKit.cpp                            locknode.hpp
 graphKit.cpp                            machnode.hpp
 graphKit.cpp                            parse.hpp
@@ -484,6 +487,7 @@
 idealKit.cpp                            callnode.hpp
 idealKit.cpp                            cfgnode.hpp
 idealKit.cpp                            idealKit.hpp
+idealKit.cpp                            runtime.hpp
 
 idealKit.hpp                            connode.hpp
 idealKit.hpp                            mulnode.hpp
@@ -582,6 +586,7 @@
 loopTransform.cpp                       addnode.hpp
 loopTransform.cpp                       allocation.inline.hpp
 loopTransform.cpp                       connode.hpp
+loopTransform.cpp                       compileLog.hpp
 loopTransform.cpp                       divnode.hpp
 loopTransform.cpp                       loopnode.hpp
 loopTransform.cpp                       mulnode.hpp
@@ -597,6 +602,7 @@
 loopnode.cpp                            allocation.inline.hpp
 loopnode.cpp                            callnode.hpp
 loopnode.cpp                            ciMethodData.hpp
+loopnode.cpp                            compileLog.hpp
 loopnode.cpp                            connode.hpp
 loopnode.cpp                            divnode.hpp
 loopnode.cpp                            loopnode.hpp
@@ -915,9 +921,11 @@
 runtime.cpp                             connode.hpp
 runtime.cpp                             copy.hpp
 runtime.cpp                             fprofiler.hpp
+runtime.cpp                             g1SATBCardTableModRefBS.hpp
 runtime.cpp                             gcLocker.inline.hpp
 runtime.cpp                             graphKit.hpp
 runtime.cpp                             handles.inline.hpp
+runtime.cpp                             heapRegion.hpp
 runtime.cpp                             icBuffer.hpp
 runtime.cpp                             interfaceSupport.hpp
 runtime.cpp                             interpreter.hpp
--- a/hotspot/src/share/vm/includeDB_core	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_core	Wed Jul 05 16:43:15 2017 +0200
@@ -288,6 +288,10 @@
 attachListener.hpp                      debug.hpp
 attachListener.hpp                      ostream.hpp
 
+barrierSet.cpp                          barrierSet.hpp
+barrierSet.cpp                          collectedHeap.hpp
+barrierSet.cpp                          universe.hpp
+
 barrierSet.hpp                          memRegion.hpp
 barrierSet.hpp                          oopsHierarchy.hpp
 
@@ -295,7 +299,7 @@
 barrierSet.inline.hpp                   cardTableModRefBS.hpp
 
 bcEscapeAnalyzer.cpp                    bcEscapeAnalyzer.hpp
-bcEscapeAnalyzer.cpp                    bitMap.hpp
+bcEscapeAnalyzer.cpp                    bitMap.inline.hpp
 bcEscapeAnalyzer.cpp                    bytecode.hpp
 bcEscapeAnalyzer.cpp                    ciConstant.hpp
 bcEscapeAnalyzer.cpp                    ciField.hpp
@@ -320,13 +324,12 @@
 biasedLocking.hpp                       growableArray.hpp
 biasedLocking.hpp                       handles.hpp
 
-bitMap.cpp                              bitMap.hpp
+bitMap.cpp                              allocation.inline.hpp
 bitMap.cpp                              bitMap.inline.hpp
 bitMap.cpp                              copy.hpp
 bitMap.cpp                              os_<os_family>.inline.hpp
 
 bitMap.hpp                              allocation.hpp
-bitMap.hpp                              ostream.hpp
 bitMap.hpp                              top.hpp
 
 bitMap.inline.hpp                       atomic.hpp
@@ -645,6 +648,7 @@
 ciMethod.cpp                            abstractCompiler.hpp
 ciMethod.cpp                            allocation.inline.hpp
 ciMethod.cpp                            bcEscapeAnalyzer.hpp
+ciMethod.cpp                            bitMap.inline.hpp
 ciMethod.cpp                            ciCallProfile.hpp
 ciMethod.cpp                            ciExceptionHandler.hpp
 ciMethod.cpp                            ciInstanceKlass.hpp
@@ -1759,7 +1763,7 @@
 
 genRemSet.hpp                           oop.hpp
 
-generateOopMap.cpp                      bitMap.hpp
+generateOopMap.cpp                      bitMap.inline.hpp
 generateOopMap.cpp                      bytecodeStream.hpp
 generateOopMap.cpp                      generateOopMap.hpp
 generateOopMap.cpp                      handles.inline.hpp
@@ -1808,6 +1812,8 @@
 generation.inline.hpp                   generation.hpp
 generation.inline.hpp                   space.hpp
 
+genOopClosures.hpp                      oop.hpp
+
 generationSpec.cpp                      compactPermGen.hpp
 generationSpec.cpp                      defNewGeneration.hpp
 generationSpec.cpp                      filemap.hpp
@@ -2219,6 +2225,11 @@
 invocationCounter.hpp                   exceptions.hpp
 invocationCounter.hpp                   handles.hpp
 
+intHisto.cpp                            intHisto.hpp
+
+intHisto.hpp                            allocation.hpp
+intHisto.hpp                            growableArray.hpp
+
 iterator.cpp                            iterator.hpp
 iterator.cpp                            oop.inline.hpp
 
@@ -2818,6 +2829,7 @@
 methodKlass.hpp                         methodOop.hpp
 
 methodLiveness.cpp                      allocation.inline.hpp
+methodLiveness.cpp                      bitMap.inline.hpp
 methodLiveness.cpp                      bytecode.hpp
 methodLiveness.cpp                      bytecodes.hpp
 methodLiveness.cpp                      ciMethod.hpp
@@ -2964,6 +2976,11 @@
 nmethod.hpp                             codeBlob.hpp
 nmethod.hpp                             pcDesc.hpp
 
+numberSeq.cpp                           debug.hpp
+numberSeq.cpp                           numberSeq.hpp
+numberSeq.cpp                           globalDefinitions.hpp
+numberSeq.cpp                           allocation.inline.hpp
+
 objArrayKlass.cpp                       collectedHeap.inline.hpp
 objArrayKlass.cpp                       copy.hpp
 objArrayKlass.cpp                       genOopClosures.inline.hpp
@@ -3406,8 +3423,6 @@
 referencePolicy.cpp                     referencePolicy.hpp
 referencePolicy.cpp                     universe.hpp
 
-referencePolicy.hpp                     oop.hpp
-
 referenceProcessor.cpp                  collectedHeap.hpp
 referenceProcessor.cpp                  collectedHeap.inline.hpp
 referenceProcessor.cpp                  java.hpp
@@ -3758,6 +3773,8 @@
 specialized_oop_closures.cpp            ostream.hpp
 specialized_oop_closures.cpp            specialized_oop_closures.hpp
 
+specialized_oop_closures.hpp            atomic.hpp
+
 stackMapFrame.cpp                       globalDefinitions.hpp
 stackMapFrame.cpp                       handles.inline.hpp
 stackMapFrame.cpp                       oop.inline.hpp
@@ -4000,7 +4017,6 @@
 
 taskqueue.hpp                           allocation.hpp
 taskqueue.hpp                           allocation.inline.hpp
-taskqueue.hpp                           debug.hpp
 taskqueue.hpp                           mutex.hpp
 taskqueue.hpp                           orderAccess_<os_arch>.inline.hpp
 
@@ -4038,6 +4054,7 @@
 
 templateInterpreterGenerator_<arch>.hpp generate_platform_dependent_include
 
+templateTable.cpp                       collectedHeap.hpp
 templateTable.cpp                       templateTable.hpp
 templateTable.cpp                       timer.hpp
 
@@ -4542,6 +4559,7 @@
 vm_operations.cpp                       compilerOracle.hpp
 vm_operations.cpp                       deoptimization.hpp
 vm_operations.cpp                       interfaceSupport.hpp
+vm_operations.cpp                       isGCActiveMark.hpp
 vm_operations.cpp                       resourceArea.hpp
 vm_operations.cpp                       threadService.hpp
 vm_operations.cpp                       thread_<os_family>.inline.hpp
--- a/hotspot/src/share/vm/includeDB_features	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_features	Wed Jul 05 16:43:15 2017 +0200
@@ -99,6 +99,7 @@
 heapDumper.cpp                          reflectionUtils.hpp
 heapDumper.cpp                          symbolTable.hpp
 heapDumper.cpp                          systemDictionary.hpp
+heapDumper.cpp                          threadService.hpp
 heapDumper.cpp                          universe.hpp
 heapDumper.cpp                          vframe.hpp
 heapDumper.cpp                          vmGCOperations.hpp
--- a/hotspot/src/share/vm/includeDB_gc_parallel	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_gc_parallel	Wed Jul 05 16:43:15 2017 +0200
@@ -21,6 +21,10 @@
 // have any questions.
 //  
 
+assembler_<arch>.cpp                    g1SATBCardTableModRefBS.hpp
+assembler_<arch>.cpp                    g1CollectedHeap.inline.hpp
+assembler_<arch>.cpp                    heapRegion.hpp
+
 collectorPolicy.cpp                     cmsAdaptiveSizePolicy.hpp
 collectorPolicy.cpp                     cmsGCAdaptivePolicyCounters.hpp
 
@@ -37,6 +41,9 @@
 
 heapInspection.cpp                      parallelScavengeHeap.hpp
 
+instanceKlass.cpp                       heapRegionSeq.inline.hpp
+instanceKlass.cpp                       g1CollectedHeap.inline.hpp
+instanceKlass.cpp                       g1OopClosures.inline.hpp
 instanceKlass.cpp                       oop.pcgc.inline.hpp
 instanceKlass.cpp                       psPromotionManager.inline.hpp
 instanceKlass.cpp                       psScavenge.inline.hpp
@@ -48,6 +55,9 @@
 instanceKlassKlass.cpp                  psScavenge.inline.hpp
 instanceKlassKlass.cpp                  parOopClosures.inline.hpp
 
+instanceRefKlass.cpp                    heapRegionSeq.inline.hpp
+instanceRefKlass.cpp                    g1CollectedHeap.inline.hpp
+instanceRefKlass.cpp                    g1OopClosures.inline.hpp
 instanceRefKlass.cpp                    oop.pcgc.inline.hpp
 instanceRefKlass.cpp                    psPromotionManager.inline.hpp
 instanceRefKlass.cpp                    psScavenge.inline.hpp
@@ -70,6 +80,7 @@
 
 memoryService.cpp                       cmsPermGen.hpp
 memoryService.cpp                       concurrentMarkSweepGeneration.hpp
+memoryService.cpp                       g1CollectedHeap.inline.hpp
 memoryService.cpp                       parNewGeneration.hpp
 memoryService.cpp                       parallelScavengeHeap.hpp
 memoryService.cpp                       psMemoryPool.hpp
@@ -80,6 +91,9 @@
 methodDataKlass.cpp                     oop.pcgc.inline.hpp
 methodDataKlass.cpp                     psScavenge.inline.hpp
 
+objArrayKlass.cpp                       heapRegionSeq.inline.hpp
+objArrayKlass.cpp                       g1CollectedHeap.inline.hpp
+objArrayKlass.cpp                       g1OopClosures.inline.hpp
 objArrayKlass.cpp                       oop.pcgc.inline.hpp
 objArrayKlass.cpp                       psPromotionManager.inline.hpp
 objArrayKlass.cpp                       psScavenge.inline.hpp
@@ -122,6 +136,9 @@
 thread.cpp                              concurrentMarkSweepThread.hpp
 thread.cpp                              pcTasks.hpp
 
+thread.hpp                              dirtyCardQueue.hpp
+thread.hpp                              satbQueue.hpp
+
 universe.cpp                            parallelScavengeHeap.hpp
 universe.cpp                            cmsCollectorPolicy.hpp
 universe.cpp                            cmsAdaptiveSizePolicy.hpp
--- a/hotspot/src/share/vm/includeDB_jvmti	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_jvmti	Wed Jul 05 16:43:15 2017 +0200
@@ -209,6 +209,7 @@
 jvmtiManageCapabilities.hpp             allocation.hpp
 jvmtiManageCapabilities.hpp             jvmti.h
 
+jvmtiRedefineClasses.cpp                bitMap.inline.hpp
 jvmtiRedefineClasses.cpp                codeCache.hpp
 jvmtiRedefineClasses.cpp                deoptimization.hpp
 jvmtiRedefineClasses.cpp                gcLocker.hpp
--- a/hotspot/src/share/vm/interpreter/templateTable.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/interpreter/templateTable.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -172,6 +172,7 @@
 
 Template*                  TemplateTable::_desc;
 InterpreterMacroAssembler* TemplateTable::_masm;
+BarrierSet*                TemplateTable::_bs;
 
 
 void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(), char filler) {
@@ -244,6 +245,8 @@
   // Initialize table
   TraceTime timer("TemplateTable initialization", TraceStartupTime);
 
+  _bs = Universe::heap()->barrier_set();
+
   // For better readability
   const char _    = ' ';
   const int  ____ = 0;
--- a/hotspot/src/share/vm/interpreter/templateTable.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/interpreter/templateTable.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -82,6 +82,7 @@
   static Template*       _desc;                  // the current template to be generated
   static Bytecodes::Code bytecode()              { return _desc->bytecode(); }
 
+  static BarrierSet*     _bs;                    // Cache the barrier set.
  public:
   //%note templates_1
   static InterpreterMacroAssembler* _masm;       // the assembler used when generating templates
--- a/hotspot/src/share/vm/memory/allocation.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/allocation.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -338,6 +338,12 @@
       DEBUG_ONLY(((ResourceObj *)res)->_allocation = RESOURCE_AREA;)
       return res;
   }
+  void* operator new(size_t size, void* where, allocation_type type) {
+      void* res = where;
+      // Set allocation type in the resource object
+      DEBUG_ONLY(((ResourceObj *)res)->_allocation = type;)
+      return res;
+  }
   void  operator delete(void* p);
 };
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/memory/barrierSet.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,36 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_barrierSet.cpp.incl"
+
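+// Static entry points that generated code can call directly; each forwards to
+// the barrier set installed in the heap.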
+// count is in HeapWords
+void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) {
+   Universe::heap()->barrier_set()->write_ref_array_pre(MemRegion(start, start + count));
+}
+
+// count is in HeapWords
+void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) {
+   Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, start + count));
+}
--- a/hotspot/src/share/vm/memory/barrierSet.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/barrierSet.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -32,6 +32,8 @@
     ModRef,
     CardTableModRef,
     CardTableExtension,
+    G1SATBCT,
+    G1SATBCTLogging,
     Other,
     Uninit
   };
@@ -42,14 +44,16 @@
 
 public:
 
+  BarrierSet() { _kind = Uninit; }
   // To get around prohibition on RTTI.
-  virtual BarrierSet::Name kind() { return _kind; }
+  BarrierSet::Name kind() { return _kind; }
   virtual bool is_a(BarrierSet::Name bsn) = 0;
 
   // These operations indicate what kind of barriers the BarrierSet has.
   virtual bool has_read_ref_barrier() = 0;
   virtual bool has_read_prim_barrier() = 0;
   virtual bool has_write_ref_barrier() = 0;
+  virtual bool has_write_ref_pre_barrier() = 0;
   virtual bool has_write_prim_barrier() = 0;
 
   // These functions indicate whether a particular access of the given
@@ -57,7 +61,8 @@
   virtual bool read_ref_needs_barrier(void* field) = 0;
   virtual bool read_prim_needs_barrier(HeapWord* field, size_t bytes) = 0;
   virtual bool write_ref_needs_barrier(void* field, oop new_val) = 0;
-  virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, juint val1, juint val2) = 0;
+  virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes,
+                                        juint val1, juint val2) = 0;
 
   // The first four operations provide a direct implementation of the
   // barrier set.  An interpreter loop, for example, could call these
@@ -75,6 +80,13 @@
   // (For efficiency reasons, this operation is specialized for certain
   // barrier types.  Semantically, it should be thought of as a call to the
   // virtual "_work" function below, which must implement the barrier.)
+  // First the pre-write versions...
+  inline void write_ref_field_pre(void* field, oop new_val);
+protected:
+  virtual void write_ref_field_pre_work(void* field, oop new_val) {};
+public:
+
+  // ...then the post-write version.
   inline void write_ref_field(void* field, oop new_val);
 protected:
   virtual void write_ref_field_work(void* field, oop new_val) = 0;
@@ -92,6 +104,7 @@
   // the particular barrier.
   virtual bool has_read_ref_array_opt() = 0;
   virtual bool has_read_prim_array_opt() = 0;
+  virtual bool has_write_ref_array_pre_opt() { return true; }
   virtual bool has_write_ref_array_opt() = 0;
   virtual bool has_write_prim_array_opt() = 0;
 
@@ -104,7 +117,13 @@
   virtual void read_ref_array(MemRegion mr) = 0;
   virtual void read_prim_array(MemRegion mr) = 0;
 
+  virtual void write_ref_array_pre(MemRegion mr) {}
   inline void write_ref_array(MemRegion mr);
+
+  // Static versions, suitable for calling from generated code.
+  static void static_write_ref_array_pre(HeapWord* start, size_t count);
+  static void static_write_ref_array_post(HeapWord* start, size_t count);
+
 protected:
   virtual void write_ref_array_work(MemRegion mr) = 0;
 public:
@@ -120,33 +139,6 @@
   virtual void write_region_work(MemRegion mr) = 0;
 public:
 
-  // The remaining sets of operations are called by compilers or other code
-  // generators to insert barriers into generated code.  There may be
-  // several such code generators; the signatures of these
-  // barrier-generating functions may differ from generator to generator.
-  // There will be a set of four function signatures for each code
-  // generator, which accomplish the generation of barriers of the four
-  // kinds listed above.
-
-#ifdef TBD
-  // Generates code to invoke the barrier, if any, necessary when reading
-  // the ref field at "offset" in "obj".
-  virtual void gen_read_ref_field() = 0;
-
-  // Generates code to invoke the barrier, if any, necessary when reading
-  // the primitive field of "bytes" bytes at offset" in "obj".
-  virtual void gen_read_prim_field() = 0;
-
-  // Generates code to invoke the barrier, if any, necessary when writing
-  // "new_val" into the ref field at "offset" in "obj".
-  virtual void gen_write_ref_field() = 0;
-
-  // Generates code to invoke the barrier, if any, necessary when writing
-  // the "bytes"-byte value "new_val" into the primitive field at "offset"
-  // in "obj".
-  virtual void gen_write_prim_field() = 0;
-#endif
-
   // Some barrier sets create tables whose elements correspond to parts of
   // the heap; the CardTableModRefBS is an example.  Such barrier sets will
   // normally reserve space for such tables, and commit parts of the table
--- a/hotspot/src/share/vm/memory/barrierSet.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/barrierSet.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -26,6 +26,14 @@
 // performance-critical calls when the barrier is the most common
 // card-table kind.
 
+void BarrierSet::write_ref_field_pre(void* field, oop new_val) {
+  if (kind() == CardTableModRef) {
+    ((CardTableModRefBS*)this)->inline_write_ref_field_pre(field, new_val);
+  } else {
+    write_ref_field_pre_work(field, new_val);
+  }
+}
+
 void BarrierSet::write_ref_field(void* field, oop new_val) {
   if (kind() == CardTableModRef) {
     ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val);
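
A minimal sketch of the intended store sequence, assuming a caller outside the barrier set; the function name is illustrative and the heap accessor is the usual Universe::heap()->barrier_set():

// Illustrative only: pre-barrier, store, post-barrier.
void store_heap_oop_with_barriers(void* field, oop new_val) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  bs->write_ref_field_pre(field, new_val);   // no-op for CardTableModRef; real work for SATB barriers
  *(oop*)field = new_val;                    // the reference store itself
  bs->write_ref_field(field, new_val);       // post-barrier, e.g. dirty the card
}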
--- a/hotspot/src/share/vm/memory/blockOffsetTable.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/blockOffsetTable.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -184,7 +184,7 @@
     "Offset card has an unexpected value");
   size_t start_card_for_region = start_card;
   u_char offset = max_jubyte;
-  for (int i = 0; i <= N_powers-1; i++) {
+  for (int i = 0; i < N_powers; i++) {
     // -1 so that the card with the actual offset is counted.  Another -1
     // so that the reach ends in this region and not at the start
     // of the next.
--- a/hotspot/src/share/vm/memory/blockOffsetTable.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/blockOffsetTable.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -214,6 +214,7 @@
 //////////////////////////////////////////////////////////////////////////
 class BlockOffsetArray: public BlockOffsetTable {
   friend class VMStructs;
+  friend class G1BlockOffsetArray; // temp. until we restructure and cleanup
  protected:
   // The following enums are used by do_block_internal() below
   enum Action {
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -344,6 +344,17 @@
 }
 
 
+bool CardTableModRefBS::claim_card(size_t card_index) {
+  jbyte val = _byte_map[card_index];
+  if (val != claimed_card_val()) {
+    jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(),
+                                &_byte_map[card_index], val);
+    // Only the thread whose cmpxchg installs the claimed value wins the card.
+    return res == val;
+  }
+  return false;
+}
+
 void CardTableModRefBS::non_clean_card_iterate(Space* sp,
                                                MemRegion mr,
                                                DirtyCardToOopClosure* dcto_cl,
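
A usage sketch for claim_card(), assuming several GC worker threads walk the same card range; is_card_dirty() and claim_card() are declared in cardTableModRefBS.hpp below, while process_card() is a hypothetical stand-in for the per-card work:

// Illustrative only: each dirty card is processed by exactly one thread.
void process_card(size_t card_index);   // hypothetical per-card work
void scan_cards_in_parallel(CardTableModRefBS* ct, size_t first, size_t limit) {
  for (size_t i = first; i < limit; i++) {
    if (ct->is_card_dirty(i) && ct->claim_card(i)) {
      process_card(i);   // only the thread whose CAS succeeded gets here for card i
    }
  }
}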
@@ -443,7 +454,7 @@
   }
 }
 
-void CardTableModRefBS::invalidate(MemRegion mr) {
+void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) {
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (!mri.is_empty()) dirty_MemRegion(mri);
@@ -471,11 +482,15 @@
   }
 }
 
+void CardTableModRefBS::dirty(MemRegion mr) {
+  jbyte* first = byte_for(mr.start());
+  jbyte* last  = byte_after(mr.last());
+  memset(first, dirty_card, last-first);
+}
+
 // NOTES:
 // (1) Unlike mod_oop_in_space_iterate() above, dirty_card_iterate()
 //     iterates over dirty cards ranges in increasing address order.
-// (2) Unlike, e.g., dirty_card_range_after_preclean() below,
-//     this method does not make the dirty cards prelceaned.
 void CardTableModRefBS::dirty_card_iterate(MemRegion mr,
                                            MemRegionClosure* cl) {
   for (int i = 0; i < _cur_covered_regions; i++) {
@@ -501,7 +516,9 @@
   }
 }
 
-MemRegion CardTableModRefBS::dirty_card_range_after_preclean(MemRegion mr) {
+MemRegion CardTableModRefBS::dirty_card_range_after_reset(MemRegion mr,
+                                                          bool reset,
+                                                          int reset_val) {
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (!mri.is_empty()) {
@@ -518,8 +535,10 @@
                dirty_cards++, next_entry++);
           MemRegion cur_cards(addr_for(cur_entry),
                               dirty_cards*card_size_in_words);
-          for (size_t i = 0; i < dirty_cards; i++) {
-             cur_entry[i] = precleaned_card;
+          if (reset) {
+            for (size_t i = 0; i < dirty_cards; i++) {
+              cur_entry[i] = reset_val;
+            }
           }
           return cur_cards;
         }
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -54,6 +54,7 @@
     clean_card                  = -1,
     dirty_card                  =  0,
     precleaned_card             =  1,
+    claimed_card                =  3,
     last_card                   =  4,
     CT_MR_BS_last_reserved      = 10
   };
@@ -150,17 +151,6 @@
     return byte_for(p) + 1;
   }
 
-  // Mapping from card marking array entry to address of first word
-  HeapWord* addr_for(const jbyte* p) const {
-    assert(p >= _byte_map && p < _byte_map + _byte_map_size,
-           "out of bounds access to card marking array");
-    size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte));
-    HeapWord* result = (HeapWord*) (delta << card_shift);
-    assert(_whole_heap.contains(result),
-           "out of bounds accessor from card marking array");
-    return result;
-  }
-
   // Iterate over the portion of the card-table which covers the given
   // region mr in the given space and apply cl to any dirty sub-regions
   // of mr. cl and dcto_cl must either be the same closure or cl must
@@ -263,16 +253,22 @@
     card_size_in_words          = card_size / sizeof(HeapWord)
   };
 
+  static int clean_card_val()      { return clean_card; }
+  static int dirty_card_val()      { return dirty_card; }
+  static int claimed_card_val()    { return claimed_card; }
+  static int precleaned_card_val() { return precleaned_card; }
+
   // For RTTI simulation.
-  BarrierSet::Name kind() { return BarrierSet::CardTableModRef; }
   bool is_a(BarrierSet::Name bsn) {
-    return bsn == BarrierSet::CardTableModRef || bsn == BarrierSet::ModRef;
+    return bsn == BarrierSet::CardTableModRef || ModRefBarrierSet::is_a(bsn);
   }
 
   CardTableModRefBS(MemRegion whole_heap, int max_covered_regions);
 
   // *** Barrier set functions.
 
+  bool has_write_ref_pre_barrier() { return false; }
+
   inline bool write_ref_needs_barrier(void* field, oop new_val) {
     // Note that this assumes the perm gen is the highest generation
     // in the address space
@@ -315,11 +311,33 @@
 
   // *** Card-table-barrier-specific things.
 
+  inline void inline_write_ref_field_pre(void* field, oop newVal) {}
+
   inline void inline_write_ref_field(void* field, oop newVal) {
     jbyte* byte = byte_for(field);
     *byte = dirty_card;
   }
 
+  // These are used by G1, when it uses the card table as a temporary data
+  // structure for card claiming.
+  bool is_card_dirty(size_t card_index) {
+    return _byte_map[card_index] == dirty_card_val();
+  }
+
+  void mark_card_dirty(size_t card_index) {
+    _byte_map[card_index] = dirty_card_val();
+  }
+
+  bool is_card_claimed(size_t card_index) {
+    return _byte_map[card_index] == claimed_card_val();
+  }
+
+  bool claim_card(size_t card_index);
+
+  bool is_card_clean(size_t card_index) {
+    return _byte_map[card_index] == clean_card_val();
+  }
+
   // Card marking array base (adjusted for heap low boundary)
   // This would be the 0th element of _byte_map, if the heap started at 0x0.
   // But since the heap starts at some higher address, this points to somewhere
@@ -344,8 +362,9 @@
   }
 
   // ModRefBS functions.
-  void invalidate(MemRegion mr);
+  virtual void invalidate(MemRegion mr, bool whole_heap = false);
   void clear(MemRegion mr);
+  void dirty(MemRegion mr);
   void mod_oop_in_space_iterate(Space* sp, OopClosure* cl,
                                 bool clear = false,
                                 bool before_save_marks = false);
@@ -375,18 +394,39 @@
 
   static uintx ct_max_alignment_constraint();
 
-  // Apply closure cl to the dirty cards lying completely
-  // within MemRegion mr, setting the cards to precleaned.
-  void      dirty_card_iterate(MemRegion mr, MemRegionClosure* cl);
+  // Apply closure "cl" to the dirty cards containing some part of
+  // MemRegion "mr".
+  void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl);
 
   // Return the MemRegion corresponding to the first maximal run
-  // of dirty cards lying completely within MemRegion mr, after
-  // marking those cards precleaned.
-  MemRegion dirty_card_range_after_preclean(MemRegion mr);
+  // of dirty cards lying completely within MemRegion mr.
+  // If reset is "true", then sets those card table entries to the given
+  // value.
+  MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset,
+                                         int reset_val);
 
   // Set all the dirty cards in the given region to precleaned state.
   void preclean_dirty_cards(MemRegion mr);
 
+  // Provide read-only access to the card table array.
+  const jbyte* byte_for_const(const void* p) const {
+    return byte_for(p);
+  }
+  const jbyte* byte_after_const(const void* p) const {
+    return byte_after(p);
+  }
+
+  // Mapping from card marking array entry to address of first word
+  HeapWord* addr_for(const jbyte* p) const {
+    assert(p >= _byte_map && p < _byte_map + _byte_map_size,
+           "out of bounds access to card marking array");
+    size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte));
+    HeapWord* result = (HeapWord*) (delta << card_shift);
+    assert(_whole_heap.contains(result),
+           "out of bounds accessor from card marking array");
+    return result;
+  }
+
   // Mapping from address to card marking array index.
   int index_for(void* p) {
     assert(_whole_heap.contains(p),
@@ -402,6 +442,7 @@
   static size_t par_chunk_heapword_alignment() {
     return CardsPerStrideChunk * card_size_in_words;
   }
+
 };
 
 class CardTableRS;
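
Sketch of the call sites implied by the new interface, assuming "ct" is a CardTableModRefBS* and "mr" a MemRegion; the variable names are illustrative:

// Old preclean behaviour: find the run and mark its cards precleaned.
MemRegion precleaned =
  ct->dirty_card_range_after_reset(mr, true,
                                   CardTableModRefBS::precleaned_card_val());
// Lookup only: find the run but leave the card entries untouched.
MemRegion found = ct->dirty_card_range_after_reset(mr, false, 0 /* ignored */);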
--- a/hotspot/src/share/vm/memory/cardTableRS.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/cardTableRS.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -27,10 +27,25 @@
 
 CardTableRS::CardTableRS(MemRegion whole_heap,
                          int max_covered_regions) :
-  GenRemSet(&_ct_bs),
-  _ct_bs(whole_heap, max_covered_regions),
-  _cur_youngergen_card_val(youngergenP1_card)
+  GenRemSet(),
+  _cur_youngergen_card_val(youngergenP1_card),
+  _regions_to_iterate(max_covered_regions - 1)
 {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    if (G1RSBarrierUseQueue) {
+      _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap,
+                                                  max_covered_regions);
+    } else {
+      _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions);
+    }
+  } else {
+    _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
+  }
+#else
+  _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
+#endif
+  set_bs(_ct_bs);
   _last_cur_val_in_gen = new jbyte[GenCollectedHeap::max_gens + 1];
   if (_last_cur_val_in_gen == NULL) {
     vm_exit_during_initialization("Could not last_cur_val_in_gen array.");
@@ -38,20 +53,19 @@
   for (int i = 0; i < GenCollectedHeap::max_gens + 1; i++) {
     _last_cur_val_in_gen[i] = clean_card_val();
   }
-  _ct_bs.set_CTRS(this);
+  _ct_bs->set_CTRS(this);
 }
 
 void CardTableRS::resize_covered_region(MemRegion new_region) {
-  _ct_bs.resize_covered_region(new_region);
+  _ct_bs->resize_covered_region(new_region);
 }
 
 jbyte CardTableRS::find_unused_youngergenP_card_value() {
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
   for (jbyte v = youngergenP1_card;
        v < cur_youngergen_and_prev_nonclean_card;
        v++) {
     bool seen = false;
-    for (int g = 0; g < gch->n_gens()+1; g++) {
+    for (int g = 0; g < _regions_to_iterate; g++) {
       if (_last_cur_val_in_gen[g] == v) {
         seen = true;
         break;
@@ -221,11 +235,11 @@
 
 void CardTableRS::younger_refs_in_space_iterate(Space* sp,
                                                 OopsInGenClosure* cl) {
-  DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs.precision(),
+  DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs->precision(),
                                                    cl->gen_boundary());
   ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
 
-  _ct_bs.non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
+  _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
                                 dcto_cl, &clear_cl, false);
 }
 
@@ -549,7 +563,7 @@
 
   if (ch->kind() == CollectedHeap::GenCollectedHeap) {
     GenCollectedHeap::heap()->generation_iterate(&blk, false);
-    _ct_bs.verify();
+    _ct_bs->verify();
 
     // If the old gen collections also collect perm, then we are only
     // interested in perm-to-young pointers, not perm-to-old pointers.
--- a/hotspot/src/share/vm/memory/cardTableRS.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/cardTableRS.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -44,7 +44,7 @@
     return CardTableModRefBS::card_is_dirty_wrt_gen_iter(cv);
   }
 
-  CardTableModRefBSForCTRS _ct_bs;
+  CardTableModRefBSForCTRS* _ct_bs;
 
   virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl);
 
@@ -73,6 +73,8 @@
 
   jbyte _cur_youngergen_card_val;
 
+  int _regions_to_iterate;
+
   jbyte cur_youngergen_card_val() {
     return _cur_youngergen_card_val;
   }
@@ -96,7 +98,7 @@
 
   CardTableRS* as_CardTableRS() { return this; }
 
-  CardTableModRefBS* ct_bs() { return &_ct_bs; }
+  CardTableModRefBS* ct_bs() { return _ct_bs; }
 
   // Override.
   void prepare_for_younger_refs_iterate(bool parallel);
@@ -107,7 +109,7 @@
   void younger_refs_iterate(Generation* g, OopsInGenClosure* blk);
 
   void inline_write_ref_field_gc(void* field, oop new_val) {
-    jbyte* byte = _ct_bs.byte_for(field);
+    jbyte* byte = _ct_bs->byte_for(field);
     *byte = youngergen_card;
   }
   void write_ref_field_gc_work(void* field, oop new_val) {
@@ -122,25 +124,27 @@
   void resize_covered_region(MemRegion new_region);
 
   bool is_aligned(HeapWord* addr) {
-    return _ct_bs.is_card_aligned(addr);
+    return _ct_bs->is_card_aligned(addr);
   }
 
   void verify();
   void verify_aligned_region_empty(MemRegion mr);
 
-  void clear(MemRegion mr) { _ct_bs.clear(mr); }
+  void clear(MemRegion mr) { _ct_bs->clear(mr); }
   void clear_into_younger(Generation* gen, bool clear_perm);
 
-  void invalidate(MemRegion mr) { _ct_bs.invalidate(mr); }
+  void invalidate(MemRegion mr, bool whole_heap = false) {
+    _ct_bs->invalidate(mr, whole_heap);
+  }
   void invalidate_or_clear(Generation* gen, bool younger, bool perm);
 
   static uintx ct_max_alignment_constraint() {
     return CardTableModRefBS::ct_max_alignment_constraint();
   }
 
-  jbyte* byte_for(void* p)     { return _ct_bs.byte_for(p); }
-  jbyte* byte_after(void* p)   { return _ct_bs.byte_after(p); }
-  HeapWord* addr_for(jbyte* p) { return _ct_bs.addr_for(p); }
+  jbyte* byte_for(void* p)     { return _ct_bs->byte_for(p); }
+  jbyte* byte_after(void* p)   { return _ct_bs->byte_after(p); }
+  HeapWord* addr_for(jbyte* p) { return _ct_bs->addr_for(p); }
 
   bool is_prev_nonclean_card_val(jbyte v) {
     return
--- a/hotspot/src/share/vm/memory/collectorPolicy.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/collectorPolicy.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -31,11 +31,11 @@
   if (PermSize > MaxPermSize) {
     MaxPermSize = PermSize;
   }
-  PermSize = align_size_down(PermSize, min_alignment());
+  PermSize = MAX2(min_alignment(), align_size_down_(PermSize, min_alignment()));
   MaxPermSize = align_size_up(MaxPermSize, max_alignment());
 
-  MinPermHeapExpansion = align_size_down(MinPermHeapExpansion, min_alignment());
-  MaxPermHeapExpansion = align_size_down(MaxPermHeapExpansion, min_alignment());
+  MinPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MinPermHeapExpansion, min_alignment()));
+  MaxPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MaxPermHeapExpansion, min_alignment()));
 
   MinHeapDeltaBytes = align_size_up(MinHeapDeltaBytes, min_alignment());
 
@@ -55,25 +55,21 @@
 
 void CollectorPolicy::initialize_size_info() {
   // User inputs from -mx and ms are aligned
-  _initial_heap_byte_size = align_size_up(Arguments::initial_heap_size(),
-                                          min_alignment());
-  set_min_heap_byte_size(align_size_up(Arguments::min_heap_size(),
-                                          min_alignment()));
-  set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment()));
-
-  // Check validity of heap parameters from launcher
+  set_initial_heap_byte_size(Arguments::initial_heap_size());
   if (initial_heap_byte_size() == 0) {
     set_initial_heap_byte_size(NewSize + OldSize);
-  } else {
-    Universe::check_alignment(initial_heap_byte_size(), min_alignment(),
-                            "initial heap");
   }
+  set_initial_heap_byte_size(align_size_up(_initial_heap_byte_size,
+                                           min_alignment()));
+
+  set_min_heap_byte_size(Arguments::min_heap_size());
   if (min_heap_byte_size() == 0) {
     set_min_heap_byte_size(NewSize + OldSize);
-  } else {
-    Universe::check_alignment(min_heap_byte_size(), min_alignment(),
-                            "initial heap");
   }
+  set_min_heap_byte_size(align_size_up(_min_heap_byte_size,
+                                       min_alignment()));
+
+  set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment()));
 
   // Check heap parameter properties
   if (initial_heap_byte_size() < M) {
@@ -121,8 +117,6 @@
                                            int max_covered_regions) {
   switch (rem_set_name()) {
   case GenRemSet::CardTable: {
-    if (barrier_set_name() != BarrierSet::CardTableModRef)
-      vm_exit_during_initialization("Mismatch between RS and BS.");
     CardTableRS* res = new CardTableRS(whole_heap, max_covered_regions);
     return res;
   }
@@ -345,7 +339,7 @@
 
     // At this point all three sizes have been checked against the
     // maximum sizes but have not been checked for consistency
-    // amoung the three.
+    // among the three.
 
     // Final check min <= initial <= max
     set_min_gen0_size(MIN2(_min_gen0_size, _max_gen0_size));
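
For reference, a sketch of the power-of-two rounding that the alignment helpers used above typically perform; the exact HotSpot definitions live elsewhere, so treat these as illustrations:

// Illustrative only: round down/up to a multiple of a power-of-two alignment.
size_t align_down_example(size_t size, size_t alignment) {
  return size & ~(alignment - 1);
}
size_t align_up_example(size_t size, size_t alignment) {
  return (size + alignment - 1) & ~(alignment - 1);
}
// With a hypothetical alignment of 64*K:
//   align_down_example(30*K, 64*K) == 0, so
//   MAX2(64*K, align_down_example(30*K, 64*K)) == 64*K,
// which is why the MAX2 wrappers above keep PermSize and the expansion
// sizes from being rounded down to zero.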
--- a/hotspot/src/share/vm/memory/collectorPolicy.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/collectorPolicy.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -39,10 +39,12 @@
 // Forward declarations.
 class GenCollectorPolicy;
 class TwoGenerationCollectorPolicy;
+class AdaptiveSizePolicy;
 #ifndef SERIALGC
 class ConcurrentMarkSweepPolicy;
+class G1CollectorPolicy;
 #endif // SERIALGC
-class AdaptiveSizePolicy;
+
 class GCPolicyCounters;
 class PermanentGenerationSpec;
 class MarkSweepPolicy;
@@ -55,7 +57,7 @@
   // Requires that the concrete subclass sets the alignment constraints
   // before calling.
   virtual void initialize_flags();
-  virtual void initialize_size_info() = 0;
+  virtual void initialize_size_info();
   // Initialize "_permanent_generation" to a spec for the given kind of
   // Perm Gen.
   void initialize_perm_generation(PermGen::Name pgnm);
@@ -91,17 +93,18 @@
   enum Name {
     CollectorPolicyKind,
     TwoGenerationCollectorPolicyKind,
-    TrainPolicyKind,
     ConcurrentMarkSweepPolicyKind,
-    ASConcurrentMarkSweepPolicyKind
+    ASConcurrentMarkSweepPolicyKind,
+    G1CollectorPolicyKind
   };
 
   // Identification methods.
-  virtual GenCollectorPolicy*           as_generation_policy()          { return NULL; }
+  virtual GenCollectorPolicy*           as_generation_policy()            { return NULL; }
   virtual TwoGenerationCollectorPolicy* as_two_generation_policy()        { return NULL; }
   virtual MarkSweepPolicy*              as_mark_sweep_policy()            { return NULL; }
 #ifndef SERIALGC
   virtual ConcurrentMarkSweepPolicy*    as_concurrent_mark_sweep_policy() { return NULL; }
+  virtual G1CollectorPolicy*            as_g1_policy()                    { return NULL; }
 #endif // SERIALGC
   // Note that these are not virtual.
   bool is_generation_policy()            { return as_generation_policy() != NULL; }
@@ -109,10 +112,13 @@
   bool is_mark_sweep_policy()            { return as_mark_sweep_policy() != NULL; }
 #ifndef SERIALGC
   bool is_concurrent_mark_sweep_policy() { return as_concurrent_mark_sweep_policy() != NULL; }
+  bool is_g1_policy()                    { return as_g1_policy() != NULL; }
 #else  // SERIALGC
   bool is_concurrent_mark_sweep_policy() { return false; }
+  bool is_g1_policy()                    { return false; }
 #endif // SERIALGC
 
+
   virtual PermanentGenerationSpec *permanent_generation() {
     assert(_permanent_generation != NULL, "Sanity check");
     return _permanent_generation;
--- a/hotspot/src/share/vm/memory/compactingPermGenGen.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/compactingPermGenGen.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -100,7 +100,7 @@
 
   enum {
     vtbl_list_size = 16, // number of entries in the shared space vtable list.
-    num_virtuals = 100   // number of virtual methods in Klass (or
+    num_virtuals = 200   // number of virtual methods in Klass (or
                          // subclass) objects, or greater.
   };
 
--- a/hotspot/src/share/vm/memory/dump.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/dump.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -818,6 +818,40 @@
 // across the space while doing this, as that causes the vtables to be
 // patched, undoing our useful work.  Instead, iterate to make a list,
 // then use the list to do the fixing.
+//
+// Our constructed vtables:
+// Dump time:
+//  1. init_self_patching_vtbl_list: table of pointers to current virtual method addrs
+//  2. generate_vtable_methods: create jump table, appended to above vtbl_list
+//  3. PatchKlassVtables: for Klass list, patch the vtable entry to point to jump table
+//     rather than to current vtbl
+// Table layout: NOTE FIXED SIZE
+//   1. vtbl pointers
+//   2. #Klass X #virtual methods per Klass
+//   1 entry for each, in the order:
+//   Klass1:method1 entry, Klass1:method2 entry, ... Klass1:method<num_virtuals> entry
+//   Klass2:method1 entry, Klass2:method2 entry, ... Klass2:method<num_virtuals> entry
+//   ...
+//   Klass<vtbl_list_size>:method1 entry, Klass<vtbl_list_size>:method2 entry,
+//       ... Klass<vtbl_list_size>:method<num_virtuals> entry
+//  Sample entry: (Sparc):
+//   save(sp, -256, sp)
+//   ba,pt common_code
+//   mov XXX, %L0       %L0 gets: Klass index <<8 + method index (note: max method index 255)
+//
+// Restore time:
+//   1. initialize_oops: reserve space for table
+//   2. init_self_patching_vtbl_list: update pointers to NEW virtual method addrs in text
+//
+// Execution time:
+//   First virtual method call for any object of these Klass types:
+//   1. object->klass->klass_part
+//   2. vtable entry for that klass_part points to the jump table entries
+//   3. branches to common_code with %O0/klass_part, %L0: Klass index <<8 + method index
+//   4. common_code:
+//      Get address of new vtbl pointer for this Klass from updated table
+//      Update new vtbl pointer in the Klass: future virtual calls go direct
+//      Jump to method, using new vtbl pointer and method index
 
 class PatchKlassVtables: public ObjectClosure {
 private:
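
A sketch of the decoding step performed by the common code, assuming the encoding described in the comment above ((Klass index << 8) + method index); the struct and function names are illustrative:

// Illustrative only: recover the Klass and method indices passed in %L0.
struct VtableFixup {
  int klass_index;    // which of the vtbl_list_size Klass vtables
  int method_index;   // which virtual method (at most 255, per the note above)
};
static VtableFixup decode_vtable_fixup(int encoded) {
  VtableFixup f;
  f.klass_index  = encoded >> 8;
  f.method_index = encoded & 0xff;
  return f;
}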
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -252,6 +252,21 @@
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
   virtual HeapWord* allocate_new_tlab(size_t size);
 
+  // Can a compiler initialize a new object without store barriers?
+  // This permission only extends from the creation of a new object
+  // via a TLAB up to the first subsequent safepoint.
+  virtual bool can_elide_tlab_store_barriers() const {
+    return true;
+  }
+
+  // Can a compiler elide a store barrier when it writes
+  // a permanent oop into the heap?  Applies when the compiler
+  // is storing x to the heap, where x->is_perm() is true.
+  virtual bool can_elide_permanent_oop_store_barriers() const {
+    // CMS needs to see all, even intra-generational, ref updates.
+    return !UseConcMarkSweepGC;
+  }
+
   // The "requestor" generation is performing some garbage collection
   // action for which it would be useful to have scratch space.  The
   // requestor promises to allocate no more than "max_alloc_words" in any
--- a/hotspot/src/share/vm/memory/genMarkSweep.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/genMarkSweep.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -191,8 +191,10 @@
 
 void GenMarkSweep::deallocate_stacks() {
 
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->release_scratch();
+  if (!UseG1GC) {
+    GenCollectedHeap* gch = GenCollectedHeap::heap();
+    gch->release_scratch();
+  }
 
   if (_preserved_oop_stack) {
     delete _preserved_mark_stack;
--- a/hotspot/src/share/vm/memory/genMarkSweep.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/genMarkSweep.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -24,6 +24,7 @@
 
 class GenMarkSweep : public MarkSweep {
   friend class VM_MarkSweep;
+  friend class G1MarkSweep;
  public:
   static void invoke_at_safepoint(int level, ReferenceProcessor* rp,
                                   bool clear_all_softrefs);
--- a/hotspot/src/share/vm/memory/genOopClosures.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/genOopClosures.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -56,6 +56,9 @@
   // pointers must call the method below.
   template <class T> void do_barrier(T* p);
 
+  // Version for use by closures that may be called in parallel code.
+  void par_do_barrier(oop* p);
+
  public:
   OopsInGenClosure() : OopClosure(NULL),
     _orig_gen(NULL), _gen(NULL), _gen_boundary(NULL), _rs(NULL) {};
--- a/hotspot/src/share/vm/memory/genOopClosures.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/genOopClosures.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -48,6 +48,16 @@
   }
 }
 
+inline void OopsInGenClosure::par_do_barrier(oop* p) {
+  assert(generation()->is_in_reserved(p), "expected ref in generation");
+  oop obj = *p;
+  assert(obj != NULL, "expected non-null object");
+  // If p points to a younger generation, mark the card.
+  if ((HeapWord*)obj < gen_boundary()) {
+    rs()->write_ref_field_gc_par(p, obj);
+  }
+}
+
 // NOTE! Any changes made here should also be made
 // in FastScanClosure::do_oop_work()
 template <class T> inline void ScanClosure::do_oop_work(T* p) {
--- a/hotspot/src/share/vm/memory/genRemSet.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/genRemSet.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -42,6 +42,7 @@
   };
 
   GenRemSet(BarrierSet * bs) : _bs(bs) {}
+  GenRemSet() : _bs(NULL) {}
 
   virtual Name rs_kind() = 0;
 
@@ -53,6 +54,9 @@
   // Return the barrier set associated with "this."
   BarrierSet* bs() { return _bs; }
 
+  // Set the barrier set.
+  void set_bs(BarrierSet* bs) { _bs = bs; }
+
   // Do any (sequential) processing necessary to prepare for (possibly
   // "parallel", if that arg is true) calls to younger_refs_iterate.
   virtual void prepare_for_younger_refs_iterate(bool parallel) = 0;
@@ -116,7 +120,10 @@
 
   // Informs the RS that refs in the given "mr" may have changed
   // arbitrarily, and therefore may contain old-to-young pointers.
-  virtual void invalidate(MemRegion mr) = 0;
+  // If "whole heap" is true, then this invalidation is part of an
+  // invalidation of the whole heap, which an implementation might
+  // handle differently than that of a sub-part of the heap.
+  virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0;
 
   // Informs the RS that refs in this generation
   // may have changed arbitrarily, and therefore may contain
--- a/hotspot/src/share/vm/memory/heapInspection.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/heapInspection.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -238,11 +238,14 @@
   HeapWord* ref;
 
   CollectedHeap* heap = Universe::heap();
+  bool is_shared_heap = false;
   switch (heap->kind()) {
+    case CollectedHeap::G1CollectedHeap:
     case CollectedHeap::GenCollectedHeap: {
-      GenCollectedHeap* gch = (GenCollectedHeap*)heap;
-      gch->gc_prologue(false /* !full */); // get any necessary locks
-      ref = gch->perm_gen()->used_region().start();
+      is_shared_heap = true;
+      SharedHeap* sh = (SharedHeap*)heap;
+      sh->gc_prologue(false /* !full */); // get any necessary locks, etc.
+      ref = sh->perm_gen()->used_region().start();
       break;
     }
 #ifndef SERIALGC
@@ -284,9 +287,9 @@
   }
   st->flush();
 
-  if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-    GenCollectedHeap* gch = GenCollectedHeap::heap();
-    gch->gc_epilogue(false /* !full */); // release all acquired locks
+  if (is_shared_heap) {
+    SharedHeap* sh = (SharedHeap*)heap;
+    sh->gc_epilogue(false /* !full */); // release all acquired locks, etc.
   }
 }
 
--- a/hotspot/src/share/vm/memory/iterator.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/iterator.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -26,9 +26,23 @@
 
 class ReferenceProcessor;
 
+// Closure provides abortability.
+
+class Closure : public StackObj {
+ protected:
+  bool _abort;
+  void set_abort() { _abort = true; }
+ public:
+  Closure() : _abort(false) {}
+  // A subtype can use this mechanism to indicate to some iterator mapping
+  // functions that the iteration should cease.
+  bool abort() { return _abort; }
+  void clear_abort() { _abort = false; }
+};
+
 // OopClosure is used for iterating through roots (oop*)
 
-class OopClosure : public StackObj {
+class OopClosure : public Closure {
  public:
   ReferenceProcessor* _ref_processor;
   OopClosure(ReferenceProcessor* rp) : _ref_processor(rp) { }
@@ -55,11 +69,16 @@
   Prefetch::style prefetch_style() { // Note that this is non-virtual.
     return Prefetch::do_none;
   }
+
+  // True iff this closure may be safely applied more than once to an oop
+  // location without an intervening "major reset" (like the end of a GC).
+  virtual bool idempotent() { return false; }
+  virtual bool apply_to_weak_ref_discovered_field() { return false; }
 };
 
 // ObjectClosure is used for iterating through an object space
 
-class ObjectClosure : public StackObj {
+class ObjectClosure : public Closure {
  public:
   // Called for each object.
   virtual void do_object(oop obj) = 0;
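
A minimal example of the new abort protocol, assuming an iterator that checks abort() between objects; the closure name and limit are illustrative:

// Illustrative only: stop a cooperative object iteration after N objects.
class CountFirstNObjects : public ObjectClosure {
  size_t _seen;
  size_t _limit;
 public:
  CountFirstNObjects(size_t limit) : _seen(0), _limit(limit) {}
  void do_object(oop obj) {
    if (++_seen >= _limit) {
      set_abort();            // cooperating iterators see abort() and stop early
    }
  }
  size_t seen() const { return _seen; }
};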
--- a/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -31,6 +31,13 @@
 
 class ModRefBarrierSet: public BarrierSet {
 public:
+
+  ModRefBarrierSet() { _kind = BarrierSet::ModRef; }
+
+  bool is_a(BarrierSet::Name bsn) {
+    return bsn == BarrierSet::ModRef;
+  }
+
   // Barriers only on ref writes.
   bool has_read_ref_barrier() { return false; }
   bool has_read_prim_barrier() { return false; }
@@ -85,8 +92,10 @@
                                         bool clear = false,
                                         bool before_save_marks = false) = 0;
 
-  // Causes all refs in "mr" to be assumed to be modified.
-  virtual void invalidate(MemRegion mr) = 0;
+  // Causes all refs in "mr" to be assumed to be modified.  If "whole_heap"
+  // is true, the caller asserts that the entire heap is being invalidated,
+  // which may admit an optimized implementation for some barriers.
+  virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0;
 
   // The caller guarantees that "mr" contains no references.  (Perhaps its
   // objects have been moved elsewhere.)
--- a/hotspot/src/share/vm/memory/referenceProcessor.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -91,7 +91,8 @@
                                          bool               mt_discovery,
                                          BoolObjectClosure* is_alive_non_header,
                                          int                parallel_gc_threads,
-                                         bool               mt_processing) {
+                                         bool               mt_processing,
+                                         bool               dl_needs_barrier) {
   int mt_degree = 1;
   if (parallel_gc_threads > 1) {
     mt_degree = parallel_gc_threads;
@@ -99,7 +100,8 @@
   ReferenceProcessor* rp =
     new ReferenceProcessor(span, atomic_discovery,
                            mt_discovery, mt_degree,
-                           mt_processing && (parallel_gc_threads > 0));
+                           mt_processing && (parallel_gc_threads > 0),
+                           dl_needs_barrier);
   if (rp == NULL) {
     vm_exit_during_initialization("Could not allocate ReferenceProcessor object");
   }
@@ -111,10 +113,13 @@
                                        bool      atomic_discovery,
                                        bool      mt_discovery,
                                        int       mt_degree,
-                                       bool      mt_processing) :
+                                       bool      mt_processing,
+                                       bool      discovered_list_needs_barrier)  :
   _discovering_refs(false),
   _enqueuing_is_done(false),
   _is_alive_non_header(NULL),
+  _discovered_list_needs_barrier(discovered_list_needs_barrier),
+  _bs(NULL),
   _processing_is_mt(mt_processing),
   _next_id(0)
 {
@@ -135,6 +140,10 @@
         _discoveredSoftRefs[i].set_head(sentinel_ref());
     _discoveredSoftRefs[i].set_length(0);
   }
+  // If we do barriers, cache a copy of the barrier set.
+  if (discovered_list_needs_barrier) {
+    _bs = Universe::heap()->barrier_set();
+  }
 }
 
 #ifndef PRODUCT
@@ -727,10 +736,15 @@
   refs_list.set_length(0);
 }
 
-void
-ReferenceProcessor::abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]) {
-  for (int i = 0; i < _num_q; i++) {
-    abandon_partial_discovered_list(refs_lists[i]);
+void ReferenceProcessor::abandon_partial_discovery() {
+  // loop over the lists
+  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+    if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) {
+      gclog_or_tty->print_cr(
+        "\nAbandoning %s discovered list",
+        list_name(i));
+    }
+    abandon_partial_discovered_list(_discoveredSoftRefs[i]);
   }
 }
 
@@ -994,7 +1008,16 @@
   assert(_discovery_is_mt, "!_discovery_is_mt should have been handled by caller");
   // First we must make sure this object is only enqueued once. CAS in a non null
   // discovered_addr.
-  oop retest = oopDesc::atomic_compare_exchange_oop(refs_list.head(), discovered_addr,
+  oop current_head = refs_list.head();
+
+  // Note: In the case of G1, this pre-barrier is strictly
+  // not necessary because the only case we are interested in
+  // here is when *discovered_addr is NULL, so this will expand to
+  // nothing. As a result, I am just manually eliding this out for G1.
+  if (_discovered_list_needs_barrier && !UseG1GC) {
+    _bs->write_ref_field_pre((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+  }
+  oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr,
                                                     NULL);
   if (retest == NULL) {
     // This thread just won the right to enqueue the object.
@@ -1002,6 +1025,10 @@
     // is necessary.
     refs_list.set_head(obj);
     refs_list.set_length(refs_list.length() + 1);
+    if (_discovered_list_needs_barrier) {
+      _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+    }
+
   } else {
     // If retest was non NULL, another thread beat us to it:
     // The reference has already been discovered...
@@ -1073,8 +1100,8 @@
     }
   }
 
-  HeapWord* discovered_addr = java_lang_ref_Reference::discovered_addr(obj);
-  oop  discovered = java_lang_ref_Reference::discovered(obj);
+  HeapWord* const discovered_addr = java_lang_ref_Reference::discovered_addr(obj);
+  const oop  discovered = java_lang_ref_Reference::discovered(obj);
   assert(discovered->is_oop_or_null(), "bad discovered field");
   if (discovered != NULL) {
     // The reference has already been discovered...
@@ -1094,7 +1121,7 @@
       // discovered twice except by concurrent collectors that potentially
       // trace the same Reference object twice.
       assert(UseConcMarkSweepGC,
-             "Only possible with a concurrent collector");
+             "Only possible with an incremental-update concurrent collector");
       return true;
     }
   }
@@ -1122,12 +1149,24 @@
     return false;   // nothing special needs to be done
   }
 
-  // We do a raw store here, the field will be visited later when
-  // processing the discovered references.
   if (_discovery_is_mt) {
     add_to_discovered_list_mt(*list, obj, discovered_addr);
   } else {
-    oop_store_raw(discovered_addr, list->head());
+    // If "_discovered_list_needs_barrier", we do write barriers when
+    // updating the discovered reference list.  Otherwise, we do a raw store
+    // here: the field will be visited later when processing the discovered
+    // references.
+    oop current_head = list->head();
+    // As in the case further above, since we are over-writing a NULL
+    // pre-value, we can safely elide the pre-barrier here for the case of G1.
+    assert(discovered == NULL, "control point invariant");
+    if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1
+      _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+    }
+    oop_store_raw(discovered_addr, current_head);
+    if (_discovered_list_needs_barrier) {
+      _bs->write_ref_field((oop*)discovered_addr, current_head);
+    }
     list->set_head(obj);
     list->set_length(list->length() + 1);
   }
--- a/hotspot/src/share/vm/memory/referenceProcessor.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/referenceProcessor.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -54,6 +54,14 @@
   bool        _discovery_is_atomic;   // if discovery is atomic wrt
                                       // other collectors in configuration
   bool        _discovery_is_mt;       // true if reference discovery is MT.
+  // If true, setting "next" field of a discovered refs list requires
+  // write barrier(s).  (Must be true if used in a collector in which
+  // elements of a discovered list may be moved during discovery: for
+  // example, a collector like Garbage-First that moves objects during a
+  // long-term concurrent marking phase that does weak reference
+  // discovery.)
+  bool        _discovered_list_needs_barrier;
+  BarrierSet* _bs;                    // Cached copy of BarrierSet.
   bool        _enqueuing_is_done;     // true if all weak references enqueued
   bool        _processing_is_mt;      // true during phases when
                                       // reference processing is MT.
@@ -196,7 +204,6 @@
   void verify_ok_to_handle_reflists() PRODUCT_RETURN;
 
   void abandon_partial_discovered_list(DiscoveredList& refs_list);
-  void abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]);
 
   // Calculate the number of jni handles.
   unsigned int count_jni_refs();
@@ -217,6 +224,8 @@
     _discovery_is_atomic(true),
     _enqueuing_is_done(false),
     _discovery_is_mt(false),
+    _discovered_list_needs_barrier(false),
+    _bs(NULL),
     _is_alive_non_header(NULL),
     _num_q(0),
     _processing_is_mt(false),
@@ -224,8 +233,10 @@
   {}
 
   ReferenceProcessor(MemRegion span, bool atomic_discovery,
-                     bool mt_discovery, int mt_degree = 1,
-                     bool mt_processing = false);
+                     bool mt_discovery,
+                     int mt_degree = 1,
+                     bool mt_processing = false,
+                     bool discovered_list_needs_barrier = false);
 
   // Allocates and initializes a reference processor.
   static ReferenceProcessor* create_ref_processor(
@@ -234,8 +245,8 @@
     bool               mt_discovery,
     BoolObjectClosure* is_alive_non_header = NULL,
     int                parallel_gc_threads = 1,
-    bool               mt_processing = false);
-
+    bool               mt_processing = false,
+    bool               discovered_list_needs_barrier = false);
   // RefDiscoveryPolicy values
   enum {
     ReferenceBasedDiscovery = 0,
@@ -296,6 +307,11 @@
   // Enqueue references at end of GC (called by the garbage collector)
   bool enqueue_discovered_references(AbstractRefProcTaskExecutor* task_executor = NULL);
 
+  // If a discovery is in progress that is being superseded, abandon it: all
+  // the discovered lists will be empty, and all the objects on them will
+  // have NULL discovered fields.  Must be called only at a safepoint.
+  void abandon_partial_discovery();
+
   // debugging
   void verify_no_references_recorded() PRODUCT_RETURN;
   static void verify();
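
A hypothetical construction call based on the extended signature above, for a collector that moves objects while it discovers references concurrently; "span" and "is_alive_closure" are assumed to exist at the call site:

ReferenceProcessor* rp =
  ReferenceProcessor::create_ref_processor(span,
                                           false,                    // atomic_discovery
                                           true,                     // mt_discovery
                                           &is_alive_closure,        // is_alive_non_header
                                           (int) ParallelGCThreads,  // parallel_gc_threads
                                           false,                    // mt_processing
                                           true);                    // discovered_list_needs_barrier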
--- a/hotspot/src/share/vm/memory/sharedHeap.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/sharedHeap.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -57,15 +57,24 @@
   }
   _sh = this;  // ch is static, should be set only once.
   if ((UseParNewGC ||
-      (UseConcMarkSweepGC && CMSParallelRemarkEnabled)) &&
+      (UseConcMarkSweepGC && CMSParallelRemarkEnabled) ||
+       UseG1GC) &&
       ParallelGCThreads > 0) {
-    _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, true);
+    _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads,
+                            /* are_GC_task_threads */true,
+                            /* are_ConcurrentGC_threads */false);
     if (_workers == NULL) {
       vm_exit_during_initialization("Failed necessary allocation.");
     }
   }
 }
 
+bool SharedHeap::heap_lock_held_for_gc() {
+  Thread* t = Thread::current();
+  return    Heap_lock->owned_by_self()
+         || (   (t->is_GC_task_thread() ||  t->is_VM_thread())
+             && _thread_holds_heap_lock_for_gc);
+}
 
 void SharedHeap::set_par_threads(int t) {
   _n_par_threads = t;
@@ -280,10 +289,11 @@
 }
 
 // Some utilities.
-void SharedHeap::print_size_transition(size_t bytes_before,
+void SharedHeap::print_size_transition(outputStream* out,
+                                       size_t bytes_before,
                                        size_t bytes_after,
                                        size_t capacity) {
-  tty->print(" %d%s->%d%s(%d%s)",
+  out->print(" %d%s->%d%s(%d%s)",
              byte_size_in_proper_unit(bytes_before),
              proper_unit_for_byte_size(bytes_before),
              byte_size_in_proper_unit(bytes_after),
--- a/hotspot/src/share/vm/memory/sharedHeap.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/sharedHeap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -44,6 +44,9 @@
 class SharedHeap : public CollectedHeap {
   friend class VMStructs;
 
+  friend class VM_GC_Operation;
+  friend class VM_CGC_Operation;
+
 private:
   // For claiming strong_roots tasks.
   SubTasksDone* _process_strong_tasks;
@@ -82,6 +85,14 @@
   // function.
   SharedHeap(CollectorPolicy* policy_);
 
+  // Returns true if the calling thread holds the heap lock,
+  // or the calling thread is a par gc thread and the heap_lock is held
+  // by the vm thread doing a gc operation.
+  bool heap_lock_held_for_gc();
+  // True if the heap_lock is held by a non-gc thread invoking a gc
+  // operation.
+  bool _thread_holds_heap_lock_for_gc;
+
 public:
   static SharedHeap* heap() { return _sh; }
 
@@ -97,8 +108,8 @@
 
   void set_perm(PermGen* perm_gen) { _perm_gen = perm_gen; }
 
-  // A helper function that fills an allocated-but-not-yet-initialized
-  // region with a garbage object.
+  // A helper function that fills a region of the heap
+  // with a single object.
   static void fill_region_with_object(MemRegion mr);
 
   // Minimum garbage fill object size
@@ -214,13 +225,12 @@
   // "SharedHeap" can use in the implementation of its virtual
   // functions.
 
-protected:
+public:
 
   // Do anything common to GC's.
   virtual void gc_prologue(bool full) = 0;
   virtual void gc_epilogue(bool full) = 0;
 
-public:
   //
   // New methods from CollectedHeap
   //
@@ -266,7 +276,8 @@
   }
 
   // Some utilities.
-  void print_size_transition(size_t bytes_before,
+  void print_size_transition(outputStream* out,
+                             size_t bytes_before,
                              size_t bytes_after,
                              size_t capacity);
 };
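
A hypothetical call site for the reworked utility, assuming "sh" is the SharedHeap and the byte counts were sampled around a collection; the output stream is now chosen by the caller:

sh->print_size_transition(gclog_or_tty, used_before, used_after, heap_capacity);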
--- a/hotspot/src/share/vm/memory/space.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/space.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -105,7 +105,7 @@
          "Only ones we deal with for now.");
 
   assert(_precision != CardTableModRefBS::ObjHeadPreciseArray ||
-         _last_bottom == NULL ||
+         _cl->idempotent() || _last_bottom == NULL ||
          top <= _last_bottom,
          "Not decreasing");
   NOT_PRODUCT(_last_bottom = mr.start());
@@ -144,7 +144,14 @@
     walk_mem_region(mr, bottom_obj, top);
   }
 
-  _min_done = bottom;
+  // An idempotent closure might be applied in any order, so we don't
+  // record a _min_done for it.
+  if (!_cl->idempotent()) {
+    _min_done = bottom;
+  } else {
+    assert(_min_done == _last_explicit_min_done,
+           "Don't update _min_done for idempotent cl");
+  }
 }
 
 DirtyCardToOopClosure* Space::new_dcto_cl(OopClosure* cl,
@@ -250,7 +257,8 @@
   }
 }
 
-ContiguousSpace::ContiguousSpace(): CompactibleSpace(), _top(NULL) {
+ContiguousSpace::ContiguousSpace(): CompactibleSpace(), _top(NULL),
+    _concurrent_iteration_safe_limit(NULL) {
   _mangler = new GenSpaceMangler(this);
 }
 
@@ -263,17 +271,17 @@
                                  bool mangle_space)
 {
   CompactibleSpace::initialize(mr, clear_space, mangle_space);
-  _concurrent_iteration_safe_limit = top();
+  set_concurrent_iteration_safe_limit(top());
 }
 
 void ContiguousSpace::clear(bool mangle_space) {
   set_top(bottom());
   set_saved_mark();
-  Space::clear(mangle_space);
+  CompactibleSpace::clear(mangle_space);
 }
 
 bool Space::is_in(const void* p) const {
-  HeapWord* b = block_start(p);
+  HeapWord* b = block_start_const(p);
   return b != NULL && block_is_obj(b);
 }
 
@@ -342,8 +350,13 @@
                                   bool clear_space,
                                   bool mangle_space) {
   Space::initialize(mr, clear_space, mangle_space);
+  set_compaction_top(bottom());
+  _next_compaction_space = NULL;
+}
+
+void CompactibleSpace::clear(bool mangle_space) {
+  Space::clear(mangle_space);
   _compaction_top = bottom();
-  _next_compaction_space = NULL;
 }
 
 HeapWord* CompactibleSpace::forward(oop q, size_t size,
@@ -520,8 +533,8 @@
   }
   guarantee(p == top(), "end of last object must match end of space");
   if (top() != end()) {
-    guarantee(top() == block_start(end()-1) &&
-              top() == block_start(top()),
+    guarantee(top() == block_start_const(end()-1) &&
+              top() == block_start_const(top()),
               "top should be start of unallocated block, if it exists");
   }
 }
@@ -753,7 +766,7 @@
 #undef ContigSpace_OOP_SINCE_SAVE_MARKS_DEFN
 
 // Very general, slow implementation.
-HeapWord* ContiguousSpace::block_start(const void* p) const {
+HeapWord* ContiguousSpace::block_start_const(const void* p) const {
   assert(MemRegion(bottom(), end()).contains(p), "p not in space");
   if (p >= top()) {
     return top();
@@ -957,7 +970,8 @@
     // For a sampling of objects in the space, find it using the
     // block offset table.
     if (blocks == BLOCK_SAMPLE_INTERVAL) {
-      guarantee(p == block_start(p + (size/2)), "check offset computation");
+      guarantee(p == block_start_const(p + (size/2)),
+                "check offset computation");
       blocks = 0;
     } else {
       blocks++;
--- a/hotspot/src/share/vm/memory/space.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/space.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -105,7 +105,7 @@
   virtual void set_bottom(HeapWord* value) { _bottom = value; }
   virtual void set_end(HeapWord* value)    { _end = value; }
 
-  HeapWord* saved_mark_word() const  { return _saved_mark_word; }
+  virtual HeapWord* saved_mark_word() const  { return _saved_mark_word; }
   void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; }
 
   MemRegionClosure* preconsumptionDirtyCardClosure() const {
@@ -131,9 +131,15 @@
     return MemRegion(bottom(), saved_mark_word());
   }
 
-  // Initialization.  These may be run to reset an existing
-  // Space.
+  // Initialization.
+  // "initialize" should be called once on a space, before it is used for
+  // any purpose.  The "mr" arguments gives the bounds of the space, and
+  // the "clear_space" argument should be true unless the memory in "mr" is
+  // known to be zeroed.
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+
+  // The "clear" method must be called on a region that may have
+  // had allocation performed in it, but is now to be considered empty.
   virtual void clear(bool mangle_space);
 
   // For detecting GC bugs.  Should only be called at GC boundaries, since
@@ -218,7 +224,13 @@
   // "block" that contains "p".  We say "block" instead of "object" since
   // some heaps may not pack objects densely; a chunk may either be an
   // object or a non-object.  If "p" is not in the space, return NULL.
-  virtual HeapWord* block_start(const void* p) const = 0;
+  virtual HeapWord* block_start_const(const void* p) const = 0;
+
+  // The non-const version may have benevolent side effects on the data
+  // structure supporting these calls, possibly speeding up future calls.
+  // The default implementation, however, is simply to call the const
+  // version.
+  inline virtual HeapWord* block_start(const void* p);
 
   // Requires "addr" to be the start of a chunk, and returns its size.
   // "addr + size" is required to be the start of a new chunk, or the end
@@ -284,12 +296,13 @@
   CardTableModRefBS::PrecisionStyle _precision;
   HeapWord* _boundary;          // If non-NULL, process only non-NULL oops
                                 // pointing below boundary.
-  HeapWord* _min_done;                // ObjHeadPreciseArray precision requires
+  HeapWord* _min_done;          // ObjHeadPreciseArray precision requires
                                 // a downwards traversal; this is the
                                 // lowest location already done (or,
                                 // alternatively, the lowest address that
                                 // shouldn't be done again.  NULL means infinity.)
   NOT_PRODUCT(HeapWord* _last_bottom;)
+  NOT_PRODUCT(HeapWord* _last_explicit_min_done;)
 
   // Get the actual top of the area on which the closure will
   // operate, given where the top is assumed to be (the end of the
@@ -313,13 +326,15 @@
                         HeapWord* boundary) :
     _sp(sp), _cl(cl), _precision(precision), _boundary(boundary),
     _min_done(NULL) {
-    NOT_PRODUCT(_last_bottom = NULL;)
+    NOT_PRODUCT(_last_bottom = NULL);
+    NOT_PRODUCT(_last_explicit_min_done = NULL);
   }
 
   void do_MemRegion(MemRegion mr);
 
   void set_min_done(HeapWord* min_done) {
     _min_done = min_done;
+    NOT_PRODUCT(_last_explicit_min_done = _min_done);
   }
 #ifndef PRODUCT
   void set_last_bottom(HeapWord* last_bottom) {
@@ -356,7 +371,11 @@
   CompactibleSpace* _next_compaction_space;
 
 public:
+  CompactibleSpace() :
+   _compaction_top(NULL), _next_compaction_space(NULL) {}
+
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void clear(bool mangle_space);
 
   // Used temporarily during a compaction phase to hold the value
   // top should have when compaction is complete.
@@ -513,7 +532,7 @@
       /* prefetch beyond q */                                                \
       Prefetch::write(q, interval);                                          \
       /* size_t size = oop(q)->size();  changing this for cms for perm gen */\
-      size_t size = block_size(q);                                             \
+      size_t size = block_size(q);                                           \
       compact_top = cp->space->forward(oop(q), size, cp, compact_top);       \
       q += size;                                                             \
       end_of_live = q;                                                       \
@@ -577,156 +596,158 @@
   cp->space->set_compaction_top(compact_top);                                \
 }
 
-#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) {                                \
-  /* adjust all the interior pointers to point at the new locations of objects        \
-   * Used by MarkSweep::mark_sweep_phase3() */                                        \
+#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) {                             \
+  /* adjust all the interior pointers to point at the new locations of objects  \
+   * Used by MarkSweep::mark_sweep_phase3() */                                  \
                                                                                 \
-  HeapWord* q = bottom();                                                        \
-  HeapWord* t = _end_of_live;  /* Established by "prepare_for_compaction". */        \
+  HeapWord* q = bottom();                                                       \
+  HeapWord* t = _end_of_live;  /* Established by "prepare_for_compaction". */   \
                                                                                 \
-  assert(_first_dead <= _end_of_live, "Stands to reason, no?");                        \
+  assert(_first_dead <= _end_of_live, "Stands to reason, no?");                 \
                                                                                 \
-  if (q < t && _first_dead > q &&                                                \
+  if (q < t && _first_dead > q &&                                               \
       !oop(q)->is_gc_marked()) {                                                \
     /* we have a chunk of the space which hasn't moved and we've                \
      * reinitialized the mark word during the previous pass, so we can't        \
-     * use is_gc_marked for the traversal. */                                        \
+     * use is_gc_marked for the traversal. */                                   \
     HeapWord* end = _first_dead;                                                \
                                                                                 \
-    while (q < end) {                                                                \
-      /* I originally tried to conjoin "block_start(q) == q" to the                \
-       * assertion below, but that doesn't work, because you can't                \
-       * accurately traverse previous objects to get to the current one                \
-       * after their pointers (including pointers into permGen) have been        \
-       * updated, until the actual compaction is done.  dld, 4/00 */                \
-      assert(block_is_obj(q),                                                        \
-             "should be at block boundaries, and should be looking at objs");        \
+    while (q < end) {                                                           \
+      /* I originally tried to conjoin "block_start(q) == q" to the             \
+       * assertion below, but that doesn't work, because you can't              \
+       * accurately traverse previous objects to get to the current one         \
+       * after their pointers (including pointers into permGen) have been       \
+       * updated, until the actual compaction is done.  dld, 4/00 */            \
+      assert(block_is_obj(q),                                                   \
+             "should be at block boundaries, and should be looking at objs");   \
                                                                                 \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));     \
                                                                                 \
-      /* point all the oops to the new location */                                \
-      size_t size = oop(q)->adjust_pointers();                                        \
-      size = adjust_obj_size(size);                                                \
+      /* point all the oops to the new location */                              \
+      size_t size = oop(q)->adjust_pointers();                                  \
+      size = adjust_obj_size(size);                                             \
                                                                                 \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());           \
-                                                                                      \
+                                                                                \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));     \
-                                                                                      \
+                                                                                \
       q += size;                                                                \
-    }                                                                                \
+    }                                                                           \
                                                                                 \
-    if (_first_dead == t) {                                                        \
-      q = t;                                                                        \
-    } else {                                                                        \
-      /* $$$ This is funky.  Using this to read the previously written                \
-       * LiveRange.  See also use below. */                                        \
+    if (_first_dead == t) {                                                     \
+      q = t;                                                                    \
+    } else {                                                                    \
+      /* $$$ This is funky.  Using this to read the previously written          \
+       * LiveRange.  See also use below. */                                     \
       q = (HeapWord*)oop(_first_dead)->mark()->decode_pointer();                \
-    }                                                                                \
-  }                                                                                \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
   const intx interval = PrefetchScanIntervalInBytes;                            \
                                                                                 \
-  debug_only(HeapWord* prev_q = NULL);                                                \
-  while (q < t) {                                                                \
-    /* prefetch beyond q */                                                        \
+  debug_only(HeapWord* prev_q = NULL);                                          \
+  while (q < t) {                                                               \
+    /* prefetch beyond q */                                                     \
     Prefetch::write(q, interval);                                               \
-    if (oop(q)->is_gc_marked()) {                                                \
-      /* q is alive */                                                                \
+    if (oop(q)->is_gc_marked()) {                                               \
+      /* q is alive */                                                          \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));     \
-      /* point all the oops to the new location */                                \
-      size_t size = oop(q)->adjust_pointers();                                        \
-      size = adjust_obj_size(size);                                                \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());                \
+      /* point all the oops to the new location */                              \
+      size_t size = oop(q)->adjust_pointers();                                  \
+      size = adjust_obj_size(size);                                             \
+      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());           \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));     \
-      debug_only(prev_q = q);                                                        \
+      debug_only(prev_q = q);                                                   \
       q += size;                                                                \
-    } else {                                                                        \
-      /* q is not a live object, so its mark should point at the next                \
-       * live object */                                                                \
-      debug_only(prev_q = q);                                                        \
-      q = (HeapWord*) oop(q)->mark()->decode_pointer();                                \
-      assert(q > prev_q, "we should be moving forward through memory");                \
-    }                                                                                \
-  }                                                                                \
+    } else {                                                                    \
+      /* q is not a live object, so its mark should point at the next           \
+       * live object */                                                         \
+      debug_only(prev_q = q);                                                   \
+      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
+      assert(q > prev_q, "we should be moving forward through memory");         \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
-  assert(q == t, "just checking");                                                \
+  assert(q == t, "just checking");                                              \
 }
 
-#define SCAN_AND_COMPACT(obj_size) {                                                \
+#define SCAN_AND_COMPACT(obj_size) {                                            \
   /* Copy all live objects to their new location                                \
-   * Used by MarkSweep::mark_sweep_phase4() */                                        \
+   * Used by MarkSweep::mark_sweep_phase4() */                                  \
                                                                                 \
-  HeapWord*       q = bottom();                                                        \
-  HeapWord* const t = _end_of_live;                                                \
-  debug_only(HeapWord* prev_q = NULL);                                                \
+  HeapWord*       q = bottom();                                                 \
+  HeapWord* const t = _end_of_live;                                             \
+  debug_only(HeapWord* prev_q = NULL);                                          \
                                                                                 \
-  if (q < t && _first_dead > q &&                                                \
+  if (q < t && _first_dead > q &&                                               \
       !oop(q)->is_gc_marked()) {                                                \
-    debug_only(                                                                        \
+    debug_only(                                                                 \
     /* we have a chunk of the space which hasn't moved and we've reinitialized  \
      * the mark word during the previous pass, so we can't use is_gc_marked for \
      * the traversal. */                                                        \
-    HeapWord* const end = _first_dead;                                                \
-                                                                                      \
-    while (q < end) {                                                                \
+    HeapWord* const end = _first_dead;                                          \
+                                                                                \
+    while (q < end) {                                                           \
       size_t size = obj_size(q);                                                \
       assert(!oop(q)->is_gc_marked(),                                           \
              "should be unmarked (special dense prefix handling)");             \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q));        \
-      debug_only(prev_q = q);                                                        \
+      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q));       \
+      debug_only(prev_q = q);                                                   \
       q += size;                                                                \
-    }                                                                                \
-    )  /* debug_only */                                                                \
-                                                                                      \
-    if (_first_dead == t) {                                                        \
-      q = t;                                                                        \
-    } else {                                                                        \
-      /* $$$ Funky */                                                                 \
-      q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer();                \
-    }                                                                                \
-  }                                                                                \
+    }                                                                           \
+    )  /* debug_only */                                                         \
+                                                                                \
+    if (_first_dead == t) {                                                     \
+      q = t;                                                                    \
+    } else {                                                                    \
+      /* $$$ Funky */                                                           \
+      q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer();               \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
-  const intx scan_interval = PrefetchScanIntervalInBytes;                        \
-  const intx copy_interval = PrefetchCopyIntervalInBytes;                        \
-  while (q < t) {                                                                \
-    if (!oop(q)->is_gc_marked()) {                                                \
-      /* mark is pointer to next marked oop */                                        \
-      debug_only(prev_q = q);                                                        \
-      q = (HeapWord*) oop(q)->mark()->decode_pointer();                                \
-      assert(q > prev_q, "we should be moving forward through memory");                \
-    } else {                                                                        \
-      /* prefetch beyond q */                                                        \
+  const intx scan_interval = PrefetchScanIntervalInBytes;                       \
+  const intx copy_interval = PrefetchCopyIntervalInBytes;                       \
+  while (q < t) {                                                               \
+    if (!oop(q)->is_gc_marked()) {                                              \
+      /* mark is pointer to next marked oop */                                  \
+      debug_only(prev_q = q);                                                   \
+      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
+      assert(q > prev_q, "we should be moving forward through memory");         \
+    } else {                                                                    \
+      /* prefetch beyond q */                                                   \
       Prefetch::read(q, scan_interval);                                         \
                                                                                 \
       /* size and destination */                                                \
       size_t size = obj_size(q);                                                \
       HeapWord* compaction_top = (HeapWord*)oop(q)->forwardee();                \
                                                                                 \
-      /* prefetch beyond compaction_top */                                        \
+      /* prefetch beyond compaction_top */                                      \
       Prefetch::write(compaction_top, copy_interval);                           \
                                                                                 \
-      /* copy object and reinit its mark */                                        \
+      /* copy object and reinit its mark */                                     \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size,            \
                                                             compaction_top));   \
-      assert(q != compaction_top, "everything in this pass should be moving");        \
-      Copy::aligned_conjoint_words(q, compaction_top, size);                        \
-      oop(compaction_top)->init_mark();                                                \
-      assert(oop(compaction_top)->klass() != NULL, "should have a class");        \
+      assert(q != compaction_top, "everything in this pass should be moving");  \
+      Copy::aligned_conjoint_words(q, compaction_top, size);                    \
+      oop(compaction_top)->init_mark();                                         \
+      assert(oop(compaction_top)->klass() != NULL, "should have a class");      \
                                                                                 \
-      debug_only(prev_q = q);                                                        \
+      debug_only(prev_q = q);                                                   \
       q += size;                                                                \
-    }                                                                                \
-  }                                                                                \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
+  /* Let's remember if we were empty before we did the compaction. */           \
+  bool was_empty = used_region().is_empty();                                    \
   /* Reset space after compaction is complete */                                \
-  reset_after_compaction();                                                        \
+  reset_after_compaction();                                                     \
   /* We do this clear, below, since it has overloaded meanings for some */      \
   /* space subtypes.  For example, OffsetTableContigSpace's that were   */      \
   /* compacted into will have had their offset table thresholds updated */      \
   /* continuously, but those that weren't need to have their thresholds */      \
   /* re-initialized.  Also mangles unused area for debugging.           */      \
-  if (is_empty()) {                                                             \
-    clear(SpaceDecorator::Mangle);                                              \
+  if (used_region().is_empty()) {                                               \
+    if (!was_empty) clear(SpaceDecorator::Mangle);                              \
   } else {                                                                      \
     if (ZapUnusedHeapArea) mangle_unused_area();                                \
   }                                                                             \
@@ -752,20 +773,18 @@
   inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value);
 
  public:
-
   ContiguousSpace();
   ~ContiguousSpace();
 
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void clear(bool mangle_space);
 
   // Accessors
   HeapWord* top() const            { return _top;    }
   void set_top(HeapWord* value)    { _top = value; }
 
-  void set_saved_mark()       { _saved_mark_word = top();    }
-  void reset_saved_mark()     { _saved_mark_word = bottom(); }
-
-  virtual void clear(bool mangle_space);
+  virtual void set_saved_mark()    { _saved_mark_word = top();    }
+  void reset_saved_mark()          { _saved_mark_word = bottom(); }
 
   WaterMark bottom_mark()     { return WaterMark(this, bottom()); }
   WaterMark top_mark()        { return WaterMark(this, top()); }
@@ -874,7 +893,7 @@
   virtual void object_iterate_from(WaterMark mark, ObjectClosure* blk);
 
   // Very inefficient implementation.
-  virtual HeapWord* block_start(const void* p) const;
+  virtual HeapWord* block_start_const(const void* p) const;
   size_t block_size(const HeapWord* p) const;
   // If a block is in the allocated area, it is an object.
   bool block_is_obj(const HeapWord* p) const { return p < top(); }
@@ -979,7 +998,8 @@
   HeapWord* _soft_end;
 
  public:
-  EdenSpace(DefNewGeneration* gen) : _gen(gen) { _soft_end = NULL; }
+  EdenSpace(DefNewGeneration* gen) :
+   _gen(gen), _soft_end(NULL) {}
 
   // Get/set just the 'soft' limit.
   HeapWord* soft_end()               { return _soft_end; }
@@ -1033,7 +1053,7 @@
 
   void clear(bool mangle_space);
 
-  inline HeapWord* block_start(const void* p) const;
+  inline HeapWord* block_start_const(const void* p) const;
 
   // Add offset table update.
   virtual inline HeapWord* allocate(size_t word_size);
--- a/hotspot/src/share/vm/memory/space.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/space.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -22,6 +22,10 @@
  *
  */
 
+inline HeapWord* Space::block_start(const void* p) {
+  return block_start_const(p);
+}
+
 inline HeapWord* OffsetTableContigSpace::allocate(size_t size) {
   HeapWord* res = ContiguousSpace::allocate(size);
   if (res != NULL) {
@@ -50,7 +54,8 @@
   return res;
 }
 
-inline HeapWord* OffsetTableContigSpace::block_start(const void* p) const {
+inline HeapWord*
+OffsetTableContigSpace::block_start_const(const void* p) const {
   return _offsets.block_start(p);
 }
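
As an aside, the space.hpp and space.inline.hpp hunks above rename the virtual lookup to block_start_const and route the non-const Space::block_start through it. A minimal standalone sketch of that wrapper-plus-virtual shape, using simplified stand-in names (SpaceSketch, HeapWordP) rather than the real HotSpot types:

    #include <cstdio>

    typedef const char* HeapWordP;   // stand-in for HeapWord*, purely illustrative

    struct SpaceSketch {
      // Non-virtual, non-const entry point; forwards to the const virtual,
      // so subclasses only ever override block_start_const.
      HeapWordP block_start(const void* p) { return block_start_const(p); }
      virtual HeapWordP block_start_const(const void* p) const = 0;
      virtual ~SpaceSketch() {}
    };

    struct ContigSpaceSketch : SpaceSketch {
      virtual HeapWordP block_start_const(const void*) const { return "block-start"; }
    };

    int main() {
      ContigSpaceSketch s;
      std::printf("%s\n", s.block_start(0));   // dispatches to the const override
      return 0;
    }
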
 
--- a/hotspot/src/share/vm/memory/specialized_oop_closures.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/specialized_oop_closures.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -59,6 +59,12 @@
 // This is split into several because of a Visual C++ 6.0 compiler bug
 // where very long macros cause the compiler to crash
 
+// Some other heap might define further specialized closures.
+#ifndef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
+#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
+        /* None */
+#endif
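
This default-empty hook lets a collector-specific header splice extra closure types into the X-macro list simply by defining the macro before this point. A self-contained sketch of that pattern with made-up names (EXTRA_CLOSURES, ALL_CLOSURES); the real lists of course take the (ClosureType, suffix) pairs used in this file:

    #include <cstdio>

    // A port could define EXTRA_CLOSURES before this header to add entries.
    #ifndef EXTRA_CLOSURES
    #define EXTRA_CLOSURES(f) /* None */
    #endif

    #define ALL_CLOSURES(f) \
      f(ScanClosure)        \
      f(FastScanClosure)    \
      EXTRA_CLOSURES(f)

    // Applying an argument macro expands once per closure in the list.
    #define DECLARE_COUNTER(name) int name##_calls = 0;
    ALL_CLOSURES(DECLARE_COUNTER)

    int main() {
      ScanClosure_calls++;
      std::printf("ScanClosure calls: %d\n", ScanClosure_calls);
      return 0;
    }
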
+
 #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_S(f)       \
   f(ScanClosure,_nv)                                    \
   f(FastScanClosure,_nv)                                \
@@ -77,7 +83,7 @@
   SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_P(f)
 
 #ifndef SERIALGC
-#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f)       \
+#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f)       \
   f(MarkRefsIntoAndScanClosure,_nv)                     \
   f(Par_MarkRefsIntoAndScanClosure,_nv)                 \
   f(PushAndMarkClosure,_nv)                             \
@@ -85,11 +91,13 @@
   f(PushOrMarkClosure,_nv)                              \
   f(Par_PushOrMarkClosure,_nv)                          \
   f(CMSKeepAliveClosure,_nv)                            \
-  f(CMSInnerParMarkAndPushClosure,_nv)
+  f(CMSInnerParMarkAndPushClosure,_nv)                  \
+  FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f)
 #else  // SERIALGC
-#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f)
+#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f)
 #endif // SERIALGC
 
+
 // We separate these out, because sometime the general one has
 // a different definition from the specialized ones, and sometimes it
 // doesn't.
@@ -98,8 +106,8 @@
   f(OopClosure,_v)                                      \
   SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(f)
 
-#define ALL_OOP_OOP_ITERATE_CLOSURES_3(f)               \
-  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f)
+#define ALL_OOP_OOP_ITERATE_CLOSURES_2(f)               \
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f)
 
 #ifndef SERIALGC
 // This macro applies an argument macro to all OopClosures for which we
@@ -125,6 +133,13 @@
 // The "root_class" is the most general class to define; this may be
 // "OopClosure" in some applications and "OopsInGenClosure" in others.
 
+
+// Some other heap might define further specialized closures.
+#ifndef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
+#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) \
+        /* None */
+#endif
+
 #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_S(f) \
   f(ScanClosure,_nv)                                     \
   f(FastScanClosure,_nv)
@@ -132,7 +147,8 @@
 #ifndef SERIALGC
 #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) \
   f(ParScanWithBarrierClosure,_nv)                       \
-  f(ParScanWithoutBarrierClosure,_nv)
+  f(ParScanWithoutBarrierClosure,_nv)                    \
+  FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f)
 #else  // SERIALGC
 #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f)
 #endif // SERIALGC
@@ -179,13 +195,15 @@
 
 #if ENABLE_SPECIALIZATION_STATS
 private:
-  static int _numCallsAll;
+  static bool _init;
+  static bool _wrapped;
+  static jint _numCallsAll;
 
-  static int _numCallsTotal[NUM_Kinds];
-  static int _numCalls_nv[NUM_Kinds];
+  static jint _numCallsTotal[NUM_Kinds];
+  static jint _numCalls_nv[NUM_Kinds];
 
-  static int _numDoOopCallsTotal[NUM_Kinds];
-  static int _numDoOopCalls_nv[NUM_Kinds];
+  static jint _numDoOopCallsTotal[NUM_Kinds];
+  static jint _numDoOopCalls_nv[NUM_Kinds];
 public:
 #endif
   static void clear()  PRODUCT_RETURN;
@@ -203,22 +221,22 @@
 #if ENABLE_SPECIALIZATION_STATS
 
 inline void SpecializationStats::record_call() {
-  _numCallsAll++;;
+  Atomic::inc(&_numCallsAll);
 }
 inline void SpecializationStats::record_iterate_call_v(Kind k) {
-  _numCallsTotal[k]++;
+  Atomic::inc(&_numCallsTotal[k]);
 }
 inline void SpecializationStats::record_iterate_call_nv(Kind k) {
-  _numCallsTotal[k]++;
-  _numCalls_nv[k]++;
+  Atomic::inc(&_numCallsTotal[k]);
+  Atomic::inc(&_numCalls_nv[k]);
 }
 
 inline void SpecializationStats::record_do_oop_call_v(Kind k) {
-  _numDoOopCallsTotal[k]++;
+  Atomic::inc(&_numDoOopCallsTotal[k]);
 }
 inline void SpecializationStats::record_do_oop_call_nv(Kind k) {
-  _numDoOopCallsTotal[k]++;
-  _numDoOopCalls_nv[k]++;
+  Atomic::inc(&_numDoOopCallsTotal[k]);
+  Atomic::inc(&_numDoOopCalls_nv[k]);
 }
 
 #else   // !ENABLE_SPECIALIZATION_STATS
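
The counters above move from plain int increments to jint fields updated with Atomic::inc, so parallel GC worker threads recording calls concurrently cannot lose updates. A standalone sketch of the same idea in portable C++ (std::atomic standing in for HotSpot's Atomic; the counter name is illustrative):

    #include <atomic>
    #include <thread>
    #include <vector>
    #include <cstdio>

    static std::atomic<int> num_calls_all(0);   // plays the role of _numCallsAll

    static void record_call() {
      // Analogous in spirit to Atomic::inc(&_numCallsAll): an increment that
      // stays correct when several threads record calls at the same time.
      num_calls_all.fetch_add(1, std::memory_order_relaxed);
    }

    int main() {
      std::vector<std::thread> workers;
      for (int i = 0; i < 4; ++i)
        workers.push_back(std::thread([] {
          for (int j = 0; j < 1000; ++j) record_call();
        }));
      for (size_t i = 0; i < workers.size(); ++i) workers[i].join();
      std::printf("%d\n", num_calls_all.load());   // always 4000, never a lost update
      return 0;
    }
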
--- a/hotspot/src/share/vm/memory/universe.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/memory/universe.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -739,6 +739,15 @@
     fatal("UseParallelGC not supported in java kernel vm.");
 #endif // SERIALGC
 
+  } else if (UseG1GC) {
+#ifndef SERIALGC
+    G1CollectorPolicy* g1p = new G1CollectorPolicy_BestRegionsFirst();
+    G1CollectedHeap* g1h = new G1CollectedHeap(g1p);
+    Universe::_collectedHeap = g1h;
+#else  // SERIALGC
+    fatal("UseG1GC not supported in java kernel vm.");
+#endif // SERIALGC
+
   } else {
     GenCollectorPolicy *gc_policy;
 
@@ -938,7 +947,10 @@
 
   // This needs to be done before the first scavenge/gc, since
   // it's an input to soft ref clearing policy.
-  Universe::update_heap_info_at_gc();
+  {
+    MutexLocker x(Heap_lock);
+    Universe::update_heap_info_at_gc();
+  }
 
   // ("weak") refs processing infrastructure initialization
   Universe::heap()->post_initialize();
@@ -1194,10 +1206,11 @@
     // ???: What if a CollectedHeap doesn't have a permanent generation?
     ShouldNotReachHere();
     break;
-  case CollectedHeap::GenCollectedHeap: {
-    GenCollectedHeap* gch = (GenCollectedHeap*) Universe::heap();
-    permanent_reserved = gch->perm_gen()->reserved();
-    break;
+  case CollectedHeap::GenCollectedHeap:
+  case CollectedHeap::G1CollectedHeap: {
+    SharedHeap* sh = (SharedHeap*) Universe::heap();
+    permanent_reserved = sh->perm_gen()->reserved();
+    break;
   }
 #ifndef SERIALGC
   case CollectedHeap::ParallelScavengeHeap: {
--- a/hotspot/src/share/vm/oops/generateOopMap.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/generateOopMap.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -370,21 +370,8 @@
 void GenerateOopMap ::initialize_bb() {
   _gc_points = 0;
   _bb_count  = 0;
-  int size = binsToHold(method()->code_size());
-  _bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t,size);
-  memset(_bb_hdr_bits, 0, size*sizeof(uintptr_t));
-}
-
-void GenerateOopMap ::set_bbmark_bit(int bci) {
-  int idx  = bci >> LogBitsPerWord;
-  uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1));
-  _bb_hdr_bits[idx] |= bit;
-}
-
-void GenerateOopMap ::clear_bbmark_bit(int bci) {
-  int idx   = bci >> LogBitsPerWord;
-  uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1));
-  _bb_hdr_bits[idx] &= (~bit);
+  _bb_hdr_bits.clear();
+  _bb_hdr_bits.resize(method()->code_size());
 }
 
 void GenerateOopMap::bb_mark_fct(GenerateOopMap *c, int bci, int *data) {
@@ -952,6 +939,17 @@
   _basic_blocks[bbNo-1]._end_bci = prev_bci;
 
 
+  // Check that the correct number of basicblocks was found
+  if (bbNo !=_bb_count) {
+    if (bbNo < _bb_count) {
+      verify_error("jump into the middle of instruction?");
+      return;
+    } else {
+      verify_error("extra basic blocks - should not happen?");
+      return;
+    }
+  }
+
   _max_monitors = monitor_count;
 
   // Now that we have a bound on the depth of the monitor stack, we can
@@ -985,17 +983,6 @@
   }
 #endif
 
-  // Check that the correct number of basicblocks was found
-  if (bbNo !=_bb_count) {
-    if (bbNo < _bb_count) {
-      verify_error("jump into the middle of instruction?");
-      return;
-    } else {
-      verify_error("extra basic blocks - should not happen?");
-      return;
-    }
-  }
-
   // Mark all alive blocks
   mark_reachable_code();
 }
@@ -1022,21 +1009,22 @@
                                          int new_method_size) {
   assert(new_method_size >= method()->code_size() + delta,
          "new method size is too small");
-  int newWords = binsToHold(new_method_size);
 
-  uintptr_t * new_bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t, newWords);
+  BitMap::bm_word_t* new_bb_hdr_bits =
+    NEW_RESOURCE_ARRAY(BitMap::bm_word_t,
+                       BitMap::word_align_up(new_method_size));
+  _bb_hdr_bits.set_map(new_bb_hdr_bits);
+  _bb_hdr_bits.set_size(new_method_size);
+  _bb_hdr_bits.clear();
 
-  BitMap bb_bits(new_bb_hdr_bits, new_method_size);
-  bb_bits.clear();
 
   for(int k = 0; k < _bb_count; k++) {
     if (_basic_blocks[k]._bci > bci) {
       _basic_blocks[k]._bci     += delta;
       _basic_blocks[k]._end_bci += delta;
     }
-    bb_bits.at_put(_basic_blocks[k]._bci, true);
+    _bb_hdr_bits.at_put(_basic_blocks[k]._bci, true);
   }
-  _bb_hdr_bits = new_bb_hdr_bits ;
 }
 
 //
--- a/hotspot/src/share/vm/oops/generateOopMap.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/generateOopMap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -341,16 +341,22 @@
   BasicBlock *    _basic_blocks;             // Array of basicblock info
   int             _gc_points;
   int             _bb_count;
-  uintptr_t *     _bb_hdr_bits;
+  BitMap          _bb_hdr_bits;
 
   // Basicblocks methods
   void          initialize_bb               ();
   void          mark_bbheaders_and_count_gc_points();
-  bool          is_bb_header                (int bci) const   { return (_bb_hdr_bits[bci >> LogBitsPerWord] & ((uintptr_t)1 << (bci & (BitsPerWord-1)))) != 0; }
+  bool          is_bb_header                (int bci) const   {
+    return _bb_hdr_bits.at(bci);
+  }
   int           gc_points                   () const                          { return _gc_points; }
   int           bb_count                    () const                          { return _bb_count; }
-  void          set_bbmark_bit              (int bci);
-  void          clear_bbmark_bit            (int bci);
+  void          set_bbmark_bit              (int bci) {
+    _bb_hdr_bits.at_put(bci, true);
+  }
+  void          clear_bbmark_bit            (int bci) {
+    _bb_hdr_bits.at_put(bci, false);
+  }
   BasicBlock *  get_basic_block_at          (int bci) const;
   BasicBlock *  get_basic_block_containing  (int bci) const;
   void          interp_bb                   (BasicBlock *bb);
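
The generateOopMap changes above retire the hand-rolled uintptr_t bit array (binsToHold plus manual shift/mask helpers) in favour of a BitMap, so marking and querying basic-block headers reduces to at_put/at. A rough stand-alone equivalent using std::vector<bool>, not HotSpot's BitMap API, with an invented code size:

    #include <vector>
    #include <cassert>

    // One bit per bytecode index; the bit is set when that bci starts a basic block.
    class BciBitSet {
      std::vector<bool> bits_;
     public:
      explicit BciBitSet(int code_size) : bits_(code_size, false) {}
      void set_bbmark_bit(int bci)     { bits_.at(bci) = true;  }
      void clear_bbmark_bit(int bci)   { bits_.at(bci) = false; }
      bool is_bb_header(int bci) const { return bits_.at(bci);  }
    };

    int main() {
      BciBitSet headers(64);           // 64 is a made-up method code size
      headers.set_bbmark_bit(0);
      headers.set_bbmark_bit(17);
      assert(headers.is_bb_header(17) && !headers.is_bb_header(16));
      return 0;
    }
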
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1515,10 +1515,9 @@
 // closure's do_header() method dicates whether the given closure should be
 // applied to the klass ptr in the object header.
 
-#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)   \
-                                                                        \
-int instanceKlass::oop_oop_iterate##nv_suffix(oop obj,                  \
-                                              OopClosureType* closure) {\
+#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)        \
+                                                                             \
+int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \
   SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik);\
   /* header */                                                          \
   if (closure->do_header()) {                                           \
@@ -1533,6 +1532,26 @@
   return size_helper();                                                 \
 }
 
+#ifndef SERIALGC
+#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \
+                                                                                \
+int instanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj,                \
+                                              OopClosureType* closure) {        \
+  SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik); \
+  /* header */                                                                  \
+  if (closure->do_header()) {                                                   \
+    obj->oop_iterate_header(closure);                                           \
+  }                                                                             \
+  /* instance variables */                                                      \
+  InstanceKlass_OOP_MAP_REVERSE_ITERATE(                                        \
+    obj,                                                                        \
+    SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::ik);\
+    (closure)->do_oop##nv_suffix(p),                                            \
+    assert_is_in_closed_subset)                                                 \
+   return size_helper();                                                        \
+}
+#endif // !SERIALGC
+
 #define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \
                                                                         \
 int instanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj,              \
@@ -1550,9 +1569,13 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN)
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN_m)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN_m)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN_m)
+#ifndef SERIALGC
+ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+#endif // !SERIALGC
 
 void instanceKlass::iterate_static_fields(OopClosure* closure) {
     InstanceKlass_OOP_ITERATE( \
--- a/hotspot/src/share/vm/oops/instanceKlass.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -655,13 +655,21 @@
     return oop_oop_iterate_v_m(obj, blk, mr);
   }
 
-#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)   \
-  int  oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);        \
-  int  oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk,     \
+#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)      \
+  int  oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);           \
+  int  oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk,        \
                                       MemRegion mr);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \
+  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
+
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+#endif // !SERIALGC
 
   void iterate_static_fields(OopClosure* closure);
   void iterate_static_fields(OopClosure* closure, MemRegion mr);
--- a/hotspot/src/share/vm/oops/instanceRefKlass.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -176,6 +176,11 @@
 }
 
 #define InstanceRefKlass_SPECIALIZED_OOP_ITERATE(T, nv_suffix, contains)        \
+  if (closure->apply_to_weak_ref_discovered_field()) {                          \
+    T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);           \
+    closure->do_oop##nv_suffix(disc_addr);                                      \
+  }                                                                             \
+                                                                                \
   T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);           \
   oop referent = oopDesc::load_decode_heap_oop(referent_addr);                  \
   if (referent != NULL && contains(referent_addr)) {                            \
@@ -219,6 +224,25 @@
   }                                                                             \
 }
 
+#ifndef SERIALGC
+#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \
+                                                                                \
+int instanceRefKlass::                                                          \
+oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {        \
+  /* Get size before changing pointers */                                       \
+  SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::irk);\
+                                                                                \
+  int size = instanceKlass::oop_oop_iterate_backwards##nv_suffix(obj, closure); \
+                                                                                \
+  if (UseCompressedOops) {                                                      \
+    InstanceRefKlass_SPECIALIZED_OOP_ITERATE(narrowOop, nv_suffix, contains);   \
+  } else {                                                                      \
+    InstanceRefKlass_SPECIALIZED_OOP_ITERATE(oop, nv_suffix, contains);         \
+  }                                                                             \
+}
+#endif // !SERIALGC
+
+
 #define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)      \
                                                                                 \
 int instanceRefKlass::                                                          \
@@ -236,9 +260,13 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN)
+#ifndef SERIALGC
+ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+#endif // SERIALGC
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m)
 
 #ifndef SERIALGC
 template <class T>
@@ -423,7 +451,7 @@
   // Verify next field
   oop next = java_lang_ref_Reference::next(obj);
   if (next != NULL) {
-    guarantee(next->is_oop(), "next field verify fa iled");
+    guarantee(next->is_oop(), "next field verify failed");
     guarantee(next->is_instanceRef(), "next field verify failed");
     if (gch != NULL && !gch->is_in_youngest(obj)) {
       // We do a specific remembered set check here since the next field is
--- a/hotspot/src/share/vm/oops/instanceRefKlass.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -72,7 +72,15 @@
   int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)      \
+  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
+
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+#endif // !SERIALGC
 
   static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock);
   static void acquire_pending_list_lock(BasicLock *pending_list_basic_lock);
--- a/hotspot/src/share/vm/oops/klass.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/klass.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -134,14 +134,14 @@
   // Every subclass on which vtbl_value is called must include this macro.
   // Delay the installation of the klassKlass pointer until after the
   // the vtable for a new klass has been installed (after the call to new()).
-#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \
+#define DEFINE_ALLOCATE_PERMANENT(thisKlass)                                  \
   void* allocate_permanent(KlassHandle& klass_klass, int size, TRAPS) const { \
-    void* result = new(klass_klass, size, THREAD) thisKlass(); \
-    if (HAS_PENDING_EXCEPTION) return NULL;                    \
-    klassOop new_klass = ((Klass*) result)->as_klassOop();      \
-    OrderAccess::storestore();  \
-    post_new_init_klass(klass_klass, new_klass, size);  \
-    return result;      \
+    void* result = new(klass_klass, size, THREAD) thisKlass();                \
+    if (HAS_PENDING_EXCEPTION) return NULL;                                   \
+    klassOop new_klass = ((Klass*) result)->as_klassOop();                    \
+    OrderAccess::storestore();                                                \
+    post_new_init_klass(klass_klass, new_klass, size);                        \
+    return result;                                                            \
   }
 
   bool null_vtbl() { return *(intptr_t*)this == 0; }
@@ -694,6 +694,14 @@
     return oop_oop_iterate(obj, blk);
   }
 
+#ifndef SERIALGC
+  // In case we don't have a specialized backward scanner use forward
+  // iteration.
+  virtual int oop_oop_iterate_backwards_v(oop obj, OopClosure* blk) {
+    return oop_oop_iterate_v(obj, blk);
+  }
+#endif // !SERIALGC
+
   // Iterates "blk" over all the oops in "obj" (of type "this") within "mr".
   // (I don't see why the _m should be required, but without it the Solaris
   // C++ gives warning messages about overridings of the "oop_oop_iterate"
@@ -722,7 +730,19 @@
   }
 
   SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL)
-  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(Klass_OOP_OOP_ITERATE_DECL)
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)      \
+  virtual int oop_oop_iterate_backwards##nv_suffix(oop obj,                  \
+                                                   OopClosureType* blk) {    \
+    /* Default implementation reverts to general version. */                 \
+    return oop_oop_iterate_backwards_v(obj, blk);                            \
+  }
+
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+#endif // !SERIALGC
 
   virtual void array_klasses_do(void f(klassOop k)) {}
   virtual void with_array_klasses_do(void f(klassOop k));
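
The klass.hpp hunk above gives every oop_oop_iterate_backwards variant a default that simply falls back to the forward iterator, so only klasses (and collectors) with a genuine reverse scan need to override it. A small sketch of that virtual-with-fallback shape, using simplified class and method names rather than the real closure machinery:

    #include <cstdio>

    struct KlassSketch {
      virtual int iterate(const char* who) {
        std::printf("%s: forward scan\n", who);
        return 0;
      }
      // Default backwards scan just reverts to the forward version.
      virtual int iterate_backwards(const char* who) { return iterate(who); }
      virtual ~KlassSketch() {}
    };

    struct InstanceKlassSketch : KlassSketch {
      virtual int iterate_backwards(const char* who) {   // specialized reverse scan
        std::printf("%s: backwards scan\n", who);
        return 0;
      }
    };

    int main() {
      KlassSketch plain;
      InstanceKlassSketch ik;
      plain.iterate_backwards("plain klass");   // prints "forward scan"
      ik.iterate_backwards("instance klass");   // prints "backwards scan"
      return 0;
    }
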
--- a/hotspot/src/share/vm/oops/markOop.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/markOop.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -222,11 +222,7 @@
   static markOop INFLATING() { return (markOop) 0; }    // inflate-in-progress
 
   // Should this header be preserved during GC?
-  bool must_be_preserved(oop obj_containing_mark) const {
-    if (!UseBiasedLocking)
-      return (!is_unlocked() || !has_no_hash());
-    return must_be_preserved_with_bias(obj_containing_mark);
-  }
+  inline bool must_be_preserved(oop obj_containing_mark) const;
   inline bool must_be_preserved_with_bias(oop obj_containing_mark) const;
 
   // Should this header (including its age bits) be preserved in the
@@ -246,22 +242,14 @@
   // observation is that promotion failures are quite rare and
   // reducing the number of mark words preserved during them isn't a
   // high priority.
-  bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const {
-    if (!UseBiasedLocking)
-      return (this != prototype());
-    return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark);
-  }
+  inline bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const;
   inline bool must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const;
 
   // Should this header be preserved during a scavenge where CMS is
   // the old generation?
   // (This is basically the same body as must_be_preserved_for_promotion_failure(),
   // but takes the klassOop as argument instead)
-  bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const {
-    if (!UseBiasedLocking)
-      return (this != prototype());
-    return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark);
-  }
+  inline bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const;
   inline bool must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const;
 
   // WARNING: The following routines are used EXCLUSIVELY by
--- a/hotspot/src/share/vm/oops/markOop.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/markOop.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -39,6 +39,12 @@
   return (!is_unlocked() || !has_no_hash());
 }
 
+inline bool markOopDesc::must_be_preserved(oop obj_containing_mark) const {
+  if (!UseBiasedLocking)
+    return (!is_unlocked() || !has_no_hash());
+  return must_be_preserved_with_bias(obj_containing_mark);
+}
+
 // Should this header (including its age bits) be preserved in the
 // case of a promotion failure during scavenge?
 inline bool markOopDesc::must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const {
@@ -59,6 +65,13 @@
   return (this != prototype());
 }
 
+inline bool markOopDesc::must_be_preserved_for_promotion_failure(oop obj_containing_mark) const {
+  if (!UseBiasedLocking)
+    return (this != prototype());
+  return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark);
+}
+
+
 // Should this header (including its age bits) be preserved in the
 // case of a scavenge in which CMS is the old generation?
 inline bool markOopDesc::must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const {
@@ -70,6 +83,11 @@
   }
   return (this != prototype());
 }
+inline bool markOopDesc::must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const {
+  if (!UseBiasedLocking)
+    return (this != prototype());
+  return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark);
+}
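
markOop.hpp above keeps only inline declarations and moves the bodies into markOop.inline.hpp, the usual HotSpot split that keeps heavier dependencies (here the biased-locking checks) out of a very widely included header. A single-file sketch of the shape, with invented names; the biased-locking policy itself is elided:

    // --- the ".hpp" part: declaration only, cheap to include everywhere ----
    struct MarkSketch {
      bool unlocked;
      inline bool must_be_preserved() const;   // declared here, defined below
    };

    // --- the ".inline.hpp" part: definition may consult globals/flags ------
    extern bool use_biased_locking;            // stands in for UseBiasedLocking
    inline bool MarkSketch::must_be_preserved() const {
      if (!use_biased_locking)
        return !unlocked;                      // cheap fast-path test
      return false;                            // biased-locking policy elided
    }

    // --- a user of the inline header ----------------------------------------
    bool use_biased_locking = false;
    int main() {
      MarkSketch m = { false };                // a locked mark, for the sketch
      return m.must_be_preserved() ? 0 : 1;
    }
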
 
 inline markOop markOopDesc::prototype_for_object(oop obj) {
 #ifdef ASSERT
--- a/hotspot/src/share/vm/oops/objArrayKlass.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -86,14 +86,18 @@
 
   const size_t word_len = objArrayOopDesc::array_size(length);
 
-  // For performance reasons, we assume we are using a card marking write
-  // barrier. The assert will fail if this is not the case.
   BarrierSet* bs = Universe::heap()->barrier_set();
+  // For performance reasons, we assume that the write barrier we

+  // are using has optimized modes for arrays of references.  At least one
+  // of the asserts below will fail if this is not the case.
   assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");
+  assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well.");
 
+  MemRegion dst_mr = MemRegion((HeapWord*)dst, word_len);
   if (s == d) {
     // since source and destination are equal we do not need conversion checks.
     assert(length > 0, "sanity check");
+    bs->write_ref_array_pre(dst_mr);
     Copy::conjoint_oops_atomic(src, dst, length);
   } else {
     // We have to make sure all elements conform to the destination array
@@ -101,6 +105,7 @@
     klassOop stype = objArrayKlass::cast(s->klass())->element_klass();
     if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) {
       // elements are guaranteed to be subtypes, so no check necessary
+      bs->write_ref_array_pre(dst_mr);
       Copy::conjoint_oops_atomic(src, dst, length);
     } else {
       // slow case: need individual subtype checks
@@ -110,8 +115,13 @@
       for (T* p = dst; from < end; from++, p++) {
         // XXX this is going to be slow.
         T element = *from;
-        if (oopDesc::is_null(element) ||
-            Klass::cast(oopDesc::decode_heap_oop_not_null(element)->klass())->is_subtype_of(bound)) {
+        // even slower now
+        bool element_is_null = oopDesc::is_null(element);
+        oop new_val = element_is_null ? oop(NULL)
+                                      : oopDesc::decode_heap_oop_not_null(element);
+        if (element_is_null ||
+            Klass::cast((new_val->klass()))->is_subtype_of(bound)) {
+          bs->write_ref_field_pre(p, new_val);
           *p = *from;
         } else {
           // We must do a barrier to cover the partial copy.
@@ -401,11 +411,11 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN)
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m)
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r)
 
 int objArrayKlass::oop_adjust_pointers(oop obj) {
   assert(obj->is_objArray(), "obj must be obj array");
@@ -465,8 +475,8 @@
     assert(Universe::is_bootstrapping(), "partial objArray only at startup");
     return JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC;
   }
-  // Recurse down the element list
-  jint element_flags = Klass::cast(element_klass())->compute_modifier_flags(CHECK_0);
+  // Return the flags of the bottom element type.
+  jint element_flags = Klass::cast(bottom_klass())->compute_modifier_flags(CHECK_0);
 
   return (element_flags & (JVM_ACC_PUBLIC | JVM_ACC_PRIVATE | JVM_ACC_PROTECTED))
                         | (JVM_ACC_ABSTRACT | JVM_ACC_FINAL);
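
The copy_array path above now issues write_ref_array_pre (or a per-element write_ref_field_pre on the checked path) over the destination before the old references are overwritten, which is what a snapshot-at-the-beginning collector such as G1 needs. A toy sketch of the shape only; the names and the printed "remembering" stand in for the real barrier set calls:

    #include <cstddef>
    #include <cstdio>

    // Record the destination's old, about-to-be-overwritten references so a
    // concurrent marker does not lose them (illustrative stand-in only).
    static void pre_barrier_on_dst(void** dst, size_t len) {
      for (size_t i = 0; i < len; ++i)
        if (dst[i] != 0)
          std::printf("pre-barrier: remembering old ref %p\n", dst[i]);
    }

    static void copy_ref_array(void** src, void** dst, size_t len) {
      pre_barrier_on_dst(dst, len);      // barrier first, as in the hunk above
      for (size_t i = 0; i < len; ++i)   // then the actual element copy
        dst[i] = src[i];
    }

    int main() {
      int a = 0, b = 0;
      void* src[2] = { &a, &b };
      void* dst[2] = { &b, 0 };
      copy_ref_array(src, dst, 2);       // remembers &b before overwriting it
      return 0;
    }
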
--- a/hotspot/src/share/vm/oops/objArrayKlass.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/objArrayKlass.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -111,7 +111,7 @@
                                      int start, int end);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
 
   // JVM support
   jint compute_modifier_flags(TRAPS) const;
--- a/hotspot/src/share/vm/oops/objArrayOop.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/objArrayOop.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -33,4 +33,4 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DEFN)
--- a/hotspot/src/share/vm/oops/objArrayOop.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/objArrayOop.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -29,6 +29,8 @@
   friend class objArrayKlass;
   friend class Runtime1;
   friend class psPromotionManager;
+  friend class CSMarkOopClosure;
+  friend class G1ParScanPartialArrayClosure;
 
   template <class T> T* obj_at_addr(int index) const {
     assert(is_within_bounds(index), "index out of bounds");
@@ -88,5 +90,5 @@
   int oop_iterate_range(OopClosureType* blk, int start, int end);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DECL)
 };
--- a/hotspot/src/share/vm/oops/oop.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/oop.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -363,12 +363,21 @@
   static void set_bs(BarrierSet* bs) { _bs = bs; }
 
   // iterators, returns size of object
-#define OOP_ITERATE_DECL(OopClosureType, nv_suffix)                             \
+#define OOP_ITERATE_DECL(OopClosureType, nv_suffix)                      \
   int oop_iterate(OopClosureType* blk);                                  \
   int oop_iterate(OopClosureType* blk, MemRegion mr);  // Only in mr.
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+
+#define OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)            \
+  int oop_iterate_backwards(OopClosureType* blk);
+
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DECL)
+#endif
 
   void oop_iterate_header(OopClosure* blk);
   void oop_iterate_header(OopClosure* blk, MemRegion mr);
--- a/hotspot/src/share/vm/oops/oop.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -395,10 +395,11 @@
       s = (int)((size_t)round_to(size_in_bytes, MinObjAlignmentInBytes) /
         HeapWordSize);
 
-      // UseParNewGC can change the length field of an "old copy" of an object
-      // array in the young gen so it indicates the stealable portion of
-      // an already copied array. This will cause the first disjunct below
-      // to fail if the sizes are computed across such a concurrent change.
+      // UseParNewGC, UseParallelGC and UseG1GC can change the length field
+      // of an "old copy" of an object array in the young gen so it indicates
+      // the grey portion of an already copied array. This will cause the first
+      // disjunct below to fail if the two comparands are computed across such
+      // a concurrent change.
       // UseParNewGC also runs with promotion labs (which look like int
       // filler arrays) which are subject to changing their declared size
       // when finally retiring a PLAB; this also can cause the first disjunct
@@ -408,13 +409,11 @@
       //     is_objArray() && is_forwarded()   // covers first scenario above
       //  || is_typeArray()                    // covers second scenario above
       // If and when UseParallelGC uses the same obj array oop stealing/chunking
-      // technique, or when G1 is integrated (and currently uses this array chunking
-      // technique) we will need to suitably modify the assertion.
+      // technique, we will need to suitably modify the assertion.
       assert((s == klass->oop_size(this)) ||
-             (((UseParNewGC || UseParallelGC) &&
-              Universe::heap()->is_gc_active()) &&
-              (is_typeArray() ||
-               (is_objArray() && is_forwarded()))),
+             (Universe::heap()->is_gc_active() &&
+              ((is_typeArray() && UseParNewGC) ||
+               (is_objArray()  && is_forwarded() && (UseParNewGC || UseParallelGC || UseG1GC)))),
              "wrong array object size");
     } else {
       // Must be zero, so bite the bullet and take the virtual call.
@@ -441,16 +440,22 @@
   oopDesc::bs()->write_ref_field(p, v);
 }
 
+inline void update_barrier_set_pre(void* p, oop v) {
+  oopDesc::bs()->write_ref_field_pre(p, v);
+}
+
 template <class T> inline void oop_store(T* p, oop v) {
   if (always_do_update_barrier) {
     oop_store((volatile T*)p, v);
   } else {
+    update_barrier_set_pre(p, v);
     oopDesc::encode_store_heap_oop(p, v);
     update_barrier_set(p, v);
   }
 }
 
 template <class T> inline void oop_store(volatile T* p, oop v) {
+  update_barrier_set_pre((void*)p, v);
   // Used by release_obj_field_put, so use release_store_ptr.
   oopDesc::release_encode_store_heap_oop(p, v);
   update_barrier_set((void*)p, v);
@@ -698,8 +703,19 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DEFN)
 
+#ifndef SERIALGC
+#define OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
+                                                                           \
+inline int oopDesc::oop_iterate_backwards(OopClosureType* blk) {           \
+  SpecializationStats::record_call();                                      \
+  return blueprint()->oop_oop_iterate_backwards##nv_suffix(this, blk);     \
+}
+
+ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DEFN)
+#endif // !SERIALGC
 
 inline bool oopDesc::is_shared() const {
   return CompactingPermGenGen::is_shared(this);
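
The oop_store change above now brackets every heap reference store with a pre-barrier (new) in addition to the existing post-barrier. A minimal standalone sketch of that ordering, with invented log_old_value/mark_card helpers standing in for the real BarrierSet calls:

#include <cstdio>

// Hypothetical stand-ins for a SATB-style pre-barrier and a card-marking
// post-barrier; the real BarrierSet interface is richer than this sketch.
static void log_old_value(void** field) { std::printf("pre:  old value %p\n", *field); }
static void mark_card(void** field)     { std::printf("post: card for field %p\n", (void*)field); }

// Mirrors the ordering oop_store() now uses: the pre-barrier runs while the
// field still holds its old value, then the store happens, then the
// post-barrier records the updated location.
static void barriered_store(void** field, void* new_value) {
  log_old_value(field);   // update_barrier_set_pre(p, v)
  *field = new_value;     // encode_store_heap_oop(p, v)
  mark_card(field);       // update_barrier_set(p, v)
}

int main() {
  void* slot = nullptr;
  int dummy = 42;
  barriered_store(&slot, &dummy);
  return 0;
}
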
--- a/hotspot/src/share/vm/opto/addnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/addnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -156,7 +156,8 @@
   if( add1_op == this_op && !con_right ) {
     Node *a12 = add1->in(2);
     const Type *t12 = phase->type( a12 );
-    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) ) {
+    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) &&
+       !(add1->in(1)->is_Phi() && add1->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add1->in(1) != this, "dead loop in AddNode::Ideal");
       add2 = add1->clone();
       add2->set_req(2, in(2));
@@ -173,7 +174,8 @@
   if( add2_op == this_op && !con_left ) {
     Node *a22 = add2->in(2);
     const Type *t22 = phase->type( a22 );
-    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) ) {
+    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) &&
+       !(add2->in(1)->is_Phi() && add2->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add2->in(1) != this, "dead loop in AddNode::Ideal");
       Node *addx = add2->clone();
       addx->set_req(1, in(1));
@@ -225,34 +227,63 @@
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
   // Fold (con1-x)+con2 into (con1+con2)-x
+  if ( op1 == Op_AddI && op2 == Op_SubI ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   if( op1 == Op_SubI ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2    = phase->type( in(2)        );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2    = phase->type( in2        );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubINode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubI ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
               "dead loop in AddINode::Ideal" );
       Node *sub  = new (phase->C, 3) SubINode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubI && in1->in(1) == in2->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubI && phase->type(in(2)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubI && phase->type(in2->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode(in1, in2->in(2) );
 
   // Convert "(0-y)+x" into "(x-y)"
-  if( op1 == Op_SubI && phase->type(in(1)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode( in(2), in(1)->in(2) );
+  if( op1 == Op_SubI && phase->type(in1->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode( in2, in1->in(2) );
 
   // Convert (x>>>z)+y into (x+(y<<z))>>>z for small constant z and y.
   // Helps with array allocation math constant folding
@@ -266,15 +297,15 @@
   // Have not observed cases where type information exists to support
   // positive y and (x <= -(y << z))
   if( op1 == Op_URShiftI && op2 == Op_ConI &&
-      in(1)->in(2)->Opcode() == Op_ConI ) {
-    jint z = phase->type( in(1)->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
-    jint y = phase->type( in(2) )->is_int()->get_con();
+      in1->in(2)->Opcode() == Op_ConI ) {
+    jint z = phase->type( in1->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
+    jint y = phase->type( in2 )->is_int()->get_con();
 
     if( z < 5 && -5 < y && y < 0 ) {
-      const Type *t_in11 = phase->type(in(1)->in(1));
+      const Type *t_in11 = phase->type(in1->in(1));
       if( t_in11 != Type::TOP && (t_in11->is_int()->_lo >= -(y << z)) ) {
-        Node *a = phase->transform( new (phase->C, 3) AddINode( in(1)->in(1), phase->intcon(y<<z) ) );
-        return new (phase->C, 3) URShiftINode( a, in(1)->in(2) );
+        Node *a = phase->transform( new (phase->C, 3) AddINode( in1->in(1), phase->intcon(y<<z) ) );
+        return new (phase->C, 3) URShiftINode( a, in1->in(2) );
       }
     }
   }
@@ -328,39 +359,73 @@
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
+  // Fold (con1-x)+con2 into (con1+con2)-x
+  if ( op1 == Op_AddL && op2 == Op_SubL ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   // Fold (con1-x)+con2 into (con1+con2)-x
   if( op1 == Op_SubL ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2    = phase->type( in(2)        );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2    = phase->type( in2        );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubLNode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubL ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
               "dead loop in AddLNode::Ideal" );
       Node *sub  = new (phase->C, 3) SubLNode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubL && in1->in(1) == in2->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubL && phase->type(in(2)->in(1)) == TypeLong::ZERO )
-    return new (phase->C, 3) SubLNode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubL && phase->type(in2->in(1)) == TypeLong::ZERO )
+    return new (phase->C, 3) SubLNode( in1, in2->in(2) );
+
+  // Convert "(0-y)+x" into "(x-y)"
+  if( op1 == Op_SubL && phase->type(in1->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubLNode( in2, in1->in(2) );
 
   // Convert "X+X+X+X+X...+X+Y" into "k*X+Y" or really convert "X+(X+Y)"
   // into "(X<<1)+Y" and let shift-folding happen.
   if( op2 == Op_AddL &&
-      in(2)->in(1) == in(1) &&
+      in2->in(1) == in1 &&
       op1 != Op_ConL &&
       0 ) {
-    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in(1),phase->intcon(1)));
-    return new (phase->C, 3) AddLNode(shift,in(2)->in(2));
+    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in1,phase->intcon(1)));
+    return new (phase->C, 3) AddLNode(shift,in2->in(2));
   }
 
   return AddNode::Ideal(phase, can_reshape);
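
The extra cases added to AddINode::Ideal and AddLNode::Ideal above are plain algebraic identities over wrap-around integer arithmetic. A small self-contained spot check of those identities (using uint32_t so overflow wraps as it does for JVM ints; this is a test sketch, not C2 code):

#include <cassert>
#include <cstdint>

int main() {
  // Spot check of the rewrites added to AddINode::Ideal / AddLNode::Ideal.
  // Unsigned arithmetic is used so overflow wraps, matching JVM int math.
  const uint32_t vals[] = {0u, 1u, 7u, 0x7fffffffu, 0x80000000u, 0xfffffffeu};
  for (uint32_t a : vals) {
    for (uint32_t b : vals) {
      for (uint32_t c : vals) {
        assert((a - b) + (b + c) == a + c);   // "(a-b)+(b+c)" into "(a+c)"
        assert((a - b) + (c + b) == a + c);   // "(a-b)+(c+b)" into "(a+c)"
        assert((a - b) + (b - c) == a - c);   // "(a-b)+(b-c)" into "(a-c)"
        assert((a - b) + (c - a) == c - b);   // "(a-b)+(c-a)" into "(c-b)"
      }
    }
  }
  return 0;
}
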
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -25,19 +25,6 @@
 #include "incls/_precompiled.incl"
 #include "incls/_bytecodeInfo.cpp.incl"
 
-// These variables are declared in parse1.cpp
-extern int  explicit_null_checks_inserted;
-extern int  explicit_null_checks_elided;
-extern int  explicit_null_checks_inserted_old;
-extern int  explicit_null_checks_elided_old;
-extern int  nodes_created_old;
-extern int  nodes_created;
-extern int  methods_parsed_old;
-extern int  methods_parsed;
-extern int  methods_seen;
-extern int  methods_seen_old;
-
-
 //=============================================================================
 //------------------------------InlineTree-------------------------------------
 InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
@@ -517,27 +504,3 @@
   }
   return iltp;
 }
-
-// ----------------------------------------------------------------------------
-#ifndef PRODUCT
-
-static void per_method_stats() {
-  // Compute difference between this method's cumulative totals and old totals
-  int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
-  int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
-
-  // Print differences
-  if( explicit_null_checks_cur )
-    tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
-  if( elided_null_checks_cur )
-    tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
-
-  // Store the current cumulative totals
-  nodes_created_old = nodes_created;
-  methods_parsed_old = methods_parsed;
-  methods_seen_old = methods_seen;
-  explicit_null_checks_inserted_old = explicit_null_checks_inserted;
-  explicit_null_checks_elided_old = explicit_null_checks_elided;
-}
-
-#endif
--- a/hotspot/src/share/vm/opto/callnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1034,6 +1034,39 @@
 //=============================================================================
 uint AllocateArrayNode::size_of() const { return sizeof(*this); }
 
+// Retrieve the length from the AllocateArrayNode. Narrow the type with a
+// CastII, if appropriate.  If we are not allowed to create new nodes, and
+// a CastII is appropriate, return NULL.
+Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) {
+  Node *length = in(AllocateNode::ALength);
+  assert(length != NULL, "length is not null");
+
+  const TypeInt* length_type = phase->find_int_type(length);
+  const TypeAryPtr* ary_type = oop_type->isa_aryptr();
+
+  if (ary_type != NULL && length_type != NULL) {
+    const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type);
+    if (narrow_length_type != length_type) {
+      // Assert one of:
+      //   - the narrow_length is 0
+      //   - the narrow_length is not wider than length
+      assert(narrow_length_type == TypeInt::ZERO ||
+             (narrow_length_type->_hi <= length_type->_hi &&
+              narrow_length_type->_lo >= length_type->_lo),
+             "narrow type must be narrower than length type");
+
+      // Return NULL if new nodes are not allowed
+      if (!allow_new_nodes) return NULL;
+      // Create a cast which is control dependent on the initialization to
+      // propagate the fact that the array length must be positive.
+      length = new (phase->C, 2) CastIINode(length, narrow_length_type);
+      length->set_req(0, initialization()->proj_out(0));
+    }
+  }
+
+  return length;
+}
+
 //=============================================================================
 uint LockNode::size_of() const { return sizeof(*this); }
 
--- a/hotspot/src/share/vm/opto/callnode.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/callnode.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -755,6 +755,15 @@
   virtual int Opcode() const;
   virtual uint size_of() const; // Size is bigger
 
+  // Dig the length operand out of an array allocation site.
+  Node* Ideal_length() {
+    return in(AllocateNode::ALength);
+  }
+
+  // Dig the length operand out of an array allocation site and narrow the
+  // type with a CastII, if necessary
+  Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true);
+
   // Pattern-match a possible usage of AllocateArrayNode.
   // Return null if no allocation is recognized.
   static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) {
@@ -762,12 +771,6 @@
     return (allo == NULL || !allo->is_AllocateArray())
            ? NULL : allo->as_AllocateArray();
   }
-
-  // Dig the length operand out of a (possible) array allocation site.
-  static Node* Ideal_length(Node* ptr, PhaseTransform* phase) {
-    AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase);
-    return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength);
-  }
 };
 
 //------------------------------AbstractLockNode-----------------------------------
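
make_ideal_length (declared just above and implemented in callnode.cpp earlier in this patch) only attaches a CastII when the array type yields strictly tighter bounds, and its assert requires the narrowed interval to be either the zero range or contained in the original one. A tiny sketch of that containment check, with a made-up IntRange standing in for TypeInt:

#include <cassert>

// Hypothetical integer interval, standing in for TypeInt in this sketch.
struct IntRange { int lo; int hi; };

// True if 'narrow' is an acceptable narrowing of 'orig' in the sense the
// AllocateArrayNode::make_ideal_length assert requires: either the
// degenerate [0,0] range or a sub-interval of the original bounds.
static bool acceptable_narrowing(IntRange narrow, IntRange orig) {
  bool is_zero   = (narrow.lo == 0 && narrow.hi == 0);
  bool contained = (narrow.lo >= orig.lo && narrow.hi <= orig.hi);
  return is_zero || contained;
}

int main() {
  IntRange length   = { -5, 1000 };   // what is already known about the length
  IntRange narrowed = {  0, 1000 };   // what the array type implies (non-negative)
  assert(acceptable_narrowing(narrowed, length));
  assert(!acceptable_narrowing(IntRange{0, 2000}, length));  // would widen, not allowed
  return 0;
}
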
--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1665,7 +1665,11 @@
             // compress paths and change unreachable cycles to TOP
             // If not, we can update the input infinitely along a MergeMem cycle
             // Equivalent code is in MemNode::Ideal_common
-            Node         *m  = phase->transform(n);
+            Node *m  = phase->transform(n);
+            if (outcnt() == 0) {  // Above transform() may kill us!
+              progress = phase->C->top();
+              break;
+            }
             // If transformed to a MergeMem, get the desired slice
             // Otherwise the returned node represents memory for every slice
             Node *new_mem = (m->is_MergeMem()) ?
@@ -1765,9 +1769,60 @@
     }
   }
 
+#ifdef _LP64
+  // Push DecodeN down through phi.
+  // The rest of the phi graph will be transformed by splitting EncodeP nodes up through the phis.
+  if (UseCompressedOops && can_reshape && progress == NULL) {
+    bool may_push = true;
+    bool has_decodeN = false;
+    Node* in_decodeN = NULL;
+    for (uint i=1; i<req(); ++i) {// For all paths in
+      Node *ii = in(i);
+      if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
+        has_decodeN = true;
+        in_decodeN = ii->in(1);
+      } else if (!ii->is_Phi()) {
+        may_push = false;
+      }
+    }
+
+    if (has_decodeN && may_push) {
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      // Note: in_decodeN is used only to define the type of new phi here.
+      PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
+      uint orig_cnt = req();
+      for (uint i=1; i<req(); ++i) {// For all paths in
+        Node *ii = in(i);
+        Node* new_ii = NULL;
+        if (ii->is_DecodeN()) {
+          assert(ii->bottom_type() == bottom_type(), "sanity");
+          new_ii = ii->in(1);
+        } else {
+          assert(ii->is_Phi(), "sanity");
+          if (ii->as_Phi() == this) {
+            new_ii = new_phi;
+          } else {
+            new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type());
+            igvn->register_new_node_with_optimizer(new_ii);
+          }
+        }
+        new_phi->set_req(i, new_ii);
+      }
+      igvn->register_new_node_with_optimizer(new_phi, this);
+      progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type());
+    }
+  }
+#endif
+
   return progress;              // Return any progress
 }
 
+//------------------------------is_tripcount-----------------------------------
+bool PhiNode::is_tripcount() const {
+  return (in(0) != NULL && in(0)->is_CountedLoop() &&
+          in(0)->as_CountedLoop()->phi() == this);
+}
+
 //------------------------------out_RegMask------------------------------------
 const RegMask &PhiNode::in_RegMask(uint i) const {
   return i ? out_RegMask() : RegMask::Empty;
@@ -1783,9 +1838,7 @@
 #ifndef PRODUCT
 void PhiNode::dump_spec(outputStream *st) const {
   TypeNode::dump_spec(st);
-  if (in(0) != NULL &&
-      in(0)->is_CountedLoop() &&
-      in(0)->as_CountedLoop()->phi() == this) {
+  if (is_tripcount()) {
     st->print(" #tripcount");
   }
 }
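
Pushing the DecodeN below the Phi is sound because decoding commutes with selecting: decoding each input and then choosing gives the same value as choosing the narrow inputs and decoding once. A toy illustration of that equivalence (a shift stands in for the real compressed-oop decode; none of this is C2 code):

#include <cassert>
#include <cstdint>

// Toy stand-in for the compressed-oop decode: just a shift, purely to make
// the algebra visible (this is not the real oop compression scheme).
static uint64_t decode(uint32_t narrow) { return static_cast<uint64_t>(narrow) << 3; }

// Before the transform: a "phi" selecting between two already-decoded values.
static uint64_t phi_of_decodes(bool c, uint32_t a, uint32_t b) {
  return c ? decode(a) : decode(b);
}

// After the transform: select the narrow values first, decode once.  Real Phi
// inputs that are not DecodeN nodes get an EncodeP inserted, which is what
// keeps the two forms equivalent in the compiler.
static uint64_t decode_of_phi(bool c, uint32_t a, uint32_t b) {
  return decode(c ? a : b);
}

int main() {
  for (int c = 0; c <= 1; ++c) {
    assert(phi_of_decodes(c != 0, 5u, 9u) == decode_of_phi(c != 0, 5u, 9u));
  }
  return 0;
}
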
--- a/hotspot/src/share/vm/opto/cfgnode.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/cfgnode.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -162,6 +162,8 @@
     return NULL;  // not a copy!
   }
 
+  bool is_tripcount() const;
+
   // Determine a unique non-trivial input, if any.
   // Ignore casts if it helps.  Return NULL on failure.
   Node* unique_input(PhaseTransform *phase);
--- a/hotspot/src/share/vm/opto/compile.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/compile.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -467,6 +467,7 @@
     }
   }
   set_print_assembly(print_opto_assembly);
+  set_parsed_irreducible_loop(false);
 #endif
 
   if (ProfileTraps) {
@@ -550,6 +551,8 @@
       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
     }
 
+    print_method("Before RemoveUseless");
+
     // Remove clutter produced by parsing.
     if (!failing()) {
       ResourceMark rm;
@@ -615,8 +618,6 @@
   if (failing())  return;
   NOT_PRODUCT( verify_graph_edges(); )
 
-  print_method("Before Matching");
-
 #ifndef PRODUCT
   if (PrintIdeal) {
     ttyLocker ttyl;  // keep the following output all in one block
@@ -720,6 +721,7 @@
   TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
   TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
   set_print_assembly(PrintFrameConverterAssembly);
+  set_parsed_irreducible_loop(false);
 #endif
   CompileWrapper cw(this);
   Init(/*AliasLevel=*/ 0);
@@ -2073,6 +2075,44 @@
   }
 
 #ifdef _LP64
+  case Op_CastPP:
+    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
+      Compile* C = Compile::current();
+      Node* in1 = n->in(1);
+      const Type* t = n->bottom_type();
+      Node* new_in1 = in1->clone();
+      new_in1->as_DecodeN()->set_type(t);
+
+      if (!Matcher::clone_shift_expressions) {
+        //
+        // x86, ARM and friends can handle 2 adds in addressing mode
+        // and Matcher can fold a DecodeN node into address by using
+        // a narrow oop directly and do implicit NULL check in address:
+        //
+        // [R12 + narrow_oop_reg<<3 + offset]
+        // NullCheck narrow_oop_reg
+        //
+        // On other platforms (Sparc) we have to keep new DecodeN node and
+        // use it to do implicit NULL check in address:
+        //
+        // decode_not_null narrow_oop_reg, base_reg
+        // [base_reg + offset]
+        // NullCheck base_reg
+        //
+        // Pin the new DecodeN node to the non-null path on these platforms (Sparc)
+        // to keep the information to which NULL check the new DecodeN node
+        // corresponds to use it as value in implicit_null_check().
+        //
+        new_in1->set_req(0, n->in(0));
+      }
+
+      n->subsume_by(new_in1);
+      if (in1->outcnt() == 0) {
+        in1->disconnect_inputs(NULL);
+      }
+    }
+    break;
+
   case Op_CmpP:
     // Do this transformation here to preserve CmpPNode::sub() and
     // other TypePtr related Ideal optimizations (for example, ptr nullness).
@@ -2092,24 +2132,44 @@
       } else if (in2->Opcode() == Op_ConP) {
         const Type* t = in2->bottom_type();
         if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
-          if (Matcher::clone_shift_expressions) {
-            // x86, ARM and friends can handle 2 adds in addressing mode.
-            // Decode a narrow oop and do implicit NULL check in address
-            // [R12 + narrow_oop_reg<<3 + offset]
-            new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
-          } else {
-            // Don't replace CmpP(o ,null) if 'o' is used in AddP
-            // to generate implicit NULL check on Sparc where
-            // narrow oops can't be used in address.
-            uint i = 0;
-            for (; i < in1->outcnt(); i++) {
-              if (in1->raw_out(i)->is_AddP())
-                break;
-            }
-            if (i >= in1->outcnt()) {
-              new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
-            }
-          }
+          new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+          //
+          // This transformation together with CastPP transformation above
+          // will generate code for implicit NULL checks for compressed oops.
+          //
+          // The original code after Optimize()
+          //
+          //    LoadN memory, narrow_oop_reg
+          //    decode narrow_oop_reg, base_reg
+          //    CmpP base_reg, NULL
+          //    CastPP base_reg // NotNull
+          //    Load [base_reg + offset], val_reg
+          //
+          // after these transformations will be
+          //
+          //    LoadN memory, narrow_oop_reg
+          //    CmpN narrow_oop_reg, NULL
+          //    decode_not_null narrow_oop_reg, base_reg
+          //    Load [base_reg + offset], val_reg
+          //
+          // and the uncommon path (== NULL) will use narrow_oop_reg directly
+          // since narrow oops can be used in debug info now (see the code in
+          // final_graph_reshaping_walk()).
+          //
+          // At the end the code will be matched to
+          // on x86:
+          //
+          //    Load_narrow_oop memory, narrow_oop_reg
+          //    Load [R12 + narrow_oop_reg<<3 + offset], val_reg
+          //    NullCheck narrow_oop_reg
+          //
+          // and on sparc:
+          //
+          //    Load_narrow_oop memory, narrow_oop_reg
+          //    decode_not_null narrow_oop_reg, base_reg
+          //    Load [base_reg + offset], val_reg
+          //    NullCheck base_reg
+          //
         } else if (t->isa_oopptr()) {
           new_in2 = ConNode::make(C, t->make_narrowoop());
         }
@@ -2126,6 +2186,49 @@
       }
     }
     break;
+
+  case Op_DecodeN:
+    assert(!n->in(1)->is_EncodeP(), "should be optimized out");
+    break;
+
+  case Op_EncodeP: {
+    Node* in1 = n->in(1);
+    if (in1->is_DecodeN()) {
+      n->subsume_by(in1->in(1));
+    } else if (in1->Opcode() == Op_ConP) {
+      Compile* C = Compile::current();
+      const Type* t = in1->bottom_type();
+      if (t == TypePtr::NULL_PTR) {
+        n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
+      } else if (t->isa_oopptr()) {
+        n->subsume_by(ConNode::make(C, t->make_narrowoop()));
+      }
+    }
+    if (in1->outcnt() == 0) {
+      in1->disconnect_inputs(NULL);
+    }
+    break;
+  }
+
+  case Op_Phi:
+    if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
+      // The EncodeP optimization may create Phi with the same edges
+      // for all paths. It is not handled well by Register Allocator.
+      Node* unique_in = n->in(1);
+      assert(unique_in != NULL, "");
+      uint cnt = n->req();
+      for (uint i = 2; i < cnt; i++) {
+        Node* m = n->in(i);
+        assert(m != NULL, "");
+        if (unique_in != m)
+          unique_in = NULL;
+      }
+      if (unique_in != NULL) {
+        n->subsume_by(unique_in);
+      }
+    }
+    break;
+
 #endif
 
   case Op_ModI:
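
The new Op_Phi case above collapses a narrow-oop Phi whose data inputs all turn out to be the same node after the EncodeP rewrites. A sketch of that unique-input scan over a plain vector, with hypothetical FakeNode/unique_input names:

#include <cstddef>
#include <vector>

// Stand-in for a node; only identity matters for this sketch.
struct FakeNode { int id; };

// Returns the single node that feeds every slot of 'inputs', or nullptr if
// the inputs disagree -- the same scan the Op_Phi case performs before
// calling subsume_by(unique_in).
static FakeNode* unique_input(const std::vector<FakeNode*>& inputs) {
  if (inputs.empty()) return nullptr;
  FakeNode* unique = inputs[0];
  for (std::size_t i = 1; i < inputs.size(); ++i) {
    if (inputs[i] != unique) return nullptr;
  }
  return unique;
}

int main() {
  FakeNode a{1}, b{2};
  std::vector<FakeNode*> same{&a, &a, &a};
  std::vector<FakeNode*> mixed{&a, &b, &a};
  return (unique_input(same) == &a && unique_input(mixed) == nullptr) ? 0 : 1;
}
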
--- a/hotspot/src/share/vm/opto/compile.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/compile.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -160,6 +160,7 @@
   bool                  _print_assembly;        // True if we should dump assembly code for this compilation
 #ifndef PRODUCT
   bool                  _trace_opto_output;
+  bool                  _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
 #endif
 
   // Compilation environment.
@@ -319,6 +320,8 @@
   }
 #ifndef PRODUCT
   bool          trace_opto_output() const       { return _trace_opto_output; }
+  bool              parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
+  void          set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
 #endif
 
   void begin_method() {
--- a/hotspot/src/share/vm/opto/connode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/connode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -433,8 +433,8 @@
 // If not converting int->oop, throw away cast after constant propagation
 Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
   const Type *t = ccp->type(in(1));
-  if (!t->isa_oop_ptr()) {
-    return NULL;                // do not transform raw pointers
+  if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
+    return NULL; // do not transform raw pointers or narrow oops
   }
   return ConstraintCastNode::Ideal_DU_postCCP(ccp);
 }
--- a/hotspot/src/share/vm/opto/divnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/divnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -110,10 +110,13 @@
     } else if( dividend->Opcode() == Op_AndI ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int();
-      if( andconi && andconi->is_con(-d) ) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeInt *andconi_t = phase->type( dividend->in(2) )->isa_int();
+      if( andconi_t && andconi_t->is_con() ) {
+        jint andconi = andconi_t->get_con();
+        if( andconi < 0 && is_power_of_2(-andconi) && (-andconi) >= d ) {
+          dividend = dividend->in(1);
+          needs_rounding = false;
+        }
       }
     }
 
@@ -316,10 +319,13 @@
     } else if( dividend->Opcode() == Op_AndL ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long();
-      if( andconl && andconl->is_con(-d)) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeLong *andconl_t = phase->type( dividend->in(2) )->isa_long();
+      if( andconl_t && andconl_t->is_con() ) {
+        jlong andconl = andconl_t->get_con();
+        if( andconl < 0 && is_power_of_2_long(-andconl) && (-andconl) >= d ) {
+          dividend = dividend->in(1);
+          needs_rounding = false;
+        }
       }
     }
 
@@ -704,11 +710,18 @@
   if( t2 == TypeD::ONE )
     return t1;
 
-  // If divisor is a constant and not zero, divide them numbers
-  if( t1->base() == Type::DoubleCon &&
-      t2->base() == Type::DoubleCon &&
-      t2->getd() != 0.0 ) // could be negative zero
-    return TypeD::make( t1->getd()/t2->getd() );
+#if defined(IA32)
+  if (!phase->C->method()->is_strict())
+    // Can't trust native compilers to properly fold strict double
+    // division with round-to-zero on this platform.
+#endif
+    {
+      // If the divisor is a constant and not zero, divide the numbers
+      if( t1->base() == Type::DoubleCon &&
+          t2->base() == Type::DoubleCon &&
+          t2->getd() != 0.0 ) // could be negative zero
+        return TypeD::make( t1->getd()/t2->getd() );
+    }
 
   // If the dividend is a constant zero
   // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
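
The divnode.cpp change above generalizes the old exact is_con(-d) test: any negative power-of-two AND mask at least as large as the (power-of-two) divisor clears enough low bits that the signed division needs no rounding fix-up. A standalone sketch of that predicate under those assumptions:

#include <cassert>
#include <cstdint>

static bool is_power_of_2(int64_t x) { return x > 0 && (x & (x - 1)) == 0; }

// Mirrors the new test: andcon is the constant operand of the AndI/AndL,
// d the positive power-of-two divisor.  A negative power-of-two mask that is
// at least as large as d clears at least log2(d) low bits, so the dividend
// is already a multiple of d and the division needs no rounding adjustment.
static bool mask_makes_rounding_unnecessary(int64_t andcon, int64_t d) {
  return andcon < 0 && is_power_of_2(-andcon) && (-andcon) >= d;
}

int main() {
  assert(mask_makes_rounding_unnecessary(-8, 8));    // the only form the old code accepted
  assert(mask_makes_rounding_unnecessary(-16, 8));   // wider masks are now accepted too
  assert(!mask_makes_rounding_unnecessary(-4, 8));   // mask too small: low bits survive
  assert(!mask_makes_rounding_unnecessary(7, 8));    // a positive constant is not such a mask
  // Example: (x & -16) is a multiple of 8, so dividing it by 8 is exact.
  int32_t x = -37;
  assert(((x & -16) / 8) * 8 == (x & -16));
  return 0;
}
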
--- a/hotspot/src/share/vm/opto/doCall.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/doCall.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -795,7 +795,7 @@
 
     ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
     if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
-        (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) {
+        (ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) {
       // ikl is a same or better type than the original actual_receiver,
       // e.g. static receiver from bytecodes.
       actual_receiver = ikl;
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -587,7 +587,7 @@
 #ifdef ASSERT
   _bci    = kit->bci();
   Parse* parser = kit->is_Parse();
-  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
   _block  = block;
 #endif
 }
@@ -596,7 +596,7 @@
 #ifdef ASSERT
   assert(kit->bci() == _bci, "bci must not shift");
   Parse* parser = kit->is_Parse();
-  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
   assert(block == _block,    "block must not shift");
 #endif
   kit->set_map(_map);
@@ -1049,10 +1049,19 @@
 //-------------------------load_array_length-----------------------------------
 Node* GraphKit::load_array_length(Node* array) {
   // Special-case a fresh allocation to avoid building nodes:
-  Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn);
-  if (alen != NULL)  return alen;
-  Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
-  return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
+  AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn);
+  Node *alen;
+  if (alloc == NULL) {
+    Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
+    alen = _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
+  } else {
+    alen = alloc->Ideal_length();
+    Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_aryptr(), &_gvn);
+    if (ccast != alen) {
+      alen = _gvn.transform(ccast);
+    }
+  }
+  return alen;
 }
 
 //------------------------------do_null_check----------------------------------
@@ -1180,6 +1189,12 @@
   else
     reason = Deoptimization::Reason_div0_check;
 
+  // %%% Since Reason_unhandled is not recorded on a per-bytecode basis,
+  // ciMethodData::has_trap_at will return a conservative -1 if any
+  // must-be-null assertion has failed.  This could cause performance
+  // problems for a method after its first do_null_assert failure.
+  // Consider using 'Reason_class_check' instead?
+
   // To cause an implicit null check, we set the not-null probability
  // to the maximum (PROB_MAX).  For an explicit check the probability
   // is set to a smaller value.
@@ -1367,6 +1382,10 @@
   BarrierSet* bs = Universe::heap()->barrier_set();
   set_control(ctl);
   switch (bs->kind()) {
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+        g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
+      break;
 
     case BarrierSet::CardTableModRef:
     case BarrierSet::CardTableExtension:
@@ -1391,6 +1410,10 @@
   BarrierSet* bs = Universe::heap()->barrier_set();
   set_control(ctl);
   switch (bs->kind()) {
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+        g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
+      break;
 
     case BarrierSet::CardTableModRef:
     case BarrierSet::CardTableExtension:
@@ -2833,20 +2856,18 @@
   assert(just_allocated_object(control()) == javaoop, "just allocated");
 
 #ifdef ASSERT
-  { // Verify that the AllocateNode::Ideal_foo recognizers work:
-    Node* kn = alloc->in(AllocateNode::KlassNode);
-    Node* ln = alloc->in(AllocateNode::ALength);
-    assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn,
-           "Ideal_klass works");
-    assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn,
-           "Ideal_klass works");
+  { // Verify that the AllocateNode::Ideal_allocation recognizers work:
+    assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc,
+           "Ideal_allocation works");
+    assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc,
+           "Ideal_allocation works");
     if (alloc->is_AllocateArray()) {
-      assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln,
-             "Ideal_length works");
-      assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln,
-             "Ideal_length works");
+      assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(),
+             "Ideal_allocation works");
+      assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(),
+             "Ideal_allocation works");
     } else {
-      assert(ln->is_top(), "no length, please");
+      assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please");
     }
   }
 #endif //ASSERT
@@ -3095,25 +3116,20 @@
   // (This happens via a non-constant argument to inline_native_newArray.)
   // In any case, the value of klass_node provides the desired array type.
   const TypeInt* length_type = _gvn.find_int_type(length);
-  const TypeInt* narrow_length_type = NULL;
   const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
   if (ary_type->isa_aryptr() && length_type != NULL) {
     // Try to get a better type than POS for the size
     ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
-    narrow_length_type = ary_type->is_aryptr()->size();
-    if (narrow_length_type == length_type)
-      narrow_length_type = NULL;
   }
 
   Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
 
-  // Cast length on remaining path to be positive:
-  if (narrow_length_type != NULL) {
-    Node* ccast = new (C, 2) CastIINode(length, narrow_length_type);
-    ccast->set_req(0, control());
-    _gvn.set_type_bottom(ccast);
-    record_for_igvn(ccast);
-    if (map()->find_edge(length) >= 0) {
+  // Cast length on remaining path to be as narrow as possible
+  if (map()->find_edge(length) >= 0) {
+    Node* ccast = alloc->make_ideal_length(ary_type, &_gvn);
+    if (ccast != length) {
+      _gvn.set_type_bottom(ccast);
+      record_for_igvn(ccast);
       replace_in_map(length, ccast);
     }
   }
@@ -3177,3 +3193,251 @@
   }
   return NULL;
 }
+
+void GraphKit::g1_write_barrier_pre(Node* obj,
+                                    Node* adr,
+                                    uint alias_idx,
+                                    Node* val,
+                                    const Type* val_type,
+                                    BasicType bt) {
+  IdealKit ideal(gvn(), control(), merged_memory(), true);
+#define __ ideal.
+  __ declares_done();
+
+  Node* thread = __ thread();
+
+  Node* no_ctrl = NULL;
+  Node* no_base = __ top();
+  Node* zero = __ ConI(0);
+
+  float likely  = PROB_LIKELY(0.999);
+  float unlikely  = PROB_UNLIKELY(0.999);
+
+  BasicType active_type = in_bytes(PtrQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
+  assert(in_bytes(PtrQueue::byte_width_of_active()) == 4 || in_bytes(PtrQueue::byte_width_of_active()) == 1, "flag width");
+
+  // Offsets into the thread
+  const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() +  // 648
+                                          PtrQueue::byte_offset_of_active());
+  const int index_offset   = in_bytes(JavaThread::satb_mark_queue_offset() +  // 656
+                                          PtrQueue::byte_offset_of_index());
+  const int buffer_offset  = in_bytes(JavaThread::satb_mark_queue_offset() +  // 652
+                                          PtrQueue::byte_offset_of_buf());
+  // Now the actual pointers into the thread
+
+  // set_control( ctl);
+
+  Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset));
+  Node* buffer_adr  = __ AddP(no_base, thread, __ ConX(buffer_offset));
+  Node* index_adr   = __ AddP(no_base, thread, __ ConX(index_offset));
+
+  // Now some of the values
+
+  Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
+  Node* index   = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
+  Node* buffer  = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+
+  // if (!marking)
+  __ if_then(marking, BoolTest::ne, zero); {
+
+    const Type* t1 = adr->bottom_type();
+    const Type* t2 = val->bottom_type();
+
+    Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);
+    // if (orig != NULL)
+    __ if_then(orig, BoolTest::ne, null()); {
+
+      // load original value
+      // alias_idx correct??
+
+      // is the queue for this thread full?
+      __ if_then(index, BoolTest::ne, zero, likely); {
+
+        // decrement the index
+        Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
+        Node* next_indexX = next_index;
+#ifdef _LP64
+          // We could refine the type for what it's worth
+          // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
+          next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
+#endif // _LP64
+
+        // Now get the buffer location we will log the original value into and store it
+
+        Node *log_addr = __ AddP(no_base, buffer, next_indexX);
+        // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM));
+        __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw);
+
+
+        // update the index
+        // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
+        // This is a hack to force this store to occur before the oop store that is coming up
+        __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM));
+
+      } __ else_(); {
+
+        // logging buffer is full, call the runtime
+        const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
+        // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread);
+        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread);
+      } __ end_if();
+    } __ end_if();
+  } __ end_if();
+
+  __ drain_delay_transform();
+  set_control( __ ctrl());
+  set_all_memory( __ merged_memory());
+
+#undef __
+}
+
+//
+// Update the card table and add card address to the queue
+//
+void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) {
+#define __ ideal->
+  Node* zero = __ ConI(0);
+  Node* no_base = __ top();
+  BasicType card_bt = T_BYTE;
+  // Smash zero into card. MUST BE ORDERED WRT TO STORE
+  __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw);
+
+  //  Now do the queue work
+  __ if_then(index, BoolTest::ne, zero); {
+
+    Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
+    Node* next_indexX = next_index;
+#ifdef _LP64
+    // We could refine the type for what it's worth
+    // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
+    next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
+#endif // _LP64
+    Node* log_addr = __ AddP(no_base, buffer, next_indexX);
+
+    __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw);
+    __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
+
+  } __ else_(); {
+    __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
+  } __ end_if();
+#undef __
+}
+
+void GraphKit::g1_write_barrier_post(Node* store,
+                                     Node* obj,
+                                     Node* adr,
+                                     uint alias_idx,
+                                     Node* val,
+                                     BasicType bt,
+                                     bool use_precise) {
+  // If we are writing a NULL then we need no post barrier
+
+  if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
+    // Must be NULL
+    const Type* t = val->bottom_type();
+    assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");
+    // No post barrier if writing NULLx
+    return;
+  }
+
+  if (!use_precise) {
+    // All card marks for a (non-array) instance are in one place:
+    adr = obj;
+  }
+  // (Else it's an array (or unknown), and we want more precise card marks.)
+  assert(adr != NULL, "");
+
+  IdealKit ideal(gvn(), control(), merged_memory(), true);
+#define __ ideal.
+  __ declares_done();
+
+  Node* thread = __ thread();
+
+  Node* no_ctrl = NULL;
+  Node* no_base = __ top();
+  float likely  = PROB_LIKELY(0.999);
+  float unlikely  = PROB_UNLIKELY(0.999);
+  Node* zero = __ ConI(0);
+  Node* zeroX = __ ConX(0);
+
+  // Get the alias_index for raw card-mark memory
+  const TypePtr* card_type = TypeRawPtr::BOTTOM;
+
+  const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
+
+  // Get the address of the card table
+  CardTableModRefBS* ct =
+    (CardTableModRefBS*)(Universe::heap()->barrier_set());
+  Node *card_table = __ makecon(TypeRawPtr::make((address)ct->byte_map_base));
+  // Get base of card map
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+
+  // Offsets into the thread
+  const int index_offset  = in_bytes(JavaThread::dirty_card_queue_offset() +
+                                     PtrQueue::byte_offset_of_index());
+  const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() +
+                                     PtrQueue::byte_offset_of_buf());
+
+  // Pointers into the thread
+
+  Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset));
+  Node* index_adr =  __ AddP(no_base, thread, __ ConX(index_offset));
+
+  // Now some values
+
+  Node* index  = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
+  Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+
+
+  // Convert the store obj pointer to an int prior to doing math on it
+  // Using addr rather than obj gives accurate card marks
+
+  // Node* cast = __ CastPX(no_ctrl, adr /* obj */);
+
+  // Must use ctrl to prevent "integerized oop" existing across safepoint
+  Node* cast =  __ CastPX(__ ctrl(), ( use_precise ? adr : obj ));
+
+  // Divide pointer by card size
+  Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
+
+  // Combine card table base and card offset
+  Node *card_adr = __ AddP(no_base, card_table, card_offset );
+
+  // If we know the value being stored, check whether the store crosses regions.
+
+  if (val != NULL) {
+    // Does the store cause us to cross regions?
+
+    // Should be able to do an unsigned compare of region_size instead of
+    // an extra shift. Do we have an unsigned compare??
+    // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
+    Node* xor_res =  __ URShiftX ( __ XorX( cast,  __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
+
+    // if (xor_res == 0) same region so skip
+    __ if_then(xor_res, BoolTest::ne, zeroX); {
+
+      // No barrier if we are storing a NULL
+      __ if_then(val, BoolTest::ne, null(), unlikely); {
+
+        // Ok must mark the card if not already dirty
+
+        // load the original value of the card
+        Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
+
+        __ if_then(card_val, BoolTest::ne, zero); {
+          g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+        } __ end_if();
+      } __ end_if();
+    } __ end_if();
+  } else {
+    g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+  }
+
+
+  __ drain_delay_transform();
+  set_control( __ ctrl());
+  set_all_memory( __ merged_memory());
+#undef __
+
+}
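
Both G1 barriers emitted above share one shape: a fast path that appends into a per-thread queue buffer (the index counts down in bytes) and a slow path that calls into the runtime when the buffer is full. A conceptual C++ sketch of that enqueue pattern for the pre-barrier, using an invented SATBQueue rather than the real PtrQueue layout:

#include <cstddef>
#include <vector>

// Hypothetical per-thread SATB queue: 'index' counts down in bytes from the
// end of 'buffer', matching the "decrement index, store at buffer + index"
// scheme the generated barrier code uses.
struct SATBQueue {
  bool   active = false;                 // corresponds to the 'marking' flag
  size_t index  = 4 * sizeof(void*);     // bytes remaining in the buffer
  void*  buffer[4] = {};
  std::vector<void*> runtime_log;        // stands in for the g1_wb_pre runtime call
};

// Conceptual equivalent of the generated pre-barrier: only runs while
// concurrent marking is active, only logs non-null previous values, and
// falls back to the "runtime" when the buffer is exhausted.
static void satb_pre_barrier(SATBQueue& q, void* old_value) {
  if (!q.active || old_value == nullptr) return;
  if (q.index != 0) {
    q.index -= sizeof(void*);
    q.buffer[q.index / sizeof(void*)] = old_value;   // log at buffer + index
  } else {
    q.runtime_log.push_back(old_value);              // buffer full: call runtime
  }
}

int main() {
  SATBQueue q;
  q.active = true;
  int a, b, c, d, e;
  void* olds[] = {&a, &b, &c, &d, &e};
  for (void* p : olds) satb_pre_barrier(q, p);   // fifth entry overflows to the runtime path
  return (q.index == 0 && q.runtime_log.size() == 1) ? 0 : 1;
}
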
--- a/hotspot/src/share/vm/opto/graphKit.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -24,6 +24,7 @@
 
 class FastLockNode;
 class FastUnlockNode;
+class IdealKit;
 class Parse;
 class RootNode;
 
@@ -581,6 +582,27 @@
             && Universe::heap()->can_elide_tlab_store_barriers());
   }
 
+  // G1 pre/post barriers
+  void g1_write_barrier_pre(Node* obj,
+                            Node* adr,
+                            uint alias_idx,
+                            Node* val,
+                            const Type* val_type,
+                            BasicType bt);
+
+  void g1_write_barrier_post(Node* store,
+                             Node* obj,
+                             Node* adr,
+                             uint alias_idx,
+                             Node* val,
+                             BasicType bt,
+                             bool use_precise);
+  // Helper function for g1
+  private:
+  void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
+                    Node* buffer, const TypeFunc* tf);
+
+  public:
   // Helper function to round double arguments before a call
   void round_double_arguments(ciMethod* dest_method);
   void round_double_result(ciMethod* dest_method);
--- a/hotspot/src/share/vm/opto/ifg.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/ifg.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -485,8 +485,9 @@
     // Liveout things are presumed live for the whole block.  We accumulate
     // 'area' accordingly.  If they get killed in the block, we'll subtract
     // the unused part of the block from the area.
-    double cost = b->_freq * double(last_inst-last_phi);
-    assert( cost >= 0, "negative spill cost" );
+    int inst_count = last_inst - last_phi;
+    double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+    assert(!(cost < 0.0), "negative spill cost" );
     IndexSetIterator elements(&liveout);
     uint lidx;
     while ((lidx = elements.next()) != 0) {
@@ -590,7 +591,7 @@
         } else {                // Else it is live
           // A DEF also ends 'area' partway through the block.
           lrgs(r)._area -= cost;
-          assert( lrgs(r)._area >= 0, "negative spill area" );
+          assert(!(lrgs(r)._area < 0.0), "negative spill area" );
 
           // Insure high score for immediate-use spill copies so they get a color
           if( n->is_SpillCopy()
@@ -703,8 +704,9 @@
 
       } // End of if normal register-allocated value
 
-      cost -= b->_freq;         // Area remaining in the block
-      if( cost < 0.0 ) cost = 0.0;  // Cost goes negative in the Phi area
+      // Area remaining in the block
+      inst_count--;
+      cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
 
       // Make all inputs live
       if( !n->is_Phi() ) {      // Phi function uses come from prior block
@@ -751,7 +753,7 @@
             assert( pressure[0] == count_int_pressure  (&liveout), "" );
             assert( pressure[1] == count_float_pressure(&liveout), "" );
           }
-          assert( lrg._area >= 0, "negative spill area" );
+          assert(!(lrg._area < 0.0), "negative spill area" );
         }
       }
     } // End of reverse pass over all instructions in block
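
The ifg.cpp change recomputes the remaining 'area' cost from an instruction count instead of repeatedly subtracting the block frequency, so the value can no longer drift negative, and the asserts are phrased as !(x < 0.0) so a NaN frequency does not trip them. A small sketch of the clamped cost with a hypothetical block_freq value:

#include <cassert>

// Remaining 'area' for a live range: zero once we have walked past all
// non-Phi instructions of the block, never negative.
static double remaining_area_cost(int inst_count, double block_freq) {
  return (inst_count <= 0) ? 0.0 : block_freq * static_cast<double>(inst_count);
}

int main() {
  double freq = 3.5;                       // hypothetical block frequency
  for (int n = 5; n >= -2; --n) {
    double cost = remaining_area_cost(n, freq);
    assert(!(cost < 0.0));                 // same NaN-tolerant phrasing as the new asserts
  }
  return 0;
}
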
--- a/hotspot/src/share/vm/opto/lcm.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/lcm.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -595,7 +595,7 @@
 
       // A few node types require changing a required edge to a precedence edge
       // before allocation.
-      if( UseConcMarkSweepGC ) {
+      if( UseConcMarkSweepGC || UseG1GC ) {
         if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
           // Note: Required edges with an index greater than oper_input_base
           // are not supported by the allocator.
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -679,6 +679,10 @@
   CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
   post_head->set_post_loop(main_head);
 
+  // Reduce the post-loop trip count.
+  CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
+  post_end->_prob = PROB_FAIR;
+
   // Build the main-loop normal exit.
   IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end);
   _igvn.register_new_node_with_optimizer( new_main_exit );
@@ -748,6 +752,9 @@
   pre_head->set_pre_loop(main_head);
   Node *pre_incr = old_new[incr->_idx];
 
+  // Reduce the pre-loop trip count.
+  pre_end->_prob = PROB_FAIR;
+
   // Find the pre-loop normal exit.
   Node* pre_exit = pre_end->proj_out(false);
   assert( pre_exit->Opcode() == Op_IfFalse, "" );
@@ -767,8 +774,8 @@
   register_new_node( min_cmp , new_pre_exit );
   register_new_node( min_bol , new_pre_exit );
 
-  // Build the IfNode
-  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_FAIR, COUNT_UNKNOWN );
+  // Build the IfNode (assume the main-loop is executed always).
+  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
   _igvn.register_new_node_with_optimizer( min_iff );
   set_idom(min_iff, new_pre_exit, dd_main_head);
   set_loop(min_iff, loop->_parent);
@@ -1012,6 +1019,8 @@
     if (!has_ctrl(old))
       set_loop(nnn, loop);
   }
+
+  loop->record_for_igvn();
 }
 
 //------------------------------do_maximally_unroll----------------------------
@@ -1581,10 +1590,10 @@
 
 //=============================================================================
 //------------------------------iteration_split_impl---------------------------
-void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Check and remove empty loops (spam micro-benchmarks)
   if( policy_do_remove_empty_loop(phase) )
-    return;                     // Here we removed an empty loop
+    return true;                     // Here we removed an empty loop
 
   bool should_peel = policy_peeling(phase); // Should we peel?
 
@@ -1594,7 +1603,8 @@
   // This removes loop-invariant tests (usually null checks).
   if( !_head->is_CountedLoop() ) { // Non-counted loop
     if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
-      return;
+      // Partial peel succeeded so terminate this round of loop opts
+      return false;
     }
     if( should_peel ) {            // Should we peel?
 #ifndef PRODUCT
@@ -1604,14 +1614,14 @@
     } else if( should_unswitch ) {
       phase->do_unswitching(this, old_new);
     }
-    return;
+    return true;
   }
   CountedLoopNode *cl = _head->as_CountedLoop();
 
-  if( !cl->loopexit() ) return; // Ignore various kinds of broken loops
+  if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops
 
   // Do nothing special to pre- and post- loops
-  if( cl->is_pre_loop() || cl->is_post_loop() ) return;
+  if( cl->is_pre_loop() || cl->is_post_loop() ) return true;
 
   // Compute loop trip count from profile data
   compute_profile_trip_cnt(phase);
@@ -1624,11 +1634,11 @@
       // Here we did some unrolling and peeling.  Eventually we will
       // completely unroll this loop and it will no longer be a loop.
       phase->do_maximally_unroll(this,old_new);
-      return;
+      return true;
     }
     if (should_unswitch) {
       phase->do_unswitching(this, old_new);
-      return;
+      return true;
     }
   }
 
@@ -1689,14 +1699,16 @@
     if( should_peel )           // Might want to peel but do nothing else
       phase->do_peeling(this,old_new);
   }
+  return true;
 }
 
 
 //=============================================================================
 //------------------------------iteration_split--------------------------------
-void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Recursively iteration split nested loops
-  if( _child ) _child->iteration_split( phase, old_new );
+  if( _child && !_child->iteration_split( phase, old_new ))
+    return false;
 
   // Clean out prior deadwood
   DCE_loop_body();
@@ -1718,7 +1730,9 @@
       _allow_optimizations &&
       !tail()->is_top() ) {     // Also ignore the occasional dead backedge
     if (!_has_call) {
-      iteration_split_impl( phase, old_new );
+      if (!iteration_split_impl( phase, old_new )) {
+        return false;
+      }
     } else if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
     }
@@ -1727,5 +1741,7 @@
   // Minor offset re-organization to remove loop-fallout uses of
   // trip counter.
   if( _head->is_CountedLoop() ) phase->reorg_offsets( this );
-  if( _next ) _next->iteration_split( phase, old_new );
+  if( _next && !_next->iteration_split( phase, old_new ))
+    return false;
+  return true;
 }
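
iteration_split and iteration_split_impl now return a bool so that a successful partial peel can end the whole round of loop optimizations; the recursion over _child and _next simply propagates the first false upward. A sketch of that early-termination pattern over a child/sibling tree, with invented names:

#include <functional>

// Minimal child/sibling loop-tree node, mirroring IdealLoopTree's
// _child/_next links for the purpose of this sketch.
struct LoopTreeNode {
  LoopTreeNode* child = nullptr;
  LoopTreeNode* next  = nullptr;
};

// Visit children first, then this node, then siblings; stop the entire
// traversal as soon as any visit asks for termination (returns false),
// just as iteration_split stops a round of loop opts after a partial peel.
static bool visit_loops(LoopTreeNode* n,
                        const std::function<bool(LoopTreeNode*)>& visit) {
  if (n == nullptr) return true;
  if (n->child && !visit_loops(n->child, visit)) return false;
  if (!visit(n)) return false;
  if (n->next && !visit_loops(n->next, visit)) return false;
  return true;
}

int main() {
  LoopTreeNode root, inner, sibling;
  root.child = &inner;
  inner.next = &sibling;
  int visited = 0;
  // Ask for termination at the second loop visited; the third is skipped.
  bool completed = visit_loops(&root, [&](LoopTreeNode*) { return ++visited < 2; });
  return (!completed && visited == 2) ? 0 : 1;
}
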
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1279,7 +1279,7 @@
     // Visit all children, looking for Phis
     for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
       Node *out = cl->out(i);
-      if (!out->is_Phi())  continue; // Looking for phis
+      if (!out->is_Phi() || out == phi)  continue; // Looking for other phis
       PhiNode* phi2 = out->as_Phi();
       Node *incr2 = phi2->in( LoopNode::LoopBackControl );
       // Look for induction variables of the form:  X += constant
@@ -1388,6 +1388,37 @@
 
 #endif
 
+static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) {
+  if (loop == root) {
+    if (loop->_child != NULL) {
+      log->begin_head("loop_tree");
+      log->end_head();
+      if( loop->_child ) log_loop_tree(root, loop->_child, log);
+      log->tail("loop_tree");
+      assert(loop->_next == NULL, "what?");
+    }
+  } else {
+    Node* head = loop->_head;
+    log->begin_head("loop");
+    log->print(" idx='%d' ", head->_idx);
+    if (loop->_irreducible) log->print("irreducible='1' ");
+    if (head->is_Loop()) {
+      if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' ");
+      if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' ");
+    }
+    if (head->is_CountedLoop()) {
+      CountedLoopNode* cl = head->as_CountedLoop();
+      if (cl->is_pre_loop())  log->print("pre_loop='%d' ",  cl->main_idx());
+      if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx);
+      if (cl->is_post_loop()) log->print("post_loop='%d' ",  cl->main_idx());
+    }
+    log->end_head();
+    if( loop->_child ) log_loop_tree(root, loop->_child, log);
+    log->tail("loop");
+    if( loop->_next  ) log_loop_tree(root, loop->_next, log);
+  }
+}
+
 //=============================================================================
 //------------------------------PhaseIdealLoop---------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
@@ -1624,10 +1655,13 @@
   // Cleanup any modified bits
   _igvn.optimize();
 
-  // Do not repeat loop optimizations if irreducible loops are present
-  // by claiming no-progress.
-  if( _has_irreducible_loops )
-    C->clear_major_progress();
+  // disable assert until issue with split_flow_path is resolved (6742111)
+  // assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(),
+  //        "shouldn't introduce irreducible loops");
+
+  if (C->log() != NULL) {
+    log_loop_tree(_ltree_root, _ltree_root, C->log());
+  }
 }
 
 #ifndef PRODUCT
@@ -2732,11 +2766,7 @@
 }
 
 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
-
-  // Indent by loop nesting depth
-  for( uint x = 0; x < loop->_nest; x++ )
-    tty->print("  ");
-  tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx);
+  loop->dump_head();
 
   // Now scan for CFG nodes in the same loop
   for( uint j=idx; j > 0;  j-- ) {
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -192,6 +192,8 @@
   int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
   void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
 
+  int main_idx() const { return _main_idx; }
+
 
   void set_pre_loop  (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
   void set_main_loop (                     ) { assert(is_normal_loop(),""); _loop_flags |= Main;                         }
@@ -323,12 +325,14 @@
   // Returns TRUE if loop tree is structurally changed.
   bool beautify_loops( PhaseIdealLoop *phase );
 
-  // Perform iteration-splitting on inner loops.  Split iterations to avoid
-  // range checks or one-shot null checks.
-  void iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
+  // Perform iteration-splitting on inner loops.  Split iterations to
+  // avoid range checks or one-shot null checks.  Returns false if the
+  // current round of loop opts should stop.
+  bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
 
-  // Driver for various flavors of iteration splitting
-  void iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
+  // Driver for various flavors of iteration splitting.  Returns false
+  // if the current round of loop opts should stop.
+  bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
 
   // Given dominators, try to find loops with calls that must always be
   // executed (call dominates loop tail).  These loops do not need non-call
--- a/hotspot/src/share/vm/opto/loopopts.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1903,9 +1903,6 @@
       // Use in a phi is considered a use in the associated predecessor block
       use_c = use->in(0)->in(j);
     }
-    if (use_c->is_CountedLoop()) {
-      use_c = use_c->in(LoopNode::EntryControl);
-    }
     set_ctrl(n_clone, use_c);
     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
     get_loop(use_c)->_body.push(n_clone);
@@ -2667,6 +2664,10 @@
   // Fix this by adjusting to use the post-increment trip counter.
   Node *phi = cl->phi();
   if( !phi ) return;            // Dead infinite loop
+
+  // Shape messed up, probably by iteration_split_impl
+  if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return;
+
   bool progress = true;
   while (progress) {
     progress = false;
--- a/hotspot/src/share/vm/opto/macro.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/macro.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -944,25 +944,7 @@
     mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
   }
 
-  Node* eden_top_adr;
-  Node* eden_end_adr;
-  set_eden_pointers(eden_top_adr, eden_end_adr);
-
-  uint raw_idx = C->get_alias_index(TypeRawPtr::BOTTOM);
   assert(ctrl != NULL, "must have control");
-
-  // Load Eden::end.  Loop invariant and hoisted.
-  //
-  // Note: We set the control input on "eden_end" and "old_eden_top" when using
-  //       a TLAB to work around a bug where these values were being moved across
-  //       a safepoint.  These are not oops, so they cannot be include in the oop
-  //       map, but the can be changed by a GC.   The proper way to fix this would
-  //       be to set the raw memory state when generating a  SafepointNode.  However
-  //       this will require extensive changes to the loop optimization in order to
-  //       prevent a degradation of the optimization.
-  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
-  Node* eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
-
   // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
   // they will not be used if "always_slow" is set
   enum { slow_result_path = 1, fast_result_path = 2 };
@@ -982,12 +964,15 @@
     initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn);
   }
 
-  if (DTraceAllocProbes) {
+  if (DTraceAllocProbes ||
+      !UseTLAB && (!Universe::heap()->supports_inline_contig_alloc() ||
+                   (UseConcMarkSweepGC && CMSIncrementalMode))) {
     // Force slow-path allocation
     always_slow = true;
     initial_slow_test = NULL;
   }
 
+
   enum { too_big_or_final_path = 1, need_gc_path = 2 };
   Node *slow_region = NULL;
   Node *toobig_false = ctrl;
@@ -1016,6 +1001,23 @@
   Node *slow_mem = mem;  // save the current memory state for slow path
   // generate the fast allocation code unless we know that the initial test will always go slow
   if (!always_slow) {
+    Node* eden_top_adr;
+    Node* eden_end_adr;
+
+    set_eden_pointers(eden_top_adr, eden_end_adr);
+
+    // Load Eden::end.  Loop invariant and hoisted.
+    //
+    // Note: We set the control input on "eden_end" and "old_eden_top" when using
+    //       a TLAB to work around a bug where these values were being moved across
+  //       a safepoint.  These are not oops, so they cannot be included in the oop
+  //       map, but they can be changed by a GC.  The proper way to fix this would
+  //       be to set the raw memory state when generating a SafepointNode.  However
+    //       this will require extensive changes to the loop optimization in order to
+    //       prevent a degradation of the optimization.
+    //       See comment in memnode.hpp, around line 227 in class LoadPNode.
+    Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
+
     // allocate the Region and Phi nodes for the result
     result_region = new (C, 3) RegionNode(3);
     result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM );
--- a/hotspot/src/share/vm/opto/matcher.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -273,7 +273,7 @@
   find_shared( C->root() );
   find_shared( C->top() );
 
-  C->print_method("Before Matching", 2);
+  C->print_method("Before Matching");
 
   // Swap out to old-space; emptying new-space
   Arena *old = C->node_arena()->move_contents(C->old_arena());
@@ -840,7 +840,7 @@
               _new2old_map.map(m->_idx, n);
 #endif
               if (m->in(0) != NULL) // m might be top
-                collect_null_checks(m);
+                collect_null_checks(m, n);
             } else {                // Else just a regular 'ol guy
               m = n->clone();       // So just clone into new-space
 #ifdef ASSERT
@@ -1478,12 +1478,19 @@
         m = _mem_node;
         assert(m != NULL && m->is_Mem(), "expecting memory node");
       }
-      if (m->adr_type() != mach->adr_type()) {
+      const Type* mach_at = mach->adr_type();
+      // A DecodeN node consumed by an address may have a different type
+      // than its input. Don't compare types in such a case.
+      if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
+          m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
+        mach_at = m->adr_type();
+      }
+      if (m->adr_type() != mach_at) {
         m->dump();
         tty->print_cr("mach:");
         mach->dump(1);
       }
-      assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type");
+      assert(m->adr_type() == mach_at, "matcher should not change adr type");
     }
 #endif
   }
@@ -1995,7 +2002,7 @@
 // it.  Used by later implicit-null-check handling.  Actually collects
 // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
 // value being tested.
-void Matcher::collect_null_checks( Node *proj ) {
+void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
   Node *iff = proj->in(0);
   if( iff->Opcode() == Op_If ) {
     // During matching If's have Bool & Cmp side-by-side
@@ -2008,20 +2015,47 @@
     if (ct == TypePtr::NULL_PTR ||
         (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
 
+      bool push_it = false;
       if( proj->Opcode() == Op_IfTrue ) {
         extern int all_null_checks_found;
         all_null_checks_found++;
         if( b->_test._test == BoolTest::ne ) {
-          _null_check_tests.push(proj);
-          _null_check_tests.push(cmp->in(1));
+          push_it = true;
         }
       } else {
         assert( proj->Opcode() == Op_IfFalse, "" );
         if( b->_test._test == BoolTest::eq ) {
-          _null_check_tests.push(proj);
-          _null_check_tests.push(cmp->in(1));
+          push_it = true;
         }
       }
+      if( push_it ) {
+        _null_check_tests.push(proj);
+        Node* val = cmp->in(1);
+#ifdef _LP64
+        if (UseCompressedOops && !Matcher::clone_shift_expressions &&
+            val->bottom_type()->isa_narrowoop()) {
+          //
+          // Look for a DecodeN node which should be pinned to orig_proj.
+          // On platforms (Sparc) which cannot handle 2 adds
+          // in an addressing mode, we have to keep the DecodeN node and
+          // use it to do the implicit NULL check in the address.
+          //
+          // The DecodeN node was pinned to the non-null path (orig_proj) during
+          // the CastPP transformation in final_graph_reshaping_impl().
+          //
+          uint cnt = orig_proj->outcnt();
+          for (uint i = 0; i < orig_proj->outcnt(); i++) {
+            Node* d = orig_proj->raw_out(i);
+            if (d->is_DecodeN() && d->in(1) == val) {
+              val = d;
+              val->set_req(0, NULL); // Unpin now.
+              break;
+            }
+          }
+        }
+#endif
+        _null_check_tests.push(val);
+      }
     }
   }
 }
--- a/hotspot/src/share/vm/opto/matcher.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/matcher.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -166,7 +166,7 @@
   // List of IfFalse or IfTrue Nodes that indicate a taken null test.
   // List is valid in the post-matching space.
   Node_List _null_check_tests;
-  void collect_null_checks( Node *proj );
+  void collect_null_checks( Node *proj, Node *orig_proj );
   void validate_null_checks( );
 
   Matcher( Node_List &proj_list );
--- a/hotspot/src/share/vm/opto/memnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1887,6 +1887,38 @@
   return tap->size();
 }
 
+//-------------------------------Ideal---------------------------------------
+// Feed through the length in AllocateArray(...length...)._length.
+Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  Node* p = MemNode::Ideal_common(phase, can_reshape);
+  if (p)  return (p == NodeSentinel) ? NULL : p;
+
+  // Take apart the address into an oop and an offset.
+  // Return 'this' if we cannot.
+  Node*    adr    = in(MemNode::Address);
+  intptr_t offset = 0;
+  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase,  offset);
+  if (base == NULL)     return NULL;
+  const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
+  if (tary == NULL)     return NULL;
+
+  // We can fetch the length directly through an AllocateArrayNode.
+  // This works even if the length is not constant (clone or newArray).
+  if (offset == arrayOopDesc::length_offset_in_bytes()) {
+    AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
+    if (alloc != NULL) {
+      Node* allocated_length = alloc->Ideal_length();
+      Node* len = alloc->make_ideal_length(tary, phase);
+      if (allocated_length != len) {
+        // New CastII improves on this.
+        return len;
+      }
+    }
+  }
+
+  return NULL;
+}
+
 //------------------------------Identity---------------------------------------
 // Feed through the length in AllocateArray(...length...)._length.
 Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
@@ -1905,15 +1937,22 @@
   // We can fetch the length directly through an AllocateArrayNode.
   // This works even if the length is not constant (clone or newArray).
   if (offset == arrayOopDesc::length_offset_in_bytes()) {
-    Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase);
-    if (allocated_length != NULL) {
-      return allocated_length;
+    AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
+    if (alloc != NULL) {
+      Node* allocated_length = alloc->Ideal_length();
+      // Do not allow make_ideal_length to allocate a CastII node.
+      Node* len = alloc->make_ideal_length(tary, phase, false);
+      if (allocated_length == len) {
+        // Return allocated_length only if it would not be improved by a CastII.
+        return allocated_length;
+      }
     }
   }
 
   return this;
 
 }
+
 //=============================================================================
 //---------------------------StoreNode::make-----------------------------------
 // Polymorphic factory method:
--- a/hotspot/src/share/vm/opto/memnode.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -241,6 +241,7 @@
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Identity( PhaseTransform *phase );
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
 };
 
 //------------------------------LoadLNode--------------------------------------
--- a/hotspot/src/share/vm/opto/mulnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/mulnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -152,6 +152,14 @@
   if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
     return bottom_type();
 
+#if defined(IA32)
+  // Can't trust native compilers to properly fold strict double
+  // multiplication with round-to-zero on this platform.
+  if (op == Op_MulD && phase->C->method()->is_strict()) {
+    return TypeD::DOUBLE;
+  }
+#endif
+
   return mul_ring(t1,t2);            // Local flavor of type multiplication
 }
 
@@ -360,7 +368,7 @@
 // Compute the product type of two double ranges into this node.
 const Type *MulDNode::mul_ring(const Type *t0, const Type *t1) const {
   if( t0 == Type::DOUBLE || t1 == Type::DOUBLE ) return Type::DOUBLE;
-  // We must be adding 2 double constants.
+  // We must be multiplying 2 double constants.
   return TypeD::make( t0->getd() * t1->getd() );
 }
 
--- a/hotspot/src/share/vm/opto/node.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/node.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1320,7 +1320,8 @@
   Node *pop() {
     if( _clock_index >= size() ) _clock_index = 0;
     Node *b = at(_clock_index);
-    map( _clock_index++, Node_List::pop());
+    map( _clock_index, Node_List::pop());
+    if (size() != 0) _clock_index++; // Always start from 0
     _in_worklist >>= b->_idx;
     return b;
   }
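
For context, a small self-contained model (not part of this changeset; names are illustrative) of the round-robin worklist pop being adjusted above: the element at a rotating clock index is handed out, its slot is backfilled with the last element, and the fix only advances the index while the list is still non-empty so traversal restarts from slot 0 once the list drains.

#include <cstddef>
#include <vector>

// Model of a clock-index pop: return the element at the rotating index,
// backfill its slot with the last element, and advance the index only when
// the list is still non-empty afterwards.
template <typename T>
T clock_pop(std::vector<T>& list, size_t& clock_index) {
  if (clock_index >= list.size()) clock_index = 0;   // wrap around
  T picked = list[clock_index];
  list[clock_index] = list.back();                   // backfill the hole
  list.pop_back();
  if (!list.empty()) clock_index++;                  // restart from 0 once drained
  return picked;
}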
--- a/hotspot/src/share/vm/opto/parse.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/parse.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -167,9 +167,19 @@
 
     int start() const                      { return flow()->start(); }
     int limit() const                      { return flow()->limit(); }
-    int pre_order() const                  { return flow()->pre_order(); }
+    int rpo() const                        { return flow()->rpo(); }
     int start_sp() const                   { return flow()->stack_size(); }
 
+    bool is_loop_head() const              { return flow()->is_loop_head(); }
+    bool is_SEL_head() const               { return flow()->is_single_entry_loop_head(); }
+    bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() >= rpo(); }
+    bool is_invariant_local(uint i) const  {
+      const JVMState* jvms = start_map()->jvms();
+      if (!jvms->is_loc(i)) return false;
+      return flow()->is_invariant_local(i - jvms->locoff());
+    }
+    bool can_elide_SEL_phi(uint i) const  { assert(is_SEL_head(),""); return is_invariant_local(i); }
+
     const Type* peek(int off=0) const      { return stack_type_at(start_sp() - (off+1)); }
 
     const Type* stack_type_at(int i) const;
@@ -305,7 +315,7 @@
   //            entry_bci()     -- see osr_bci, etc.
 
   ciTypeFlow*   flow()          const { return _flow; }
-  //            blocks()        -- see pre_order_at, start_block, etc.
+  //            blocks()        -- see rpo_at, start_block, etc.
   int           block_count()   const { return _block_count; }
 
   GraphKit&     exits()               { return _exits; }
@@ -330,12 +340,12 @@
   // Must this parse be aborted?
   bool failing()                { return C->failing(); }
 
-  Block* pre_order_at(int po) {
-    assert(0 <= po && po < _block_count, "oob");
-    return &_blocks[po];
+  Block* rpo_at(int rpo) {
+    assert(0 <= rpo && rpo < _block_count, "oob");
+    return &_blocks[rpo];
   }
   Block* start_block() {
-    return pre_order_at(flow()->start_block()->pre_order());
+    return rpo_at(flow()->start_block()->rpo());
   }
   // Can return NULL if the flow pass did not complete a block.
   Block* successor_for_bci(int bci) {
@@ -359,9 +369,6 @@
   // Parse all the basic blocks.
   void do_all_blocks();
 
-  // Helper for do_all_blocks; makes one pass in pre-order.
-  void visit_blocks();
-
   // Parse the current basic block
   void do_one_block();
 
--- a/hotspot/src/share/vm/opto/parse1.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -29,17 +29,17 @@
 // the most. Some of the non-static variables are needed in bytecodeInfo.cpp
 // and eventually should be encapsulated in a proper class (gri 8/18/98).
 
-int nodes_created              = 0; int nodes_created_old              = 0;
-int methods_parsed             = 0; int methods_parsed_old             = 0;
-int methods_seen               = 0; int methods_seen_old               = 0;
+int nodes_created              = 0;
+int methods_parsed             = 0;
+int methods_seen               = 0;
+int blocks_parsed              = 0;
+int blocks_seen                = 0;
 
-int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0;
-int explicit_null_checks_elided   = 0, explicit_null_checks_elided_old   = 0;
+int explicit_null_checks_inserted = 0;
+int explicit_null_checks_elided   = 0;
 int all_null_checks_found         = 0, implicit_null_checks              = 0;
 int implicit_null_throws          = 0;
 
-int parse_idx = 0;
-size_t parse_arena = 0;
 int reclaim_idx  = 0;
 int reclaim_in   = 0;
 int reclaim_node = 0;
@@ -61,6 +61,7 @@
   tty->cr();
   if (methods_seen != methods_parsed)
     tty->print_cr("Reasons for parse failures (NOT cumulative):");
+  tty->print_cr("Blocks parsed: %d  Blocks seen: %d", blocks_parsed, blocks_seen);
 
   if( explicit_null_checks_inserted )
     tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
@@ -373,6 +374,12 @@
     C->record_method_not_compilable_all_tiers(_flow->failure_reason());
   }
 
+#ifndef PRODUCT
+  if (_flow->has_irreducible_entry()) {
+    C->set_parsed_irreducible_loop(true);
+  }
+#endif
+
   if (_expected_uses <= 0) {
     _prof_factor = 1;
   } else {
@@ -556,118 +563,93 @@
   set_map(entry_map);
   do_exits();
 
-  // Collect a few more statistics.
-  parse_idx += C->unique();
-  parse_arena += C->node_arena()->used();
-
   if (log)  log->done("parse nodes='%d' memory='%d'",
                       C->unique(), C->node_arena()->used());
 }
 
 //---------------------------do_all_blocks-------------------------------------
 void Parse::do_all_blocks() {
-  _blocks_merged = 0;
-  _blocks_parsed = 0;
+  bool has_irreducible = flow()->has_irreducible_entry();
+
+  // Walk over all blocks in Reverse Post-Order.
+  while (true) {
+    bool progress = false;
+    for (int rpo = 0; rpo < block_count(); rpo++) {
+      Block* block = rpo_at(rpo);
+
+      if (block->is_parsed()) continue;
 
-  int old_blocks_merged = -1;
-  int old_blocks_parsed = -1;
+      if (!block->is_merged()) {
+        // Dead block, no state reaches this block
+        continue;
+      }
 
-  for (int tries = 0; ; tries++) {
-    visit_blocks();
-    if (failing())  return; // Check for bailout
+      // Prepare to parse this block.
+      load_state_from(block);
+
+      if (stopped()) {
+        // Block is dead.
+        continue;
+      }
+
+      blocks_parsed++;
 
-    // No need for a work list.  The outer loop is hardly ever repeated.
-    // The following loop traverses the blocks in a reasonable pre-order,
-    // as produced by the ciTypeFlow pass.
+      progress = true;
+      if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) {
+        // Not all preds have been parsed.  We must build phis everywhere.
+        // (Note that dead locals do not get phis built, ever.)
+        ensure_phis_everywhere();
+
+        // Leave behind an undisturbed copy of the map, for future merges.
+        set_map(clone_map());
+      }
 
-    // This loop can be taken more than once if there are two entries to
-    // a loop (irreduceable CFG), and the edge which ciTypeFlow chose
-    // as the first predecessor to the loop goes dead in the parser,
-    // due to parse-time optimization.  (Could happen with obfuscated code.)
+      if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) {
+        // In the absence of irreducible loops, the Region and Phis
+        // associated with a merge that doesn't involve a backedge can
+        // be simplified now since the RPO parsing order guarantees
+        // that any path which was supposed to reach here has already
+        // been parsed or must be dead.
+        Node* c = control();
+        Node* result = _gvn.transform_no_reclaim(control());
+        if (c != result && TraceOptoParse) {
+          tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx);
+        }
+        if (result != top()) {
+          record_for_igvn(result);
+        }
+      }
 
-    // Look for progress, or the lack of it:
-    if (_blocks_parsed == block_count()) {
-      // That's all, folks.
-      if (TraceOptoParse) {
-        tty->print_cr("All blocks parsed.");
-      }
+      // Parse the block.
+      do_one_block();
+
+      // Check for bailouts.
+      if (failing())  return;
+    }
+
+    // With irreducible loops, multiple passes might be necessary to parse everything.
+    if (!has_irreducible || !progress) {
       break;
     }
+  }
 
-    // How much work was done this time around?
-    int new_blocks_merged = _blocks_merged - old_blocks_merged;
-    int new_blocks_parsed = _blocks_parsed - old_blocks_parsed;
-    if (new_blocks_merged == 0) {
-      if (TraceOptoParse) {
-        tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed);
-      }
-      // No new blocks have become parseable.  Some blocks are just dead.
-      break;
-    }
-    assert(new_blocks_parsed > 0, "must make progress");
-    assert(tries < block_count(), "the pre-order cannot be this bad!");
-
-    old_blocks_merged = _blocks_merged;
-    old_blocks_parsed = _blocks_parsed;
-  }
+  blocks_seen += block_count();
 
 #ifndef PRODUCT
   // Make sure there are no half-processed blocks remaining.
   // Every remaining unprocessed block is dead and may be ignored now.
-  for (int po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
+  for (int rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
     if (!block->is_parsed()) {
       if (TraceOptoParse) {
-        tty->print("Skipped dead block %d at bci:%d", po, block->start());
-        assert(!block->is_merged(), "no half-processed blocks");
+        tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start());
       }
+      assert(!block->is_merged(), "no half-processed blocks");
     }
   }
 #endif
 }
 
-//---------------------------visit_blocks--------------------------------------
-void Parse::visit_blocks() {
-  // Walk over all blocks, parsing every one that has been reached (merged).
-  for (int po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
-
-    if (block->is_parsed()) {
-      // Do not parse twice.
-      continue;
-    }
-
-    if (!block->is_merged()) {
-      // No state on this block.  It had not yet been reached.
-      // Delay reaching it until later.
-      continue;
-    }
-
-    // Prepare to parse this block.
-    load_state_from(block);
-
-    if (stopped()) {
-      // Block is dead.
-      continue;
-    }
-
-    if (!block->is_ready() || block->is_handler()) {
-      // Not all preds have been parsed.  We must build phis everywhere.
-      // (Note that dead locals do not get phis built, ever.)
-      ensure_phis_everywhere();
-
-      // Leave behind an undisturbed copy of the map, for future merges.
-      set_map(clone_map());
-    }
-
-    // Ready or not, parse the block.
-    do_one_block();
-
-    // Check for bailouts.
-    if (failing())  return;
-  }
-}
-
 //-------------------------------build_exits----------------------------------
 // Build normal and exceptional exit merge points.
 void Parse::build_exits() {
@@ -1134,24 +1116,24 @@
   _blocks = NEW_RESOURCE_ARRAY(Block, _block_count);
   Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count);
 
-  int po;
+  int rpo;
 
   // Initialize the structs.
-  for (po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
-    block->init_node(this, po);
+  for (rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
+    block->init_node(this, rpo);
   }
 
   // Collect predecessor and successor information.
-  for (po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
+  for (rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
     block->init_graph(this);
   }
 }
 
 //-------------------------------init_node-------------------------------------
-void Parse::Block::init_node(Parse* outer, int po) {
-  _flow = outer->flow()->pre_order_at(po);
+void Parse::Block::init_node(Parse* outer, int rpo) {
+  _flow = outer->flow()->rpo_at(rpo);
   _pred_count = 0;
   _preds_parsed = 0;
   _count = 0;
@@ -1177,7 +1159,7 @@
   int p = 0;
   for (int i = 0; i < ns+ne; i++) {
     ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns);
-    Block* block2 = outer->pre_order_at(tf2->pre_order());
+    Block* block2 = outer->rpo_at(tf2->rpo());
     _successors[i] = block2;
 
     // Accumulate pred info for the other block, too.
@@ -1368,10 +1350,11 @@
     int nt = b->all_successors();
 
     tty->print("Parsing block #%d at bci [%d,%d), successors: ",
-                  block()->pre_order(), block()->start(), block()->limit());
+                  block()->rpo(), block()->start(), block()->limit());
     for (int i = 0; i < nt; i++) {
-      tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order());
+      tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo());
     }
+    if (b->is_loop_head()) tty->print("  lphd");
     tty->print_cr("");
   }
 
@@ -1501,7 +1484,7 @@
 #ifndef PRODUCT
   Block* b = block();
   int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1;
-  tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci);
+  tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci);
 #endif
   ShouldNotReachHere();
 }
@@ -1509,7 +1492,7 @@
 //--------------------------merge_common---------------------------------------
 void Parse::merge_common(Parse::Block* target, int pnum) {
   if (TraceOptoParse) {
-    tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start());
+    tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start());
   }
 
   // Zap extra stack slots to top
@@ -1534,6 +1517,7 @@
     // which must not be allowed into this block's map.)
     if (pnum > PhiNode::Input         // Known multiple inputs.
         || target->is_handler()       // These have unpredictable inputs.
+        || target->is_loop_head()     // Known multiple inputs
         || control()->is_Region()) {  // We must hide this guy.
       // Add a Region to start the new basic block.  Phis will be added
       // later lazily.
@@ -1575,15 +1559,21 @@
 
     // Compute where to merge into
     // Merge incoming control path
-    r->set_req(pnum, newin->control());
+    r->init_req(pnum, newin->control());
 
     if (pnum == 1) {            // Last merge for this Region?
-      _gvn.transform_no_reclaim(r);
+      if (!block()->flow()->is_irreducible_entry()) {
+        Node* result = _gvn.transform_no_reclaim(r);
+        if (r != result && TraceOptoParse) {
+          tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx);
+        }
+      }
       record_for_igvn(r);
     }
 
     // Update all the non-control inputs to map:
     assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms");
+    bool check_elide_phi = target->is_SEL_backedge(save_block);
     for (uint j = 1; j < newin->req(); j++) {
       Node* m = map()->in(j);   // Current state of target.
       Node* n = newin->in(j);   // Incoming change to target state.
@@ -1603,7 +1593,11 @@
           merge_memory_edges(n->as_MergeMem(), pnum, nophi);
           continue;
         default:                // All normal stuff
-          if (phi == NULL)  phi = ensure_phi(j, nophi);
+          if (phi == NULL) {
+            if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
+              phi = ensure_phi(j, nophi);
+            }
+          }
           break;
         }
       }
@@ -1736,9 +1730,13 @@
   uint nof_monitors = map()->jvms()->nof_monitors();
 
   assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms");
+  bool check_elide_phi = block()->is_SEL_head();
   for (uint i = TypeFunc::Parms; i < monoff; i++) {
-    ensure_phi(i);
+    if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) {
+      ensure_phi(i);
+    }
   }
+
   // Even monitors need Phis, though they are well-structured.
   // This is true for OSR methods, and also for the rare cases where
   // a monitor object is the subject of a replace_in_map operation.
--- a/hotspot/src/share/vm/opto/parse2.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/parse2.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -100,16 +100,17 @@
 
   // Do the range check
   if (GenerateRangeChecks && need_range_check) {
-    // Range is constant in array-oop, so we can use the original state of mem
-    Node* len = load_array_length(ary);
     Node* tst;
     if (sizetype->_hi <= 0) {
-      // If the greatest array bound is negative, we can conclude that we're
+      // The greatest array bound is negative, so we can conclude that we're
       // compiling unreachable code, but the unsigned compare trick used below
       // only works with non-negative lengths.  Instead, hack "tst" to be zero so
       // the uncommon_trap path will always be taken.
       tst = _gvn.intcon(0);
     } else {
+      // Range is constant in array-oop, so we can use the original state of mem
+      Node* len = load_array_length(ary);
+
       // Test length vs index (standard trick using unsigned compare)
       Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) );
       BoolTest::mask btest = BoolTest::lt;
@@ -137,9 +138,12 @@
   // Check for always knowing you are throwing a range-check exception
   if (stopped())  return top();
 
-  Node* ptr = array_element_address( ary, idx, type, sizetype);
+  Node* ptr = array_element_address(ary, idx, type, sizetype);
 
   if (result2 != NULL)  *result2 = elemtype;
+
+  assert(ptr != top(), "top should go hand-in-hand with stopped");
+
   return ptr;
 }
 
--- a/hotspot/src/share/vm/opto/postaloc.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/postaloc.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -34,7 +34,7 @@
 #endif
 }
 
-//------------------------------may_be_copy_of_callee-----------------------------
+//---------------------------may_be_copy_of_callee-----------------------------
 // Check to see if we can possibly be a copy of a callee-save value.
 bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
   // Short circuit if there are no callee save registers
@@ -225,6 +225,20 @@
 
   // Scan all registers to see if this value is around already
   for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
+    if (reg == (uint)nk_reg) {
+      // Found ourselves so check if there is only one user of this
+      // copy and keep on searching for a better copy if so.
+      bool ignore_self = true;
+      x = n->in(k);
+      DUIterator_Fast imax, i = x->fast_outs(imax);
+      Node* first = x->fast_out(i); i++;
+      while (i < imax && ignore_self) {
+        Node* use = x->fast_out(i); i++;
+        if (use != first) ignore_self = false;
+      }
+      if (ignore_self) continue;
+    }
+
     Node *vv = value[reg];
     if( !single ) {             // Doubles check for aligned-adjacent pair
       if( (reg&1)==0 ) continue;  // Wrong half of a pair
--- a/hotspot/src/share/vm/opto/runtime.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -44,6 +44,8 @@
 address OptoRuntime::_multianewarray3_Java                        = NULL;
 address OptoRuntime::_multianewarray4_Java                        = NULL;
 address OptoRuntime::_multianewarray5_Java                        = NULL;
+address OptoRuntime::_g1_wb_pre_Java                              = NULL;
+address OptoRuntime::_g1_wb_post_Java                             = NULL;
 address OptoRuntime::_vtable_must_compile_Java                    = NULL;
 address OptoRuntime::_complete_monitor_locking_Java               = NULL;
 address OptoRuntime::_rethrow_Java                                = NULL;
@@ -89,6 +91,8 @@
   gen(env, _multianewarray3_Java           , multianewarray3_Type         , multianewarray3_C               ,    0 , true , false, false);
   gen(env, _multianewarray4_Java           , multianewarray4_Type         , multianewarray4_C               ,    0 , true , false, false);
   gen(env, _multianewarray5_Java           , multianewarray5_Type         , multianewarray5_C               ,    0 , true , false, false);
+  gen(env, _g1_wb_pre_Java                 , g1_wb_pre_Type               , SharedRuntime::g1_wb_pre        ,    0 , false, false, false);
+  gen(env, _g1_wb_post_Java                , g1_wb_post_Type              , SharedRuntime::g1_wb_post       ,    0 , false, false, false);
   gen(env, _complete_monitor_locking_Java  , complete_monitor_enter_Type  , SharedRuntime::complete_monitor_locking_C      ,    0 , false, false, false);
   gen(env, _rethrow_Java                   , rethrow_Type                 , rethrow_C                       ,    2 , true , false, true );
 
@@ -385,6 +389,33 @@
   return multianewarray_Type(5);
 }
 
+const TypeFunc *OptoRuntime::g1_wb_pre_Type() {
+  const Type **fields = TypeTuple::fields(2);
+  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
+  fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(0);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+  return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::g1_wb_post_Type() {
+
+  const Type **fields = TypeTuple::fields(2);
+  fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL;  // Card addr
+  fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL;  // thread
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(0);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+
+  return TypeFunc::make(domain, range);
+}
+
 const TypeFunc *OptoRuntime::uncommon_trap_Type() {
   // create input type (domain)
   const Type **fields = TypeTuple::fields(1);
--- a/hotspot/src/share/vm/opto/runtime.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -108,6 +108,8 @@
   static address _multianewarray3_Java;
   static address _multianewarray4_Java;
   static address _multianewarray5_Java;
+  static address _g1_wb_pre_Java;
+  static address _g1_wb_post_Java;
   static address _vtable_must_compile_Java;
   static address _complete_monitor_locking_Java;
   static address _rethrow_Java;
@@ -140,6 +142,8 @@
   static void multianewarray3_C(klassOopDesc* klass, int len1, int len2, int len3, JavaThread *thread);
   static void multianewarray4_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
   static void multianewarray5_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
+  static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread);
+  static void g1_wb_post_C(void* card_addr, JavaThread* thread);
 
 public:
   // Slow-path Locking and Unlocking
@@ -195,6 +199,8 @@
   static address multianewarray3_Java()                  { return _multianewarray3_Java; }
   static address multianewarray4_Java()                  { return _multianewarray4_Java; }
   static address multianewarray5_Java()                  { return _multianewarray5_Java; }
+  static address g1_wb_pre_Java()                        { return _g1_wb_pre_Java; }
+  static address g1_wb_post_Java()                       { return _g1_wb_post_Java; }
   static address vtable_must_compile_stub()              { return _vtable_must_compile_Java; }
   static address complete_monitor_locking_Java()         { return _complete_monitor_locking_Java;   }
 
@@ -232,6 +238,8 @@
   static const TypeFunc* multianewarray3_Type(); // multianewarray
   static const TypeFunc* multianewarray4_Type(); // multianewarray
   static const TypeFunc* multianewarray5_Type(); // multianewarray
+  static const TypeFunc* g1_wb_pre_Type();
+  static const TypeFunc* g1_wb_post_Type();
   static const TypeFunc* complete_monitor_enter_Type();
   static const TypeFunc* complete_monitor_exit_Type();
   static const TypeFunc* uncommon_trap_Type();
--- a/hotspot/src/share/vm/opto/subnode.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/subnode.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -206,6 +206,14 @@
   if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) )
     return new (phase->C, 3) SubINode( in1->in(1), in2->in(1) );
 
+  // Convert "(A+X) - (X+B)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(1) )
+    return new (phase->C, 3) SubINode( in1->in(1), in2->in(2) );
+
+  // Convert "(X+A) - (B+X)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(2) )
+    return new (phase->C, 3) SubINode( in1->in(2), in2->in(1) );
+
   // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally
   // nicer to optimize than subtract.
   if( op2 == Op_SubI && in2->outcnt() == 1) {
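
As a standalone illustration (not part of this changeset; the function names are invented), the two new SubINode transforms above rely on the fact that in wrap-around 32-bit arithmetic the shared term cancels exactly, so (A+X)-(X+B) and (X+A)-(B+X) both equal A-B even when the intermediate additions overflow:

#include <cassert>
#include <cstdint>

// Unsigned arithmetic models the wrap-around behaviour without signed-overflow UB.
static int32_t unfolded(int32_t a, int32_t b, int32_t x) {
  return (int32_t)(((uint32_t)a + (uint32_t)x) - ((uint32_t)x + (uint32_t)b));
}
static int32_t folded(int32_t a, int32_t b) { return a - b; }

int main() {
  assert(unfolded(7, 3, INT32_MAX) == folded(7, 3));   // temporaries wrap, result does not change
  assert(unfolded(-5, 9, 123456789) == folded(-5, 9));
  return 0;
}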
--- a/hotspot/src/share/vm/opto/type.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/type.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -3157,17 +3157,18 @@
 
 // Narrow the given size type to the index range for the given array base type.
 // Return NULL if the resulting int type becomes empty.
-const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) {
+const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size) const {
   jint hi = size->_hi;
   jint lo = size->_lo;
   jint min_lo = 0;
-  jint max_hi = max_array_length(elem);
+  jint max_hi = max_array_length(elem()->basic_type());
   //if (index_not_size)  --max_hi;     // type of a valid array index, FTR
   bool chg = false;
   if (lo < min_lo) { lo = min_lo; chg = true; }
   if (hi > max_hi) { hi = max_hi; chg = true; }
+  // Negative length arrays will produce weird intermediate dead fast-path code
   if (lo > hi)
-    return NULL;
+    return TypeInt::ZERO;
   if (!chg)
     return size;
   return TypeInt::make(lo, hi, Type::WidenMin);
@@ -3176,9 +3177,7 @@
 //-------------------------------cast_to_size----------------------------------
 const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const {
   assert(new_size != NULL, "");
-  new_size = narrow_size_type(new_size, elem()->basic_type());
-  if (new_size == NULL)       // Negative length arrays will produce weird
-    new_size = TypeInt::ZERO; // intermediate dead fast-path goo
+  new_size = narrow_size_type(new_size);
   if (new_size == size())  return this;
   const TypeAry* new_ary = TypeAry::make(elem(), new_size);
   return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id);
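
A rough sketch (not part of this changeset; the struct and names are hypothetical) of what the reworked narrow_size_type() above now does: clamp the size type to [0, max_array_length] and collapse an empty range, such as a provably negative length, to [0, 0] instead of returning NULL, which is why cast_to_size() no longer needs its special case:

#include <algorithm>

struct IntRange { int lo, hi; };          // stand-in for TypeInt's [_lo, _hi]

IntRange narrow_size(IntRange size, int max_len) {
  int lo = std::max(size.lo, 0);          // array sizes cannot be negative
  int hi = std::min(size.hi, max_len);    // nor exceed the maximum array length
  if (lo > hi) return IntRange{0, 0};     // empty range: dead fast-path code, not an error
  return IntRange{lo, hi};
}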
--- a/hotspot/src/share/vm/opto/type.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/opto/type.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -840,6 +840,7 @@
   virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const;
 
   virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const;
+  virtual const TypeInt* narrow_size_type(const TypeInt* size) const;
 
   virtual bool empty(void) const;        // TRUE if type is vacuous
   virtual const TypePtr *add_offset( intptr_t offset ) const;
@@ -865,7 +866,6 @@
   }
   static const TypeAryPtr *_array_body_type[T_CONFLICT+1];
   // sharpen the type of an int which is used as an array size
-  static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem);
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
 #endif
--- a/hotspot/src/share/vm/prims/jvm.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/prims/jvm.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -377,7 +377,11 @@
 JVM_ENTRY_NO_ENV(jlong, JVM_FreeMemory(void))
   JVMWrapper("JVM_FreeMemory");
   CollectedHeap* ch = Universe::heap();
-  size_t n = ch->capacity() - ch->used();
+  size_t n;
+  {
+     MutexLocker x(Heap_lock);
+     n = ch->capacity() - ch->used();
+  }
   return convert_size_t_to_jlong(n);
 JVM_END
 
@@ -624,6 +628,32 @@
   if (PrintJVMWarnings) warning("JVM_ResolveClass not implemented");
 JVM_END
 
+// Common implementation for JVM_FindClassFromBootLoader and
+// JVM_FindClassFromLoader
+static jclass jvm_find_class_from_class_loader(JNIEnv* env, const char* name,
+                                  jboolean init, jobject loader,
+                                  jboolean throwError, TRAPS) {
+  // Java libraries should ensure that name is never null...
+  if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) {
+    // It's impossible to create this class;  the name cannot fit
+    // into the constant pool.
+    if (throwError) {
+      THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name);
+    } else {
+      THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name);
+    }
+  }
+  symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL);
+  Handle h_loader(THREAD, JNIHandles::resolve(loader));
+  jclass result = find_class_from_class_loader(env, h_name, init, h_loader,
+                                               Handle(), throwError, THREAD);
+
+  if (TraceClassResolution && result != NULL) {
+    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
+  }
+  return result;
+}
+
 // Rationale behind JVM_FindClassFromBootLoader
 // a> JVM_FindClassFromClassLoader was never exported in the export tables.
 // b> because of (a) java.dll has a direct dependecy on the  unexported
@@ -645,8 +675,8 @@
                                               jboolean throwError))
   JVMWrapper3("JVM_FindClassFromBootLoader %s throw %s", name,
               throwError ? "error" : "exception");
-  return JVM_FindClassFromClassLoader(env, name, JNI_FALSE,
-                                      (jobject)NULL, throwError);
+  return jvm_find_class_from_class_loader(env, name, JNI_FALSE,
+                                          (jobject)NULL, throwError, THREAD);
 JVM_END
 
 JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name,
@@ -654,26 +684,8 @@
                                                jboolean throwError))
   JVMWrapper3("JVM_FindClassFromClassLoader %s throw %s", name,
                throwError ? "error" : "exception");
-  // Java libraries should ensure that name is never null...
-  if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) {
-    // It's impossible to create this class;  the name cannot fit
-    // into the constant pool.
-    if (throwError) {
-      THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name);
-    } else {
-      THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name);
-    }
-  }
-  symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL);
-  Handle h_loader(THREAD, JNIHandles::resolve(loader));
-  jclass result = find_class_from_class_loader(env, h_name, init, h_loader,
-                                               Handle(), throwError, thread);
-
-  if (TraceClassResolution && result != NULL) {
-    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
-  }
-
-  return result;
+  return jvm_find_class_from_class_loader(env, name, init, loader,
+                                          throwError, THREAD);
 JVM_END
 
 
--- a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -121,7 +121,7 @@
   JvmtiEventController::env_initialize((JvmtiEnv*)this);
 
 #ifdef JVMTI_TRACE
-  _jvmti_external.functions = strlen(TraceJVMTI)? &jvmtiTrace_Interface : &jvmti_Interface;
+  _jvmti_external.functions = TraceJVMTI != NULL ? &jvmtiTrace_Interface : &jvmti_Interface;
 #else
   _jvmti_external.functions = &jvmti_Interface;
 #endif
--- a/hotspot/src/share/vm/prims/jvmtiExport.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiExport.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -2433,18 +2433,7 @@
   // so we record the number of collections so that it can be checked in
   // the destructor.
   if (!_full) {
-    if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-      GenCollectedHeap* gch = GenCollectedHeap::heap();
-      assert(gch->n_gens() == 2, "configuration not recognized");
-      _invocation_count = (unsigned int)gch->get_gen(1)->stat_record()->invocations;
-    } else {
-#ifndef SERIALGC
-      assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap, "checking");
-      _invocation_count = PSMarkSweep::total_invocations();
-#else  // SERIALGC
-      fatal("SerialGC only supported in this configuration.");
-#endif // SERIALGC
-    }
+    _invocation_count = Universe::heap()->total_full_collections();
   }
 
   // Do clean up tasks that need to be done at a safepoint
@@ -2466,20 +2455,7 @@
   // generation but could have ended up doing a "full" GC - check the
   // GC count to see.
   if (!_full) {
-    if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-      GenCollectedHeap* gch = GenCollectedHeap::heap();
-      if (_invocation_count != (unsigned int)gch->get_gen(1)->stat_record()->invocations) {
-        _full = true;
-      }
-    } else {
-#ifndef SERIALGC
-      if (_invocation_count != PSMarkSweep::total_invocations()) {
-        _full = true;
-      }
-#else  // SERIALGC
-      fatal("SerialGC only supported in this configuration.");
-#endif // SERIALGC
-    }
+    _full = (_invocation_count != Universe::heap()->total_full_collections());
   }
 
   // Full collection probably means the perm generation has been GC'ed
--- a/hotspot/src/share/vm/prims/jvmtiTagMap.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiTagMap.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -400,16 +400,28 @@
 
 // get the memory region used for the young generation
 void JvmtiTagMap::get_young_generation() {
-  if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-    GenCollectedHeap* gch = GenCollectedHeap::heap();
-    _young_gen = gch->get_gen(0)->reserved();
-  } else {
+  CollectedHeap* ch = Universe::heap();
+  switch (ch->kind()) {
+    case (CollectedHeap::GenCollectedHeap): {
+      _young_gen = ((GenCollectedHeap*)ch)->get_gen(0)->reserved();
+      break;
+    }
 #ifndef SERIALGC
-    ParallelScavengeHeap* psh = ParallelScavengeHeap::heap();
-    _young_gen= psh->young_gen()->reserved();
-#else  // SERIALGC
-    fatal("SerialGC only supported in this configuration.");
-#endif // SERIALGC
+    case (CollectedHeap::ParallelScavengeHeap): {
+      _young_gen = ((ParallelScavengeHeap*)ch)->young_gen()->reserved();
+      break;
+    }
+    case (CollectedHeap::G1CollectedHeap): {
+      // Until a more satisfactory solution is implemented, all
+      // oops in the tag map will require a rehash at each GC.
+      // This is a correct, if extremely inefficient, solution.
+      // See RFE 6621729 for related commentary.
+      _young_gen = ch->reserved_region();
+      break;
+    }
+#endif  // !SERIALGC
+    default:
+      ShouldNotReachHere();
   }
 }
 
--- a/hotspot/src/share/vm/prims/jvmtiTrace.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/prims/jvmtiTrace.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -73,7 +73,7 @@
 
   const char *very_end;
   const char *curr;
-  if (strlen(TraceJVMTI)) {
+  if (TraceJVMTI != NULL) {
     curr = TraceJVMTI;
   } else {
     curr = "";  // hack in fixed tracing here
--- a/hotspot/src/share/vm/prims/unsafe.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/prims/unsafe.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -891,6 +891,7 @@
   oop e = JNIHandles::resolve(e_h);
   oop p = JNIHandles::resolve(obj);
   HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
+  update_barrier_set_pre((void*)addr, e);
   oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e);
   jboolean success  = (res == e);
   if (success)
--- a/hotspot/src/share/vm/runtime/aprofiler.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/aprofiler.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -33,6 +33,7 @@
 
 class AllocationProfiler: AllStatic {
   friend class GenCollectedHeap;
+  friend class G1CollectedHeap;
   friend class MarkSweep;
  private:
   static bool _active;                          // tells whether profiler is active
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -947,18 +947,17 @@
 // UseParNewGC and not explicitly set ParallelGCThreads we
 // set it, unless this is a single cpu machine.
 void Arguments::set_parnew_gc_flags() {
-  assert(!UseSerialGC && !UseParallelGC, "control point invariant");
+  assert(!UseSerialGC && !UseParallelGC && !UseG1GC,
+         "control point invariant");
+  assert(UseParNewGC, "Error");
 
   // Turn off AdaptiveSizePolicy by default for parnew until it is
   // complete.
-  if (UseParNewGC &&
-      FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
+  if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
     FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
   }
 
-  if (FLAG_IS_DEFAULT(UseParNewGC) && ParallelGCThreads > 1) {
-    FLAG_SET_DEFAULT(UseParNewGC, true);
-  } else if (UseParNewGC && ParallelGCThreads == 0) {
+  if (ParallelGCThreads == 0) {
     FLAG_SET_DEFAULT(ParallelGCThreads,
                      Abstract_VM_Version::parallel_worker_threads());
     if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) {
@@ -994,15 +993,12 @@
 // further optimization and tuning efforts, and would almost
 // certainly gain from analysis of platform and environment.
 void Arguments::set_cms_and_parnew_gc_flags() {
-  if (UseSerialGC || UseParallelGC) {
-    return;
-  }
-
+  assert(!UseSerialGC && !UseParallelGC, "Error");
   assert(UseConcMarkSweepGC, "CMS is expected to be on here");
 
   // If we are using CMS, we prefer to UseParNewGC,
   // unless explicitly forbidden.
-  if (!UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) {
+  if (FLAG_IS_DEFAULT(UseParNewGC)) {
     FLAG_SET_ERGO(bool, UseParNewGC, true);
   }
 
@@ -1182,6 +1178,7 @@
     // machine class and automatic selection policy.
     if (!UseSerialGC &&
         !UseConcMarkSweepGC &&
+        !UseG1GC &&
         !UseParNewGC &&
         !DumpSharedSpaces &&
         FLAG_IS_DEFAULT(UseParallelGC)) {
@@ -1200,9 +1197,13 @@
   // Check that UseCompressedOops can be set with the max heap size allocated
   // by ergonomics.
   if (MaxHeapSize <= max_heap_for_compressed_oops()) {
-    if (FLAG_IS_DEFAULT(UseCompressedOops)) {
+    if (FLAG_IS_DEFAULT(UseCompressedOops) && !UseG1GC) {
       // Turn off until bug is fixed.
+      // Uncomment the following line to return it to default status.
       // FLAG_SET_ERGO(bool, UseCompressedOops, true);
+    } else if (UseCompressedOops && UseG1GC) {
+      warning(" UseCompressedOops does not currently work with UseG1GC; switching off UseCompressedOops. ");
+      FLAG_SET_DEFAULT(UseCompressedOops, false);
     }
 #ifdef _WIN64
     if (UseLargePages && UseCompressedOops) {
@@ -1213,8 +1214,7 @@
 #endif //  _WIN64
   } else {
     if (UseCompressedOops && !FLAG_IS_DEFAULT(UseCompressedOops)) {
-      // If specified, give a warning
-      warning( "Max heap size too large for Compressed Oops");
+      warning("Max heap size too large for Compressed Oops");
       FLAG_SET_DEFAULT(UseCompressedOops, false);
     }
   }
@@ -1224,6 +1224,7 @@
 }
 
 void Arguments::set_parallel_gc_flags() {
+  assert(UseParallelGC || UseParallelOldGC, "Error");
   // If parallel old was requested, automatically enable parallel scavenge.
   if (UseParallelOldGC && !UseParallelGC && FLAG_IS_DEFAULT(UseParallelGC)) {
     FLAG_SET_DEFAULT(UseParallelGC, true);
@@ -1235,51 +1236,8 @@
     FLAG_SET_ERGO(uintx, ParallelGCThreads,
                   Abstract_VM_Version::parallel_worker_threads());
 
-    if (FLAG_IS_DEFAULT(MaxHeapSize)) {
-      const uint64_t reasonable_fraction =
-        os::physical_memory() / DefaultMaxRAMFraction;
-      const uint64_t maximum_size = (uint64_t)
-                 (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ?
-                     MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) :
-                     DefaultMaxRAM);
-      size_t reasonable_max =
-        (size_t) os::allocatable_physical_memory(reasonable_fraction);
-      if (reasonable_max > maximum_size) {
-        reasonable_max = maximum_size;
-      }
-      if (PrintGCDetails && Verbose) {
-        // Cannot use gclog_or_tty yet.
-        tty->print_cr("  Max heap size for server class platform "
-                      SIZE_FORMAT, reasonable_max);
-      }
-      // If the initial_heap_size has not been set with -Xms,
-      // then set it as fraction of size of physical memory
-      // respecting the maximum and minimum sizes of the heap.
-      if (initial_heap_size() == 0) {
-        const uint64_t reasonable_initial_fraction =
-          os::physical_memory() / DefaultInitialRAMFraction;
-        const size_t reasonable_initial =
-          (size_t) os::allocatable_physical_memory(reasonable_initial_fraction);
-        const size_t minimum_size = NewSize + OldSize;
-        set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max),
-                                  minimum_size));
-        // Currently the minimum size and the initial heap sizes are the same.
-        set_min_heap_size(initial_heap_size());
-        if (PrintGCDetails && Verbose) {
-          // Cannot use gclog_or_tty yet.
-          tty->print_cr("  Initial heap size for server class platform "
-                        SIZE_FORMAT, initial_heap_size());
-        }
-      } else {
-        // An minimum size was specified on the command line.  Be sure
-        // that the maximum size is consistent.
-        if (initial_heap_size() > reasonable_max) {
-          reasonable_max = initial_heap_size();
-        }
-      }
-      FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max);
-    }
-
+    // PS is a server collector; set up the heap sizes accordingly.
+    set_server_heap_size();
     // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the
     // SurvivorRatio has been set, reset their default values to SurvivorRatio +
     // 2.  By doing this we make SurvivorRatio also work for Parallel Scavenger.
@@ -1307,6 +1265,70 @@
   }
 }
 
+void Arguments::set_g1_gc_flags() {
+  assert(UseG1GC, "Error");
+  // G1 is a server collector; set up the heap sizes accordingly.
+  set_server_heap_size();
+#ifdef COMPILER1
+  FastTLABRefill = false;
+#endif
+  FLAG_SET_DEFAULT(ParallelGCThreads,
+                     Abstract_VM_Version::parallel_worker_threads());
+  if (ParallelGCThreads == 0) {
+    FLAG_SET_DEFAULT(ParallelGCThreads,
+                     Abstract_VM_Version::parallel_worker_threads());
+  }
+  no_shared_spaces();
+}
+
+void Arguments::set_server_heap_size() {
+  if (FLAG_IS_DEFAULT(MaxHeapSize)) {
+    const uint64_t reasonable_fraction =
+      os::physical_memory() / DefaultMaxRAMFraction;
+    const uint64_t maximum_size = (uint64_t)
+                 (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ?
+                     MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) :
+                     DefaultMaxRAM);
+    size_t reasonable_max =
+      (size_t) os::allocatable_physical_memory(reasonable_fraction);
+    if (reasonable_max > maximum_size) {
+      reasonable_max = maximum_size;
+    }
+    if (PrintGCDetails && Verbose) {
+      // Cannot use gclog_or_tty yet.
+      tty->print_cr("  Max heap size for server class platform "
+                    SIZE_FORMAT, reasonable_max);
+    }
+    // If the initial_heap_size has not been set with -Xms,
+    // then set it as fraction of size of physical memory
+    // respecting the maximum and minimum sizes of the heap.
+    if (initial_heap_size() == 0) {
+      const uint64_t reasonable_initial_fraction =
+        os::physical_memory() / DefaultInitialRAMFraction;
+      const size_t reasonable_initial =
+        (size_t) os::allocatable_physical_memory(reasonable_initial_fraction);
+      const size_t minimum_size = NewSize + OldSize;
+      set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max),
+                                minimum_size));
+      // Currently the minimum size and the initial heap sizes are the same.
+      set_min_heap_size(initial_heap_size());
+      if (PrintGCDetails && Verbose) {
+        // Cannot use gclog_or_tty yet.
+        tty->print_cr("  Initial heap size for server class platform "
+                      SIZE_FORMAT, initial_heap_size());
+      }
+    } else {
+      // A minimum size was specified on the command line.  Be sure
+      // that the maximum size is consistent.
+      if (initial_heap_size() > reasonable_max) {
+        reasonable_max = initial_heap_size();
+      }
+    }
+    FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max);
+  }
+}
+
 // This must be called after ergonomics because we want bytecode rewriting
 // if the server compiler is used, or if UseSharedSpaces is disabled.
 void Arguments::set_bytecode_flags() {
@@ -1393,12 +1415,13 @@
   FLAG_SET_DEFAULT(UseConcMarkSweepGC, false);
   FLAG_SET_DEFAULT(UseParallelGC, false);
   FLAG_SET_DEFAULT(UseParallelOldGC, false);
+  FLAG_SET_DEFAULT(UseG1GC, false);
 }
 
 static bool verify_serial_gc_flags() {
   return (UseSerialGC &&
-        !(UseParNewGC || UseConcMarkSweepGC || UseParallelGC ||
-          UseParallelOldGC));
+        !(UseParNewGC || UseConcMarkSweepGC || UseG1GC ||
+          UseParallelGC || UseParallelOldGC));
 }
 
 // Check consistency of GC selection
@@ -1501,8 +1524,8 @@
   status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit");
 
   // Check user specified sharing option conflict with Parallel GC
-  bool cannot_share = (UseConcMarkSweepGC || UseParallelGC ||
-                       UseParallelOldGC || UseParNewGC ||
+  bool cannot_share = (UseConcMarkSweepGC || UseG1GC || UseParNewGC ||
+                       UseParallelGC || UseParallelOldGC ||
                        SOLARIS_ONLY(UseISM) NOT_SOLARIS(UseLargePages));
 
   if (cannot_share) {
@@ -1542,11 +1565,6 @@
                   "The CMS collector (-XX:+UseConcMarkSweepGC) must be "
                   "selected in order\nto use CMSIncrementalMode.\n");
       status = false;
-    } else if (!UseTLAB) {
-      jio_fprintf(defaultStream::error_stream(),
-                  "error:  CMSIncrementalMode requires thread-local "
-                  "allocation buffers\n(-XX:+UseTLAB).\n");
-      status = false;
     } else {
       status = status && verify_percentage(CMSIncrementalDutyCycle,
                                   "CMSIncrementalDutyCycle");
@@ -1566,13 +1584,6 @@
     }
   }
 
-  if (UseNUMA && !UseTLAB) {
-    jio_fprintf(defaultStream::error_stream(),
-                "error:  NUMA allocator (-XX:+UseNUMA) requires thread-local "
-                "allocation\nbuffers (-XX:+UseTLAB).\n");
-    status = false;
-  }
-
   // CMS space iteration, which FLSVerifyAllHeapreferences entails,
   // insists that we hold the requisite locks so that the iteration is
   // MT-safe. For the verification at start-up and shut-down, we don't
@@ -2361,10 +2372,15 @@
     SOLARIS_ONLY(FLAG_SET_DEFAULT(UseMPSS, false));
     SOLARIS_ONLY(FLAG_SET_DEFAULT(UseISM, false));
   }
+
 #else
   if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) {
     FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1);
   }
+  // Temporarily disable bulk zeroing reduction with G1. See CR 6627983.
+  if (UseG1GC) {
+    FLAG_SET_DEFAULT(ReduceBulkZeroing, false);
+  }
 #endif
 
   if (!check_vm_args_consistency()) {
@@ -2519,12 +2535,29 @@
     }
   }
 
+
   // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS
   jint result = parse_vm_init_args(args);
   if (result != JNI_OK) {
     return result;
   }
 
+  // These are hacks until G1 is fully supported and tested,
+  // but they let you force -XX:+UseG1GC in PRT and get it to where it (mostly) works.
+  if (UseG1GC) {
+    if (UseConcMarkSweepGC || UseParNewGC || UseParallelGC || UseParallelOldGC || UseSerialGC) {
+#ifndef PRODUCT
+      tty->print_cr("-XX:+UseG1GC is incompatible with other collectors; using UseG1GC only");
+#endif // PRODUCT
+      UseConcMarkSweepGC = false;
+      UseParNewGC        = false;
+      UseParallelGC      = false;
+      UseParallelOldGC   = false;
+      UseSerialGC        = false;
+    }
+    no_shared_spaces();
+  }
+
 #ifndef PRODUCT
   if (TraceBytecodesAt != 0) {
     TraceBytecodes = true;
@@ -2570,6 +2603,12 @@
     // Set some flags for ParNew
     set_parnew_gc_flags();
   }
+  // Temporary; make the "if" an "else-if" before
+  // we integrate G1. XXX
+  if (UseG1GC) {
+    // Set some flags for garbage-first, if needed.
+    set_g1_gc_flags();
+  }
 
 #ifdef SERIALGC
   assert(verify_serial_gc_flags(), "SerialGC unset");
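
The two hunks above pull the server-class heap sizing out of set_parallel_gc_flags() into a shared set_server_heap_size(), so the parallel collectors and G1 size the heap the same way: a fraction of physical memory, capped by an absolute maximum, with the initial size clamped between a lower bound and that maximum. A minimal standalone sketch of that sizing rule follows; physical_memory() and the k* constants are illustrative stand-ins, not HotSpot's DefaultMaxRAMFraction/DefaultMaxRAM values.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for os::physical_memory() and the ergonomics
    // knobs; the real values come from HotSpot's globals and os layer.
    static uint64_t physical_memory() { return 8ULL * 1024 * 1024 * 1024; }
    static const uint64_t kMaxRAMFraction     = 4;    // cf. DefaultMaxRAMFraction
    static const uint64_t kInitialRAMFraction = 64;   // cf. DefaultInitialRAMFraction
    static const uint64_t kDefaultMaxRAM      = 1ULL * 1024 * 1024 * 1024;

    struct HeapSizes { uint64_t initial; uint64_t maximum; };

    // Same shape as set_server_heap_size(): cap a fraction of physical memory
    // by an absolute maximum, then derive an initial size that respects both
    // that maximum and a lower bound (NewSize + OldSize in the VM).
    static HeapSizes server_heap_sizes(uint64_t minimum_size) {
      uint64_t reasonable_max = physical_memory() / kMaxRAMFraction;
      reasonable_max = std::min(reasonable_max, kDefaultMaxRAM);

      uint64_t reasonable_initial = physical_memory() / kInitialRAMFraction;
      reasonable_initial = std::max(std::min(reasonable_initial, reasonable_max),
                                    minimum_size);
      HeapSizes s = { reasonable_initial, reasonable_max };
      return s;
    }

    int main() {
      HeapSizes s = server_heap_sizes(16 * 1024 * 1024);
      std::printf("initial=%llu max=%llu\n",
                  (unsigned long long) s.initial, (unsigned long long) s.maximum);
      return 0;
    }
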
--- a/hotspot/src/share/vm/runtime/arguments.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -294,10 +294,14 @@
   // CMS/ParNew garbage collectors
   static void set_parnew_gc_flags();
   static void set_cms_and_parnew_gc_flags();
-  // UseParallelGC
+  // UseParallel[Old]GC
   static void set_parallel_gc_flags();
+  // Garbage-First (UseG1GC)
+  static void set_g1_gc_flags();
   // GC ergonomics
   static void set_ergonomics_flags();
+  // Set up heap size for a server platform
+  static void set_server_heap_size();
   // Based on automatic selection criteria, should the
   // low pause collector be used.
   static bool should_auto_select_low_pause_collector();
--- a/hotspot/src/share/vm/runtime/globals.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/globals.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -28,7 +28,8 @@
 
 RUNTIME_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \
               MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \
-              MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG, \
+              MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
+              MATERIALIZE_NOTPRODUCT_FLAG, \
               MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG, \
               MATERIALIZE_LP64_PRODUCT_FLAG)
 
@@ -37,12 +38,16 @@
                  MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
 
 bool Flag::is_unlocker() const {
-  return strcmp(name, "UnlockDiagnosticVMOptions") == 0;
+  return strcmp(name, "UnlockDiagnosticVMOptions") == 0 ||
+         strcmp(name, "UnlockExperimentalVMOptions") == 0;
 }
 
 bool Flag::is_unlocked() const {
   if (strcmp(kind, "{diagnostic}") == 0) {
     return UnlockDiagnosticVMOptions;
+  } else if (strcmp(kind, "{experimental}") == 0) {
+    return UnlockExperimentalVMOptions;
   } else {
     return true;
   }
@@ -125,6 +130,7 @@
 #define RUNTIME_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{product}", DEFAULT },
 #define RUNTIME_PD_PRODUCT_FLAG_STRUCT(type, name, doc)     { #type, XSTR(name), &name, "{pd product}", DEFAULT },
 #define RUNTIME_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{diagnostic}", DEFAULT },
+#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{experimental}", DEFAULT },
 #define RUNTIME_MANAGEABLE_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{manageable}", DEFAULT },
 #define RUNTIME_PRODUCT_RW_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{product rw}", DEFAULT },
 
@@ -172,8 +178,11 @@
 
 
 static Flag flagTable[] = {
- RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT)
+ RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_EXPERIMENTAL_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT)
  RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT)
+#ifndef SERIALGC
+ G1_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_EXPERIMENTAL_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT)
+#endif // SERIALGC
 #ifdef COMPILER1
  C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, C1_PD_DEVELOP_FLAG_STRUCT, C1_PRODUCT_FLAG_STRUCT, C1_PD_PRODUCT_FLAG_STRUCT, C1_NOTPRODUCT_FLAG_STRUCT)
 #endif
@@ -196,7 +205,8 @@
   for (Flag* current = &flagTable[0]; current->name; current++) {
     if (str_equal(current->name, name, length)) {
       if (!(current->is_unlocked() || current->is_unlocker())) {
-        // disable use of diagnostic flags until they are unlocked
+        // disable use of diagnostic or experimental flags until they
+        // are explicitly unlocked
         return NULL;
       }
       return current;
@@ -355,8 +365,11 @@
   if (result == NULL) return false;
   if (!result->is_ccstr()) return false;
   ccstr old_value = result->get_ccstr();
-  char* new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
-  strcpy(new_value, *value);
+  char* new_value = NULL;
+  if (*value != NULL) {
+    new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
+    strcpy(new_value, *value);
+  }
   result->set_ccstr(new_value);
   if (result->origin == DEFAULT && old_value != NULL) {
     // Prior value is NOT heap allocated, but was a literal constant.
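
The ccstrAtPut hunk above guards against a NULL incoming string before copying it onto the C heap (elsewhere in this change, ccstr flags such as SyncKnobs and TraceJVMTI now default to NULL rather than ""). A hedged standalone sketch of the same null-tolerant copy, with plain malloc standing in for NEW_C_HEAP_ARRAY:

    #include <cstdlib>
    #include <cstring>

    // Null-tolerant heap copy of a C string; malloc/strcpy stand in
    // for the NEW_C_HEAP_ARRAY/strcpy pair used in the patched code.
    static char* copy_ccstr(const char* value) {
      if (value == NULL) {
        return NULL;               // the unguarded version would crash in strlen()
      }
      char* copy = static_cast<char*>(std::malloc(std::strlen(value) + 1));
      if (copy != NULL) {
        std::strcpy(copy, value);
      }
      return copy;
    }
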
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -255,7 +255,19 @@
 // diagnostic information about VM problems.  To use a VM diagnostic
 // option, you must first specify +UnlockDiagnosticVMOptions.
 // (This master switch also affects the behavior of -Xprintflags.)
-
+//
+// experimental flags are in support of features that are not
+//    part of the officially supported product, but are available
+//    for experimentation. They could, for example, be performance
+//    features that have not undergone full or rigorous QA, but which may
+//    help performance in some cases and are released for experimentation
+//    by the community of users and developers. This mechanism also makes
+//    it possible to build a fully supported product that nonetheless
+//    ships with some unsupported, lightly tested, experimental features.
+//    Like the UnlockDiagnosticVMOptions flag above, there is a corresponding
+//    UnlockExperimentalVMOptions flag, which allows the control and
+//    modification of the experimental flags.
+//
 // manageable flags are writeable external product flags.
 //    They are dynamically writeable through the JDK management interface
 //    (com.sun.management.HotSpotDiagnosticMXBean API) and also through JConsole.
@@ -285,7 +297,7 @@
 // Note that when there is a need to support develop flags to be writeable,
 // it can be done in the same way as product_rw.
 
-#define RUNTIME_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, manageable, product_rw, lp64_product) \
+#define RUNTIME_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw, lp64_product) \
                                                                             \
   lp64_product(bool, UseCompressedOops, false,                              \
             "Use 32-bit object references in 64-bit VM. "                   \
@@ -307,7 +319,10 @@
           "Prints flags that appeared on the command line")                 \
                                                                             \
   diagnostic(bool, UnlockDiagnosticVMOptions, trueInDebug,                  \
-          "Enable processing of flags relating to field diagnostics")       \
+          "Enable normal processing of flags relating to field diagnostics")\
+                                                                            \
+  experimental(bool, UnlockExperimentalVMOptions, false,                    \
+          "Enable normal processing of flags relating to experimental features")\
                                                                             \
   product(bool, JavaMonitorsInStackTrace, true,                             \
           "Print info. about Java monitor locks when the stacks are dumped")\
@@ -315,6 +330,12 @@
   product_pd(bool, UseLargePages,                                           \
           "Use large page memory")                                          \
                                                                             \
+  product_pd(bool, UseLargePagesIndividualAllocation,                       \
+          "Allocate large pages individually for better affinity")          \
+                                                                            \
+  develop(bool, LargePagesIndividualAllocationInjectError, false,           \
+          "Fail large pages individual allocation")                         \
+                                                                            \
   develop(bool, TracePageSizes, false,                                      \
           "Trace page size selection and usage.")                           \
                                                                             \
@@ -692,7 +713,7 @@
   diagnostic(bool, PrintAssembly, false,                                    \
           "Print assembly code (using external disassembler.so)")           \
                                                                             \
-  diagnostic(ccstr, PrintAssemblyOptions, false,                            \
+  diagnostic(ccstr, PrintAssemblyOptions, NULL,                             \
           "Options string passed to disassembler.so")                       \
                                                                             \
   diagnostic(bool, PrintNMethods, false,                                    \
@@ -833,7 +854,7 @@
           "Use LWP-based instead of libthread-based synchronization "       \
           "(SPARC only)")                                                   \
                                                                             \
-  product(ccstr, SyncKnobs, "",                                             \
+  product(ccstr, SyncKnobs, NULL,                                           \
           "(Unstable) Various monitor synchronization tunables")            \
                                                                             \
   product(intx, EmitSync, 0,                                                \
@@ -976,6 +997,12 @@
   product(bool, UseXmmI2F, false,                                           \
           "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")      \
                                                                             \
+  product(bool, UseXMMForArrayCopy, false,                                  \
+          "Use SSE2 MOVQ instruction for Arraycopy")                        \
+                                                                            \
+  product(bool, UseUnalignedLoadStores, false,                              \
+          "Use SSE2 MOVDQU instruction for Arraycopy")                      \
+                                                                            \
   product(intx, FieldsAllocationStyle, 1,                                   \
           "0 - type based with oops first, 1 - with oops last")             \
                                                                             \
@@ -1017,7 +1044,7 @@
   notproduct(bool, TraceJVMCalls, false,                                    \
           "Trace JVM calls")                                                \
                                                                             \
-  product(ccstr, TraceJVMTI, "",                                            \
+  product(ccstr, TraceJVMTI, NULL,                                          \
           "Trace flags for JVMTI functions and events")                     \
                                                                             \
   /* This option can change an EMCP method into an obsolete method. */      \
@@ -1124,7 +1151,10 @@
   /* gc */                                                                  \
                                                                             \
   product(bool, UseSerialGC, false,                                         \
-          "Tells whether the VM should use serial garbage collector")       \
+          "Use the serial garbage collector")                               \
+                                                                            \
+  experimental(bool, UseG1GC, false,                                        \
+          "Use the Garbage-First garbage collector")                        \
                                                                             \
   product(bool, UseParallelGC, false,                                       \
           "Use the Parallel Scavenge garbage collector")                    \
@@ -1139,10 +1169,6 @@
           "In the Parallel Old garbage collector use parallel dense"        \
           " prefix update")                                                 \
                                                                             \
-  develop(bool, UseParallelOldGCChunkPointerCalc, true,                     \
-          "In the Parallel Old garbage collector use chucks to calculate"   \
-          " new object locations")                                          \
-                                                                            \
   product(uintx, HeapMaximumCompactionInterval, 20,                         \
           "How often should we maximally compact the heap (not allowing "   \
           "any dead space)")                                                \
@@ -1171,21 +1197,17 @@
   product(uintx, ParallelCMSThreads, 0,                                     \
           "Max number of threads CMS will use for concurrent work")         \
                                                                             \
-  develop(bool, VerifyParallelOldWithMarkSweep, false,                      \
-          "Use the MarkSweep code to verify phases of Parallel Old")        \
-                                                                            \
-  develop(uintx, VerifyParallelOldWithMarkSweepInterval, 1,                 \
-          "Interval at which the MarkSweep code is used to verify "         \
-          "phases of Parallel Old")                                         \
-                                                                            \
   develop(bool, ParallelOldMTUnsafeMarkBitMap, false,                       \
           "Use the Parallel Old MT unsafe in marking the bitmap")           \
                                                                             \
   develop(bool, ParallelOldMTUnsafeUpdateLiveData, false,                   \
           "Use the Parallel Old MT unsafe in update of live size")          \
                                                                             \
-  develop(bool, TraceChunkTasksQueuing, false,                              \
-          "Trace the queuing of the chunk tasks")                           \
+  develop(bool, TraceRegionTasksQueuing, false,                             \
+          "Trace the queuing of the region tasks")                          \
+                                                                            \
+  product(uintx, ParallelMarkingThreads, 0,                                 \
+          "Number of marking threads concurrent gc will use")               \
                                                                             \
   product(uintx, YoungPLABSize, 4096,                                       \
           "Size of young gen promotion labs (in HeapWords)")                \
@@ -1283,6 +1305,12 @@
           "The amount of young gen chosen by default per GC worker "        \
           "thread available ")                                              \
                                                                             \
+  product(bool, GCOverheadReporting, false,                                 \
+         "Enables the GC overhead reporting facility")                      \
+                                                                            \
+  product(intx, GCOverheadReportingPeriodMS, 100,                           \
+          "Reporting period for conc GC overhead reporting, in ms ")        \
+                                                                            \
   product(bool, CMSIncrementalMode, false,                                  \
           "Whether CMS GC should operate in \"incremental\" mode")          \
                                                                             \
@@ -1611,6 +1639,9 @@
   product(bool, ZeroTLAB, false,                                            \
           "Zero out the newly created TLAB")                                \
                                                                             \
+  product(bool, FastTLABRefill, true,                                       \
+          "Use fast TLAB refill code")                                      \
+                                                                            \
   product(bool, PrintTLAB, false,                                           \
           "Print various TLAB related information")                         \
                                                                             \
@@ -1800,6 +1831,9 @@
   diagnostic(bool, VerifyDuringGC, false,                                   \
           "Verify memory system during GC (between phases)")                \
                                                                             \
+  diagnostic(bool, GCParallelVerificationEnabled, true,                     \
+          "Enable parallel memory system verification")                     \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                      \
                                                                             \
@@ -2527,7 +2561,7 @@
   develop(intx, MaxRecursiveInlineLevel, 1,                                 \
           "maximum number of nested recursive calls that are inlined")      \
                                                                             \
-  develop(intx, InlineSmallCode, 1000,                                      \
+  product(intx, InlineSmallCode, 1000,                                      \
           "Only inline already compiled methods if their code size is "     \
           "less than this")                                                 \
                                                                             \
@@ -2811,6 +2845,12 @@
           "how many entries we'll try to leave on the stack during "        \
           "parallel GC")                                                    \
                                                                             \
+  product(intx, DCQBarrierQueueBufferSize, 256,                             \
+          "Number of elements in a dirty card queue buffer")                \
+                                                                            \
+  product(intx, DCQBarrierProcessCompletedThreshold, 5,                     \
+          "Number of completed dirty card buffers to trigger processing.")  \
+                                                                            \
   /* stack parameters */                                                    \
   product_pd(intx, StackYellowPages,                                        \
           "Number of yellow zone (recoverable overflows) pages")            \
@@ -3232,6 +3272,7 @@
 #define DECLARE_PRODUCT_FLAG(type, name, value, doc)    extern "C" type name;
 #define DECLARE_PD_PRODUCT_FLAG(type, name, doc)        extern "C" type name;
 #define DECLARE_DIAGNOSTIC_FLAG(type, name, value, doc) extern "C" type name;
+#define DECLARE_EXPERIMENTAL_FLAG(type, name, value, doc) extern "C" type name;
 #define DECLARE_MANAGEABLE_FLAG(type, name, value, doc) extern "C" type name;
 #define DECLARE_PRODUCT_RW_FLAG(type, name, value, doc) extern "C" type name;
 #ifdef PRODUCT
@@ -3254,6 +3295,7 @@
 #define MATERIALIZE_PRODUCT_FLAG(type, name, value, doc)   type name = value;
 #define MATERIALIZE_PD_PRODUCT_FLAG(type, name, doc)       type name = pd_##name;
 #define MATERIALIZE_DIAGNOSTIC_FLAG(type, name, value, doc) type name = value;
+#define MATERIALIZE_EXPERIMENTAL_FLAG(type, name, value, doc) type name = value;
 #define MATERIALIZE_MANAGEABLE_FLAG(type, name, value, doc) type name = value;
 #define MATERIALIZE_PRODUCT_RW_FLAG(type, name, value, doc) type name = value;
 #ifdef PRODUCT
@@ -3271,6 +3313,6 @@
 #define MATERIALIZE_LP64_PRODUCT_FLAG(type, name, value, doc) /* flag is constant */
 #endif // _LP64
 
-RUNTIME_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG, DECLARE_LP64_PRODUCT_FLAG)
+RUNTIME_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG, DECLARE_LP64_PRODUCT_FLAG)
 
 RUNTIME_OS_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
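
The changes above thread a new "experimental" parameter through the existing X-macro scheme: the same RUNTIME_FLAGS list is expanded once with DECLARE_* macros to produce extern declarations, once with MATERIALIZE_* macros to produce definitions, and (in globals.cpp) once with *_FLAG_STRUCT macros to build the flag table that is_unlocked() consults. A small self-contained sketch of that technique; MY_FLAGS and its flags are invented for illustration, not HotSpot names.

    #include <cstdio>

    // One list, several expansions: the X-macro pattern behind RUNTIME_FLAGS.
    #define MY_FLAGS(product, experimental)                                   \
      product(bool, UseThing, true,  "Use the thing")                         \
      experimental(bool, UseNewThing, false, "Use the experimental thing")

    // Expansion 1: variable definitions with their default values.
    #define MATERIALIZE_FLAG(type, name, value, doc) type name = value;
    MY_FLAGS(MATERIALIZE_FLAG, MATERIALIZE_FLAG)

    // Expansion 2: a table describing every flag, tagged by kind.
    struct Flag { const char* type; const char* name; void* addr; const char* kind; };
    #define PRODUCT_STRUCT(type, name, value, doc)      { #type, #name, &name, "{product}" },
    #define EXPERIMENTAL_STRUCT(type, name, value, doc) { #type, #name, &name, "{experimental}" },
    static Flag flag_table[] = {
      MY_FLAGS(PRODUCT_STRUCT, EXPERIMENTAL_STRUCT)
      { NULL, NULL, NULL, NULL }
    };

    int main() {
      for (Flag* f = flag_table; f->name != NULL; f++) {
        std::printf("%-16s %-5s %s\n", f->kind, f->type, f->name);
      }
      return 0;
    }

With the experimental kind wired through, a flag such as UseG1GC stays hidden behind -XX:+UnlockExperimentalVMOptions, just as diagnostic flags stay hidden behind -XX:+UnlockDiagnosticVMOptions.
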
--- a/hotspot/src/share/vm/runtime/globals_extension.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/globals_extension.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -30,6 +30,7 @@
 #define RUNTIME_PRODUCT_FLAG_MEMBER(type, name, value, doc)    FLAG_MEMBER(name),
 #define RUNTIME_PD_PRODUCT_FLAG_MEMBER(type, name, doc)        FLAG_MEMBER(name),
 #define RUNTIME_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
+#define RUNTIME_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
 #define RUNTIME_MANAGEABLE_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
 #define RUNTIME_PRODUCT_RW_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
 #ifdef PRODUCT
@@ -74,21 +75,16 @@
 #endif
 
 typedef enum {
- RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER,
-               RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER,
-               RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER,
-               RUNTIME_PRODUCT_RW_FLAG_MEMBER,
-               RUNTIME_LP64_PRODUCT_FLAG_MEMBER)
- RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER,
-               RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER,
-               RUNTIME_NOTPRODUCT_FLAG_MEMBER)
+ RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_EXPERIMENTAL_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER, RUNTIME_LP64_PRODUCT_FLAG_MEMBER)
+ RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER)
+#ifndef KERNEL
+ G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_EXPERIMENTAL_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER)
+#endif
 #ifdef COMPILER1
- C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER,
-          C1_NOTPRODUCT_FLAG_MEMBER)
+ C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, C1_NOTPRODUCT_FLAG_MEMBER)
 #endif
 #ifdef COMPILER2
- C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER,
-          C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
+ C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
 #endif
  NUM_CommandLineFlag
 } CommandLineFlag;
@@ -100,6 +96,7 @@
 #define RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)    FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc)        FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
+#define RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
 #ifdef PRODUCT
@@ -144,24 +141,47 @@
 #endif
 
 typedef enum {
- RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+ RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_LP64_PRODUCT_FLAG_MEMBER_WITH_TYPE)
-RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+ RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+#ifndef KERNEL
+ G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE)
+#endif // KERNEL
 #ifdef COMPILER1
- C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PRODUCT_FLAG_MEMBER_WITH_TYPE,
-          C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+ C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C1_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
 #endif
 #ifdef COMPILER2
- C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PRODUCT_FLAG_MEMBER_WITH_TYPE,
-          C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+ C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C2_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+          C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
 #endif
  NUM_CommandLineFlagWithType
 } CommandLineFlagWithType;
--- a/hotspot/src/share/vm/runtime/mutexLocker.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/mutexLocker.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -47,7 +47,7 @@
 Monitor* JNICritical_lock             = NULL;
 Mutex*   JvmtiThreadState_lock        = NULL;
 Monitor* JvmtiPendingEvent_lock       = NULL;
-Mutex*   Heap_lock                    = NULL;
+Monitor* Heap_lock                    = NULL;
 Mutex*   ExpandHeap_lock              = NULL;
 Mutex*   AdapterHandlerLibrary_lock   = NULL;
 Mutex*   SignatureHandlerLibrary_lock = NULL;
@@ -67,7 +67,18 @@
 Monitor* SLT_lock                     = NULL;
 Monitor* iCMS_lock                    = NULL;
 Monitor* FullGCCount_lock             = NULL;
+Monitor* CMark_lock                   = NULL;
+Monitor* ZF_mon                       = NULL;
+Monitor* Cleanup_mon                  = NULL;
+Monitor* G1ConcRefine_mon             = NULL;
+Mutex*   SATB_Q_FL_lock               = NULL;
+Monitor* SATB_Q_CBL_mon               = NULL;
+Mutex*   Shared_SATB_Q_lock           = NULL;
+Mutex*   DirtyCardQ_FL_lock           = NULL;
+Monitor* DirtyCardQ_CBL_mon           = NULL;
+Mutex*   Shared_DirtyCardQ_lock       = NULL;
 Mutex*   ParGCRareEvent_lock          = NULL;
+Mutex*   EvacFailureStack_lock        = NULL;
 Mutex*   DerivedPointerTableGC_lock   = NULL;
 Mutex*   Compile_lock                 = NULL;
 Monitor* MethodCompileQueue_lock      = NULL;
@@ -102,6 +113,9 @@
 Mutex*   PerfDataManager_lock         = NULL;
 Mutex*   OopMapCacheAlloc_lock        = NULL;
 
+Mutex*   MMUTracker_lock              = NULL;
+Mutex*   HotCardCache_lock            = NULL;
+
 Monitor* GCTaskManager_lock           = NULL;
 
 Mutex*   Management_lock              = NULL;
@@ -150,6 +164,23 @@
     def(iCMS_lock                  , Monitor, special,     true ); // CMS incremental mode start/stop notification
     def(FullGCCount_lock           , Monitor, leaf,        true ); // in support of ExplicitGCInvokesConcurrent
   }
+  if (UseG1GC) {
+    def(CMark_lock                 , Monitor, nonleaf,     true ); // coordinate concurrent mark thread
+    def(ZF_mon                     , Monitor, leaf,        true );
+    def(Cleanup_mon                , Monitor, nonleaf,     true );
+    def(G1ConcRefine_mon           , Monitor, nonleaf,     true );
+    def(SATB_Q_FL_lock             , Mutex  , special,     true );
+    def(SATB_Q_CBL_mon             , Monitor, nonleaf,     true );
+    def(Shared_SATB_Q_lock         , Mutex,   nonleaf,     true );
+
+    def(DirtyCardQ_FL_lock         , Mutex  , special,     true );
+    def(DirtyCardQ_CBL_mon         , Monitor, nonleaf,     true );
+    def(Shared_DirtyCardQ_lock     , Mutex,   nonleaf,     true );
+
+    def(MMUTracker_lock            , Mutex  , leaf     ,   true );
+    def(HotCardCache_lock          , Mutex  , special  ,   true );
+    def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );
+  }
   def(ParGCRareEvent_lock          , Mutex  , leaf     ,   true );
   def(DerivedPointerTableGC_lock   , Mutex,   leaf,        true );
   def(CodeCache_lock               , Mutex  , special,     true );
@@ -203,7 +234,7 @@
     def(SLT_lock                   , Monitor, nonleaf,     false );
                     // used in CMS GC for locking PLL lock
   }
-  def(Heap_lock                    , Mutex  , nonleaf+1,   false);
+  def(Heap_lock                    , Monitor, nonleaf+1,   false);
   def(JfieldIdCreation_lock        , Mutex  , nonleaf+1,   true ); // jfieldID, Used in VM_Operation
   def(JNICachedItableIndex_lock    , Mutex  , nonleaf+1,   false); // Used to cache an itable index during JNI invoke
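
The new G1 monitors above are registered through the same def(name, type, rank, ...) table as the existing locks; the rank argument ("special", "leaf", "nonleaf", ...) is what the VM's debug builds use to check that locks are always taken in a consistent order and so cannot deadlock each other. A minimal sketch of rank-checked locking follows; RankedMutex, its thread_local stack, and the direction of the ordering check are illustrative choices, not HotSpot's Mutex implementation.

    #include <cassert>
    #include <mutex>
    #include <vector>

    // A mutex with a numeric rank: a thread may only acquire locks in one
    // consistent rank order, which rules out cyclic acquisition and deadlock.
    class RankedMutex {
     public:
      explicit RankedMutex(int rank) : _rank(rank) {}
      void lock() {
        // Illustrative rule: only acquire strictly increasing ranks.
        assert(held_ranks().empty() || held_ranks().back() < _rank);
        _m.lock();
        held_ranks().push_back(_rank);
      }
      void unlock() {
        assert(!held_ranks().empty() && held_ranks().back() == _rank);
        held_ranks().pop_back();
        _m.unlock();
      }
     private:
      static std::vector<int>& held_ranks() {
        static thread_local std::vector<int> ranks;   // ranks held by this thread
        return ranks;
      }
      std::mutex _m;
      int _rank;
    };

    static RankedMutex low_rank_lock(1);    // e.g. a queue free-list lock
    static RankedMutex high_rank_lock(3);   // e.g. a heap-level lock

    int main() {
      low_rank_lock.lock();
      high_rank_lock.lock();      // acquiring high while holding low is allowed;
                                  // the reverse order would trip the assert in lock()
      high_rank_lock.unlock();
      low_rank_lock.unlock();
      return 0;
    }
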
 
--- a/hotspot/src/share/vm/runtime/mutexLocker.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/mutexLocker.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -38,7 +38,7 @@
 extern Monitor* JNICritical_lock;                // a lock used while entering and exiting JNI critical regions, allows GC to sometimes get in
 extern Mutex*   JvmtiThreadState_lock;           // a lock on modification of JVMTI thread data
 extern Monitor* JvmtiPendingEvent_lock;          // a lock on the JVMTI pending events list
-extern Mutex*   Heap_lock;                       // a lock on the heap
+extern Monitor* Heap_lock;                       // a lock on the heap
 extern Mutex*   ExpandHeap_lock;                 // a lock on expanding the heap
 extern Mutex*   AdapterHandlerLibrary_lock;      // a lock on the AdapterHandlerLibrary
 extern Mutex*   SignatureHandlerLibrary_lock;    // a lock on the SignatureHandlerLibrary
@@ -60,8 +60,30 @@
 extern Monitor* SLT_lock;                        // used in CMS GC for acquiring PLL
 extern Monitor* iCMS_lock;                       // CMS incremental mode start/stop notification
 extern Monitor* FullGCCount_lock;                // in support of "concurrent" full gc
+extern Monitor* CMark_lock;                      // used for concurrent mark thread coordination
+extern Monitor* ZF_mon;                          // used for G1 conc zero-fill.
+extern Monitor* Cleanup_mon;                     // used for G1 conc cleanup.
+extern Monitor* G1ConcRefine_mon;                // used for G1 conc-refine
+                                                 // coordination.
+
+extern Mutex*   SATB_Q_FL_lock;                  // Protects SATB Q
+                                                 // buffer free list.
+extern Monitor* SATB_Q_CBL_mon;                  // Protects SATB Q
+                                                 // completed buffer queue.
+extern Mutex*   Shared_SATB_Q_lock;              // Lock protecting SATB
+                                                 // queue shared by
+                                                 // non-Java threads.
+
+extern Mutex*   DirtyCardQ_FL_lock;              // Protects dirty card Q
+                                                 // buffer free list.
+extern Monitor* DirtyCardQ_CBL_mon;              // Protects dirty card Q
+                                                 // completed buffer queue.
+extern Mutex*   Shared_DirtyCardQ_lock;          // Lock protecting dirty card
+                                                 // queue shared by
+                                                 // non-Java threads.
                                                  // (see option ExplicitGCInvokesConcurrent)
 extern Mutex*   ParGCRareEvent_lock;             // Synchronizes various (rare) parallel GC ops.
+extern Mutex*   EvacFailureStack_lock;           // guards the evac failure scan stack
 extern Mutex*   Compile_lock;                    // a lock held when Compilation is updating code (used to block CodeCache traversal, CHA updates, etc)
 extern Monitor* MethodCompileQueue_lock;         // a lock held when method compilations are enqueued, dequeued
 #ifdef TIERED
@@ -93,6 +115,10 @@
 extern Mutex*   ParkerFreeList_lock;
 extern Mutex*   OopMapCacheAlloc_lock;           // protects allocation of oop_map caches
 
+extern Mutex*   MMUTracker_lock;                 // protects the MMU
+                                                 // tracker data structures
+extern Mutex*   HotCardCache_lock;               // protects the hot card cache
+
 extern Mutex*   Management_lock;                 // a lock used to serialize JVM management
 extern Monitor* LowMemory_lock;                  // a lock used for low memory detection
 
--- a/hotspot/src/share/vm/runtime/os.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -105,6 +105,18 @@
   static jlong elapsed_counter();
   static jlong elapsed_frequency();
 
+  // The "virtual time" of a thread is the amount of time a thread has
+  // actually run.  The first function indicates whether the OS supports
+  // this functionality for the current thread, and if so:
+  //   * the second enables vtime tracking (if that is required).
+  //   * the third tells whether vtime is enabled.
+  //   * the fourth returns the elapsed virtual time for the current
+  //     thread.
+  static bool supports_vtime();
+  static bool enable_vtime();
+  static bool vtime_enabled();
+  static double elapsedVTime();
+
   // Return current local time in a string (YYYY-MM-DD HH:MM:SS).
   // It is MT safe, but not async-safe, as reading time zone
   // information may require a lock on some platforms.
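
The new supports_vtime()/enable_vtime()/vtime_enabled()/elapsedVTime() hooks expose per-thread "virtual time": how long the current thread has actually run on a CPU, as opposed to wall-clock time. As a rough illustration of the quantity being reported, here is a hedged POSIX sketch using clock_gettime(CLOCK_THREAD_CPUTIME_ID); the real os:: implementations are platform-specific and not part of this hunk.

    #include <time.h>
    #include <cstdio>

    // Per-thread CPU time in seconds: the kind of value elapsedVTime() is
    // meant to return for the calling thread.
    static double thread_vtime_seconds() {
      timespec ts;
      if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
        return 0.0;                      // treat failure as "vtime unsupported"
      }
      return ts.tv_sec + ts.tv_nsec / 1e9;
    }

    int main() {
      double start = thread_vtime_seconds();
      volatile double sink = 0;
      for (int i = 0; i < 10 * 1000 * 1000; i++) sink += i;   // burn some CPU
      std::printf("vtime used: %f s\n", thread_vtime_seconds() - start);
      return 0;
    }
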
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -111,6 +111,25 @@
 }
 #endif // PRODUCT
 
+#ifndef SERIALGC
+
+// G1 write-barrier pre: executed before a pointer store.
+JRT_LEAF(void, SharedRuntime::g1_wb_pre(oopDesc* orig, JavaThread *thread))
+  if (orig == NULL) {
+    assert(false, "should be optimized out");
+    return;
+  }
+  // store the original value that was in the field reference
+  thread->satb_mark_queue().enqueue(orig);
+JRT_END
+
+// G1 write-barrier post: executed after a pointer store.
+JRT_LEAF(void, SharedRuntime::g1_wb_post(void* card_addr, JavaThread* thread))
+  thread->dirty_card_queue().enqueue(card_addr);
+JRT_END
+
+#endif // !SERIALGC
+
 
 JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x))
   return x * y;
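
g1_wb_pre and g1_wb_post above are the slow-path entry points for G1's two write barriers: the SATB pre-barrier logs the old value of a reference field before it is overwritten, and the post-barrier logs the card covering the updated location. A self-contained sketch of that enqueue-on-store pattern; the queues and the card arithmetic here are simplified illustrations, not G1's actual per-thread ObjPtrQueue/DirtyCardQueue structures.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Obj {};                                    // placeholder object type

    static std::vector<Obj*>      satb_queue;         // stand-in for the per-thread SATB log
    static std::vector<uintptr_t> dirty_card_queue;   // stand-in for the per-thread card log
    static const uintptr_t        kCardShift = 9;     // 512-byte cards, as in the card table

    // Pre-barrier: remember the value being overwritten (snapshot-at-the-beginning),
    // mirroring what g1_wb_pre enqueues on the thread's satb_mark_queue().
    static void wb_pre(Obj** field) {
      Obj* old = *field;
      if (old != NULL) {
        satb_queue.push_back(old);
      }
    }

    // Post-barrier: remember which card the store dirtied, mirroring what
    // g1_wb_post enqueues on the thread's dirty_card_queue().  Real G1 enqueues
    // a card address; a card index is enough for this sketch.
    static void wb_post(Obj** field) {
      dirty_card_queue.push_back(reinterpret_cast<uintptr_t>(field) >> kCardShift);
    }

    // A reference store with both barriers applied around it.
    static void store_ref(Obj** field, Obj* new_value) {
      wb_pre(field);
      *field = new_value;
      wb_post(field);
    }

    int main() {
      Obj a, b;
      Obj* field = &a;
      store_ref(&field, &b);    // logs &a in the SATB queue, dirties field's card
      return (int) (satb_queue.size() + dirty_card_queue.size());  // 2
    }
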
--- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -99,6 +99,12 @@
   static address raw_exception_handler_for_return_address(address return_address);
   static address exception_handler_for_return_address(address return_address);
 
+#ifndef SERIALGC
+  // G1 write barriers
+  static void g1_wb_pre(oopDesc* orig, JavaThread *thread);
+  static void g1_wb_post(void* card_addr, JavaThread* thread);
+#endif // !SERIALGC
+
   // exception handling and implicit exceptions
   static address compute_compiled_exc_handler(nmethod* nm, address ret_pc, Handle& exception,
                                               bool force_unwind, bool top_frame_only);
--- a/hotspot/src/share/vm/runtime/task.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/task.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -67,7 +67,6 @@
 
 PeriodicTask::PeriodicTask(size_t interval_time) :
   _counter(0), _interval(interval_time) {
-  assert(is_init_completed(), "Periodic tasks should not start during VM initialization");
   // Sanity check the interval time
   assert(_interval >= PeriodicTask::min_interval &&
          _interval <= PeriodicTask::max_interval &&
--- a/hotspot/src/share/vm/runtime/thread.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -1138,6 +1138,10 @@
 
 void JavaThread::initialize() {
   // Initialize fields
+
+  // Set the claimed par_id to -1 (i.e., not claiming any par_ids)
+  set_claimed_par_id(-1);
+
   set_saved_exception_pc(NULL);
   set_threadObj(NULL);
   _anchor.clear();
@@ -1209,7 +1213,18 @@
   pd_initialize();
 }
 
-JavaThread::JavaThread(bool is_attaching) : Thread() {
+#ifndef SERIALGC
+SATBMarkQueueSet JavaThread::_satb_mark_queue_set;
+DirtyCardQueueSet JavaThread::_dirty_card_queue_set;
+#endif // !SERIALGC
+
+JavaThread::JavaThread(bool is_attaching) :
+  Thread()
+#ifndef SERIALGC
+  , _satb_mark_queue(&_satb_mark_queue_set),
+  _dirty_card_queue(&_dirty_card_queue_set)
+#endif // !SERIALGC
+{
   initialize();
   _is_attaching = is_attaching;
 }
@@ -1255,7 +1270,13 @@
 // Remove this ifdef when C1 is ported to the compiler interface.
 static void compiler_thread_entry(JavaThread* thread, TRAPS);
 
-JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : Thread() {
+JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) :
+  Thread()
+#ifndef SERIALGC
+  , _satb_mark_queue(&_satb_mark_queue_set),
+  _dirty_card_queue(&_dirty_card_queue_set)
+#endif // !SERIALGC
+{
   if (TraceThreadEvents) {
     tty->print_cr("creating thread %p", this);
   }
@@ -2964,10 +2985,6 @@
       if (UseStringCache) {
         // Forcibly initialize java/lang/String and mutate the private
         // static final "stringCacheEnabled" field before we start creating instances
-#ifdef ASSERT
-        klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0);
-        assert(tmp_k == NULL, "java/lang/String should not be loaded yet");
-#endif
         klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0);
         KlassHandle k = KlassHandle(THREAD, k_o);
         guarantee(k.not_null(), "Must find java/lang/String");
@@ -3071,9 +3088,14 @@
 
 #ifndef SERIALGC
   // Support for ConcurrentMarkSweep. This should be cleaned up
-  // and better encapsulated. XXX YSR
-  if (UseConcMarkSweepGC) {
-    ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD);
+  // and better encapsulated. The ugly nested if test would go away
+  // once things are properly refactored. XXX YSR
+  if (UseConcMarkSweepGC || UseG1GC) {
+    if (UseConcMarkSweepGC) {
+      ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD);
+    } else {
+      ConcurrentMarkThread::makeSurrogateLockerThread(THREAD);
+    }
     if (HAS_PENDING_EXCEPTION) {
       vm_exit_during_initialization(Handle(THREAD, PENDING_EXCEPTION));
     }
--- a/hotspot/src/share/vm/runtime/thread.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/thread.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -783,6 +783,18 @@
   }   _jmp_ring[ jump_ring_buffer_size ];
 #endif /* PRODUCT */
 
+#ifndef SERIALGC
+  // Support for G1 barriers
+
+  ObjPtrQueue _satb_mark_queue;          // Thread-local log for SATB barrier.
+  // Set of all such queues.
+  static SATBMarkQueueSet _satb_mark_queue_set;
+
+  DirtyCardQueue _dirty_card_queue;      // Thread-local log for dirty cards.
+  // Set of all such queues.
+  static DirtyCardQueueSet _dirty_card_queue_set;
+#endif // !SERIALGC
+
   friend class VMThread;
   friend class ThreadWaitTransition;
   friend class VM_Exit;
@@ -1168,6 +1180,11 @@
 
   static ByteSize do_not_unlock_if_synchronized_offset() { return byte_offset_of(JavaThread, _do_not_unlock_if_synchronized); }
 
+#ifndef SERIALGC
+  static ByteSize satb_mark_queue_offset()       { return byte_offset_of(JavaThread, _satb_mark_queue); }
+  static ByteSize dirty_card_queue_offset()      { return byte_offset_of(JavaThread, _dirty_card_queue); }
+#endif // !SERIALGC
+
   // Returns the jni environment for this thread
   JNIEnv* jni_environment()                      { return &_jni_environment; }
 
@@ -1414,6 +1431,20 @@
     _stack_size_at_create = value;
   }
 
+#ifndef SERIALGC
+  // SATB marking queue support
+  ObjPtrQueue& satb_mark_queue() { return _satb_mark_queue; }
+  static SATBMarkQueueSet& satb_mark_queue_set() {
+    return _satb_mark_queue_set;
+  }
+
+  // Dirty card queue support
+  DirtyCardQueue& dirty_card_queue() { return _dirty_card_queue; }
+  static DirtyCardQueueSet& dirty_card_queue_set() {
+    return _dirty_card_queue_set;
+  }
+#endif // !SERIALGC
+
   // Machine dependent stuff
   #include "incls/_thread_pd.hpp.incl"
 
@@ -1445,6 +1476,14 @@
   // clearing/querying jni attach status
   bool is_attaching() const { return _is_attaching; }
   void set_attached() { _is_attaching = false; OrderAccess::fence(); }
+private:
+  // This field is used to determine if a thread has claimed
+  // a par_id: it is -1 if the thread has not claimed a par_id;
+  // otherwise its value is the par_id that has been claimed.
+  int _claimed_par_id;
+public:
+  int get_claimed_par_id() { return _claimed_par_id; }
+  void set_claimed_par_id(int id) { _claimed_par_id = id;}
 };
 
 // Inline implementation of JavaThread::current
--- a/hotspot/src/share/vm/runtime/virtualspace.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/virtualspace.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -251,24 +251,16 @@
       // increase size to a multiple of the desired alignment
       size = align_size_up(size, alignment);
       size_t extra_size = size + alignment;
-      char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
-      if (extra_base == NULL) return;
-      // Do manual alignement
-      base = (char*) align_size_up((uintptr_t) extra_base, alignment);
-      assert(base >= extra_base, "just checking");
-      // Release unused areas
-      size_t unused_bottom_size = base - extra_base;
-      size_t unused_top_size = extra_size - size - unused_bottom_size;
-      assert(unused_bottom_size % os::vm_allocation_granularity() == 0,
-             "size not allocation aligned");
-      assert(unused_top_size % os::vm_allocation_granularity() == 0,
-             "size not allocation aligned");
-      if (unused_bottom_size > 0) {
-        os::release_memory(extra_base, unused_bottom_size);
-      }
-      if (unused_top_size > 0) {
-        os::release_memory(base + size, unused_top_size);
-      }
+      do {
+        char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
+        if (extra_base == NULL) return;
+        // Do manual alignment
+        base = (char*) align_size_up((uintptr_t) extra_base, alignment);
+        assert(base >= extra_base, "just checking");
+        // Re-reserve the region at the aligned base address.
+        os::release_memory(extra_base, extra_size);
+        base = os::reserve_memory(size, base);
+      } while (base == NULL);
     }
   }
   // Done
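
The virtualspace.cpp hunk replaces "reserve oversized, then trim the unused ends" with "reserve oversized to find an aligned address, release everything, and re-reserve exactly at that address, retrying until it sticks". A hedged POSIX sketch of that retry technique using mmap/munmap (a hint address plus a check, no MAP_FIXED, mirroring the attempt-and-retry structure rather than os::reserve_memory itself):

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdint>

    // Reserve 'size' bytes at an address aligned to 'alignment' (a power of two
    // that is a multiple of the page size).  Mirrors the patched do/while loop.
    static void* reserve_aligned(size_t size, size_t alignment) {
      for (;;) {
        // Over-reserve so an aligned address is guaranteed to exist inside.
        size_t extra = size + alignment;
        void* raw = mmap(NULL, extra, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (raw == MAP_FAILED) return NULL;

        uintptr_t aligned = (reinterpret_cast<uintptr_t>(raw) + alignment - 1)
                            & ~(alignment - 1);

        // Release the whole over-sized mapping, then try to re-reserve exactly
        // at the aligned address.  Another thread can grab the range in between,
        // in which case we simply try again.
        munmap(raw, extra);
        void* again = mmap(reinterpret_cast<void*>(aligned), size, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (again == reinterpret_cast<void*>(aligned)) return again;
        if (again != MAP_FAILED) munmap(again, size);   // landed elsewhere; retry
      }
    }
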
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -302,7 +302,7 @@
   nonstatic_field(CardTableModRefBS,           _guard_region,                                 MemRegion)                             \
   nonstatic_field(CardTableModRefBS,           byte_map_base,                                 jbyte*)                                \
                                                                                                                                      \
-  nonstatic_field(CardTableRS,                 _ct_bs,                                        CardTableModRefBS)                     \
+  nonstatic_field(CardTableRS,                 _ct_bs,                                        CardTableModRefBSForCTRS*)             \
                                                                                                                                      \
   nonstatic_field(CollectedHeap,               _reserved,                                     MemRegion)                             \
   nonstatic_field(SharedHeap,                  _perm_gen,                                     PermGen*)                              \
@@ -995,6 +995,7 @@
   declare_toplevel_type(BarrierSet)                                       \
            declare_type(ModRefBarrierSet,             BarrierSet)         \
            declare_type(CardTableModRefBS,            ModRefBarrierSet)   \
+           declare_type(CardTableModRefBSForCTRS,     CardTableModRefBS)  \
   declare_toplevel_type(GenRemSet)                                        \
            declare_type(CardTableRS,                  GenRemSet)          \
   declare_toplevel_type(BlockOffsetSharedArray)                           \
@@ -1022,6 +1023,10 @@
   declare_toplevel_type(BlockOffsetSharedArray*)                          \
   declare_toplevel_type(GenRemSet*)                                       \
   declare_toplevel_type(CardTableRS*)                                     \
+  declare_toplevel_type(CardTableModRefBS*)                               \
+  declare_toplevel_type(CardTableModRefBS**)                              \
+  declare_toplevel_type(CardTableModRefBSForCTRS*)                        \
+  declare_toplevel_type(CardTableModRefBSForCTRS**)                       \
   declare_toplevel_type(CollectedHeap*)                                   \
   declare_toplevel_type(ContiguousSpace*)                                 \
   declare_toplevel_type(DefNewGeneration*)                                \
--- a/hotspot/src/share/vm/runtime/vm_operations.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/runtime/vm_operations.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -53,8 +53,13 @@
   template(ParallelGCFailedAllocation)            \
   template(ParallelGCFailedPermanentAllocation)   \
   template(ParallelGCSystemGC)                    \
+  template(CGC_Operation)                         \
   template(CMS_Initial_Mark)                      \
   template(CMS_Final_Remark)                      \
+  template(G1CollectFull)                         \
+  template(G1CollectForAllocation)                \
+  template(G1IncCollectionPause)                  \
+  template(G1PopRegionCollectionPause)            \
   template(EnableBiasedLocking)                   \
   template(RevokeBias)                            \
   template(BulkRevokeBias)                        \
--- a/hotspot/src/share/vm/services/heapDumper.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/services/heapDumper.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -343,7 +343,8 @@
 
 // Default stack trace ID (used for dummy HPROF_TRACE record)
 enum {
-  STACK_TRACE_ID = 1
+  STACK_TRACE_ID = 1,
+  INITIAL_CLASS_COUNT = 200
 };
 
 
@@ -408,6 +409,7 @@
   void write_u8(u8 x);
   void write_objectID(oop o);
   void write_classID(Klass* k);
+  void write_id(u4 x);
 };
 
 DumpWriter::DumpWriter(const char* path) {
@@ -548,6 +550,14 @@
 #endif
 }
 
+void DumpWriter::write_id(u4 x) {
+#ifdef _LP64
+  write_u8((u8) x);
+#else
+  write_u4(x);
+#endif
+}
+
 // We use java mirror as the class ID
 void DumpWriter::write_classID(Klass* k) {
   write_objectID(k->java_mirror());
@@ -596,6 +606,8 @@
   static void dump_object_array(DumpWriter* writer, objArrayOop array);
   // creates HPROF_GC_PRIM_ARRAY_DUMP record for the given type array
   static void dump_prim_array(DumpWriter* writer, typeArrayOop array);
+  // create HPROF_FRAME record for the given method and bci
+  static void dump_stack_frame(DumpWriter* writer, int frame_serial_num, int class_serial_num, methodOop m, int bci);
 };
 
 // write a header of the given type
@@ -1070,6 +1082,29 @@
   }
 }
 
+// create a HPROF_FRAME record of the given methodOop and bci
+void DumperSupport::dump_stack_frame(DumpWriter* writer,
+                                     int frame_serial_num,
+                                     int class_serial_num,
+                                     methodOop m,
+                                     int bci) {
+  int line_number;
+  if (m->is_native()) {
+    line_number = -3;  // native frame
+  } else {
+    line_number = m->line_number_from_bci(bci);
+  }
+
+  write_header(writer, HPROF_FRAME, 4*oopSize + 2*sizeof(u4));
+  writer->write_id(frame_serial_num);               // frame serial number
+  writer->write_objectID(m->name());                // method's name
+  writer->write_objectID(m->signature());           // method's signature
+
+  assert(Klass::cast(m->method_holder())->oop_is_instance(), "not instanceKlass");
+  writer->write_objectID(instanceKlass::cast(m->method_holder())->source_file_name());  // source file name
+  writer->write_u4(class_serial_num);               // class serial number
+  writer->write_u4((u4) line_number);               // line number
+}
 
 // Support class used to generate HPROF_UTF8 records from the entries in the
 // SymbolTable.
@@ -1104,12 +1139,15 @@
  private:
   DumpWriter* _writer;
   u4 _thread_serial_num;
+  int _frame_num;
   DumpWriter* writer() const                { return _writer; }
  public:
   JNILocalsDumper(DumpWriter* writer, u4 thread_serial_num) {
     _writer = writer;
     _thread_serial_num = thread_serial_num;
+    _frame_num = -1;  // default - empty stack
   }
+  void set_frame_number(int n) { _frame_num = n; }
   void do_oop(oop* obj_p);
   void do_oop(narrowOop* obj_p) { ShouldNotReachHere(); }
 };
@@ -1122,7 +1160,7 @@
     writer()->write_u1(HPROF_GC_ROOT_JNI_LOCAL);
     writer()->write_objectID(o);
     writer()->write_u4(_thread_serial_num);
-    writer()->write_u4((u4)-1); // empty
+    writer()->write_u4((u4)_frame_num);
   }
 }
 
@@ -1269,6 +1307,9 @@
   bool _gc_before_heap_dump;
   bool _is_segmented_dump;
   jlong _dump_start;
+  GrowableArray<Klass*>* _klass_map;
+  ThreadStackTrace** _stack_traces;
+  int _num_threads;
 
   // accessors
   DumpWriter* writer() const                    { return _writer; }
@@ -1291,9 +1332,16 @@
   static void do_basic_type_array_class_dump(klassOop k);
 
   // HPROF_GC_ROOT_THREAD_OBJ records
-  void do_thread(JavaThread* thread, u4 thread_serial_num);
+  int do_thread(JavaThread* thread, u4 thread_serial_num);
   void do_threads();
 
+  void add_class_serial_number(Klass* k, int serial_num) {
+    _klass_map->at_put_grow(serial_num, k);
+  }
+
+  // HPROF_TRACE and HPROF_FRAME records
+  void dump_stack_traces();
+
   // writes a HPROF_HEAP_DUMP or HPROF_HEAP_DUMP_SEGMENT record
   void write_dump_header();
 
@@ -1313,6 +1361,18 @@
     _gc_before_heap_dump = gc_before_heap_dump;
     _is_segmented_dump = false;
     _dump_start = (jlong)-1;
+    _klass_map = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(INITIAL_CLASS_COUNT, true);
+    _stack_traces = NULL;
+    _num_threads = 0;
+  }
+  ~VM_HeapDumper() {
+    if (_stack_traces != NULL) {
+      for (int i=0; i < _num_threads; i++) {
+        delete _stack_traces[i];
+      }
+      FREE_C_HEAP_ARRAY(ThreadStackTrace*, _stack_traces);
+    }
+    delete _klass_map;
   }
 
   VMOp_Type type() const { return VMOp_HeapDumper; }
@@ -1436,6 +1496,9 @@
     Klass* klass = Klass::cast(k);
     writer->write_classID(klass);
 
+    // add the klassOop and class serial number pair
+    dumper->add_class_serial_number(klass, class_serial_num);
+
     writer->write_u4(STACK_TRACE_ID);
 
     // class name ID
@@ -1465,15 +1528,15 @@
 // Walk the stack of the given thread.
 // Dumps a HPROF_GC_ROOT_JAVA_FRAME record for each local
 // Dumps a HPROF_GC_ROOT_JNI_LOCAL record for each JNI local
-void VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) {
+//
+// It returns the number of Java frames in this thread stack
+int VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) {
   JNILocalsDumper blk(writer(), thread_serial_num);
 
   oop threadObj = java_thread->threadObj();
   assert(threadObj != NULL, "sanity check");
 
-  // JNI locals for the top frame
-  java_thread->active_handles()->oops_do(&blk);
-
+  int stack_depth = 0;
   if (java_thread->has_last_Java_frame()) {
 
     // vframes are resource allocated
@@ -1484,13 +1547,14 @@
     RegisterMap reg_map(java_thread);
     frame f = java_thread->last_frame();
     vframe* vf = vframe::new_vframe(&f, &reg_map, java_thread);
+    frame* last_entry_frame = NULL;
 
     while (vf != NULL) {
+      blk.set_frame_number(stack_depth);
       if (vf->is_java_frame()) {
 
         // java frame (interpreted, compiled, ...)
         javaVFrame *jvf = javaVFrame::cast(vf);
-
         if (!(jvf->method()->is_native())) {
           StackValueCollection* locals = jvf->locals();
           for (int slot=0; slot<locals->size(); slot++) {
@@ -1501,44 +1565,61 @@
                 writer()->write_u1(HPROF_GC_ROOT_JAVA_FRAME);
                 writer()->write_objectID(o);
                 writer()->write_u4(thread_serial_num);
-                writer()->write_u4((u4)-1); // empty
+                writer()->write_u4((u4) stack_depth);
               }
             }
           }
+        } else {
+          // native frame
+          if (stack_depth == 0) {
+            // JNI locals for the top frame.
+            java_thread->active_handles()->oops_do(&blk);
+          } else {
+            if (last_entry_frame != NULL) {
+              // JNI locals for the entry frame
+              assert(last_entry_frame->is_entry_frame(), "checking");
+              last_entry_frame->entry_frame_call_wrapper()->handles()->oops_do(&blk);
+            }
+          }
         }
-      } else {
+        // increment only for Java frames
+        stack_depth++;
+        last_entry_frame = NULL;
 
+      } else {
         // externalVFrame - if it's an entry frame then report any JNI locals
-        // as roots
+        // as roots when we find the corresponding native javaVFrame
         frame* fr = vf->frame_pointer();
         assert(fr != NULL, "sanity check");
         if (fr->is_entry_frame()) {
-          fr->entry_frame_call_wrapper()->handles()->oops_do(&blk);
+          last_entry_frame = fr;
         }
       }
-
       vf = vf->sender();
     }
+  } else {
+    // no last java frame but there may be JNI locals
+    java_thread->active_handles()->oops_do(&blk);
   }
+  return stack_depth;
 }
 
 
 // write a HPROF_GC_ROOT_THREAD_OBJ record for each java thread. Then walk
 // the stack so that locals and JNI locals are dumped.
 void VM_HeapDumper::do_threads() {
-  u4 thread_serial_num = 0;
-  for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) {
+  for (int i=0; i < _num_threads; i++) {
+    JavaThread* thread = _stack_traces[i]->thread();
     oop threadObj = thread->threadObj();
-    if (threadObj != NULL && !thread->is_exiting() && !thread->is_hidden_from_external_view()) {
-      ++thread_serial_num;
-
-      writer()->write_u1(HPROF_GC_ROOT_THREAD_OBJ);
-      writer()->write_objectID(threadObj);
-      writer()->write_u4(thread_serial_num);
-      writer()->write_u4(STACK_TRACE_ID);
-
-      do_thread(thread, thread_serial_num);
-    }
+    u4 thread_serial_num = i+1;
+    u4 stack_serial_num = thread_serial_num + STACK_TRACE_ID;
+    writer()->write_u1(HPROF_GC_ROOT_THREAD_OBJ);
+    writer()->write_objectID(threadObj);
+    writer()->write_u4(thread_serial_num);  // thread number
+    writer()->write_u4(stack_serial_num);   // stack trace serial number
+    int num_frames = do_thread(thread, thread_serial_num);
+    assert(num_frames == _stack_traces[i]->get_stack_depth(),
+           "total number of Java frames not matched");
   }
 }
 
@@ -1547,16 +1628,16 @@
 // records:
 //
 //  HPROF_HEADER
-//  HPROF_TRACE
 //  [HPROF_UTF8]*
 //  [HPROF_LOAD_CLASS]*
+//  [[HPROF_FRAME]*|HPROF_TRACE]*
 //  [HPROF_GC_CLASS_DUMP]*
 //  HPROF_HEAP_DUMP
 //
-// The HPROF_TRACE record after the header is "dummy trace" record which does
-// not include any frames. Other records which require a stack trace ID will
-// specify the trace ID of this record (1). It also means we can run HAT without
-// needing the -stack false option.
+// The HPROF_TRACE records represent the thread stack traces captured when
+// the heap dump is generated, plus a "dummy trace" record which does not
+// include any frames. The dummy trace record is referenced by objects
+// whose allocation site is unknown.
 //
 // The HPROF_HEAP_DUMP record has a length following by sub-records. To allow
 // the heap dump be generated in a single pass we remember the position of
@@ -1578,17 +1659,8 @@
   }
 
   // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1
-  size_t used;
+  size_t used = ch->used();
   const char* header;
-#ifndef SERIALGC
-  if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-    used = GenCollectedHeap::heap()->used();
-  } else {
-    used = ParallelScavengeHeap::heap()->used();
-  }
-#else // SERIALGC
-  used = GenCollectedHeap::heap()->used();
-#endif // SERIALGC
   if (used > (size_t)SegmentedHeapDumpThreshold) {
     set_segmented_dump();
     header = "JAVA PROFILE 1.0.2";
@@ -1601,12 +1673,6 @@
   writer()->write_u4(oopSize);
   writer()->write_u8(os::javaTimeMillis());
 
-  // HPROF_TRACE record without any frames
-  DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4));
-  writer()->write_u4(STACK_TRACE_ID);
-  writer()->write_u4(0);                    // thread number
-  writer()->write_u4(0);                    // frame count
-
   // HPROF_UTF8 records
   SymbolTableDumper sym_dumper(writer());
   SymbolTable::oops_do(&sym_dumper);
@@ -1615,6 +1681,10 @@
   SystemDictionary::classes_do(&do_load_class);
   Universe::basic_type_classes_do(&do_load_class);
 
+  // write HPROF_FRAME and HPROF_TRACE records
+  // this must be done after _klass_map has been built by the class iteration above.
+  dump_stack_traces();
+
   // write HPROF_HEAP_DUMP or HPROF_HEAP_DUMP_SEGMENT
   write_dump_header();
 
@@ -1655,6 +1725,47 @@
   end_of_dump();
 }
 
+void VM_HeapDumper::dump_stack_traces() {
+  // write a dummy HPROF_TRACE record without any frames, referenced by unknown object alloc sites
+  DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4));
+  writer()->write_u4((u4) STACK_TRACE_ID);
+  writer()->write_u4(0);                    // thread number
+  writer()->write_u4(0);                    // frame count
+
+  _stack_traces = NEW_C_HEAP_ARRAY(ThreadStackTrace*, Threads::number_of_threads());
+  int frame_serial_num = 0;
+  for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) {
+    oop threadObj = thread->threadObj();
+    if (threadObj != NULL && !thread->is_exiting() && !thread->is_hidden_from_external_view()) {
+      // dump thread stack trace
+      ThreadStackTrace* stack_trace = new ThreadStackTrace(thread, false);
+      stack_trace->dump_stack_at_safepoint(-1);
+      _stack_traces[_num_threads++] = stack_trace;
+
+      // write HPROF_FRAME records for this thread's stack trace
+      int depth = stack_trace->get_stack_depth();
+      int thread_frame_start = frame_serial_num;
+      for (int j=0; j < depth; j++) {
+        StackFrameInfo* frame = stack_trace->stack_frame_at(j);
+        methodOop m = frame->method();
+        int class_serial_num = _klass_map->find(Klass::cast(m->method_holder()));
+        // the class serial number starts from 1
+        assert(class_serial_num > 0, "class not found");
+        DumperSupport::dump_stack_frame(writer(), ++frame_serial_num, class_serial_num, m, frame->bci());
+      }
+
+      // write HPROF_TRACE record for one thread
+      DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4) + depth*oopSize);
+      int stack_serial_num = _num_threads + STACK_TRACE_ID;
+      writer()->write_u4(stack_serial_num);      // stack trace serial number
+      writer()->write_u4((u4) _num_threads);     // thread serial number
+      writer()->write_u4(depth);                 // frame count
+      for (int j=1; j <= depth; j++) {
+        writer()->write_id(thread_frame_start + j);
+      }
+    }
+  }
+}
 
 // dump the heap to given path.
 int HeapDumper::dump(const char* path) {
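
The heapDumper.cpp changes above introduce per-thread HPROF_FRAME and HPROF_TRACE records: frame serial numbers are allocated globally starting at 1, thread serial numbers start at 1, and each thread's trace is given serial number thread_serial_num + STACK_TRACE_ID. A small sketch of just that numbering, with made-up thread depths and printf standing in for the DumpWriter:

#include <cstdio>
#include <vector>

// A minimal sketch of the serial-number scheme above; the data is hypothetical.
enum { STACK_TRACE_ID = 1 };   // the dummy trace keeps serial number 1

int main() {
  // depths[i] = number of Java frames captured for thread i (made-up values).
  std::vector<int> depths = {3, 2};

  int frame_serial_num = 0;                        // HPROF_FRAME serials are global
  for (int i = 0; i < (int) depths.size(); i++) {
    int thread_serial_num  = i + 1;                // threads are numbered from 1
    int stack_serial_num   = thread_serial_num + STACK_TRACE_ID;
    int thread_frame_start = frame_serial_num;

    for (int j = 0; j < depths[i]; j++) {
      printf("HPROF_FRAME  serial=%d (thread %d, depth %d)\n",
             ++frame_serial_num, thread_serial_num, j);
    }
    printf("HPROF_TRACE  serial=%d thread=%d frames=", stack_serial_num, thread_serial_num);
    for (int j = 1; j <= depths[i]; j++) {
      printf("%d ", thread_frame_start + j);       // the trace references its frames by serial
    }
    printf("\n");
  }
  return 0;
}

The dummy trace keeps serial number 1, so per-thread traces start at STACK_TRACE_ID + 1, and the per-frame depth is the same value JNILocalsDumper::set_frame_number() records for GC-root entries.
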
--- a/hotspot/src/share/vm/services/management.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/services/management.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -886,7 +886,7 @@
   int count = 0;
   for (int i = 0; i < nFlags; i++) {
     Flag* flag = &Flag::flags[i];
-    // Exclude the diagnostic flags
+    // Exclude the locked (diagnostic, experimental) flags
     if (flag->is_unlocked() || flag->is_unlocker()) {
       count++;
     }
@@ -1487,7 +1487,7 @@
   int num_entries = 0;
   for (int i = 0; i < nFlags; i++) {
     Flag* flag = &Flag::flags[i];
-    // Exclude the diagnostic flags
+    // Exclude the locked (experimental, diagnostic) flags
     if (flag->is_unlocked() || flag->is_unlocker()) {
       Handle s = java_lang_String::create_from_str(flag->name, CHECK_0);
       flags_ah->obj_at_put(num_entries, s());
@@ -1616,7 +1616,7 @@
     int num_entries = 0;
     for (int i = 0; i < nFlags && num_entries < count;  i++) {
       Flag* flag = &Flag::flags[i];
-      // Exclude the diagnostic flags
+      // Exclude the locked (diagnostic, experimental) flags
       if (flag->is_unlocked() || flag->is_unlocker()) {
         add_global_entry(env, null_h, &globals[num_entries], flag, THREAD);
         num_entries++;
--- a/hotspot/src/share/vm/services/memoryService.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/services/memoryService.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -59,9 +59,13 @@
       add_parallel_scavenge_heap_info(ParallelScavengeHeap::heap());
       break;
     }
+    case CollectedHeap::G1CollectedHeap : {
+      G1CollectedHeap::g1_unimplemented();
+      return;
+    }
 #endif // SERIALGC
     default: {
-      guarantee(false, "Not recognized kind of heap");
+      guarantee(false, "Unrecognized kind of heap");
     }
   }
 
--- a/hotspot/src/share/vm/services/threadService.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/services/threadService.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -242,6 +242,7 @@
   ThreadStackTrace(JavaThread* thread, bool with_locked_monitors);
   ~ThreadStackTrace();
 
+  JavaThread*     thread()              { return _thread; }
   StackFrameInfo* stack_frame_at(int i) { return _frames->at(i); }
   int             get_stack_depth()     { return _depth; }
 
--- a/hotspot/src/share/vm/utilities/bitMap.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/bitMap.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -26,54 +26,59 @@
 # include "incls/_bitMap.cpp.incl"
 
 
-BitMap::BitMap(idx_t* map, idx_t size_in_bits) {
+BitMap::BitMap(bm_word_t* map, idx_t size_in_bits) :
+  _map(map), _size(size_in_bits)
+{
+  assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption.");
   assert(size_in_bits >= 0, "just checking");
-  _map = map;
-  _size = size_in_bits;
 }
 
 
-BitMap::BitMap(idx_t size_in_bits) {
-  assert(size_in_bits >= 0, "just checking");
-  _size = size_in_bits;
-  _map = NEW_RESOURCE_ARRAY(idx_t, size_in_words());
+BitMap::BitMap(idx_t size_in_bits, bool in_resource_area) :
+  _map(NULL), _size(0)
+{
+  assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption.");
+  resize(size_in_bits, in_resource_area);
 }
 
 
-void BitMap::resize(idx_t size_in_bits) {
+void BitMap::verify_index(idx_t index) const {
+    assert(index < _size, "BitMap index out of bounds");
+}
+
+void BitMap::verify_range(idx_t beg_index, idx_t end_index) const {
+#ifdef ASSERT
+    assert(beg_index <= end_index, "BitMap range error");
+    // Note that [0,0) and [size,size) are both valid ranges.
+    if (end_index != _size)  verify_index(end_index);
+#endif
+}
+
+void BitMap::resize(idx_t size_in_bits, bool in_resource_area) {
   assert(size_in_bits >= 0, "just checking");
-  size_t old_size_in_words = size_in_words();
-  uintptr_t* old_map = map();
+  idx_t old_size_in_words = size_in_words();
+  bm_word_t* old_map = map();
+
   _size = size_in_bits;
-  size_t new_size_in_words = size_in_words();
-  _map = NEW_RESOURCE_ARRAY(idx_t, new_size_in_words);
-  Copy::disjoint_words((HeapWord*) old_map, (HeapWord*) _map, MIN2(old_size_in_words, new_size_in_words));
+  idx_t new_size_in_words = size_in_words();
+  if (in_resource_area) {
+    _map = NEW_RESOURCE_ARRAY(bm_word_t, new_size_in_words);
+  } else {
+    if (old_map != NULL) FREE_C_HEAP_ARRAY(bm_word_t, _map);
+    _map = NEW_C_HEAP_ARRAY(bm_word_t, new_size_in_words);
+  }
+  Copy::disjoint_words((HeapWord*)old_map, (HeapWord*) _map,
+                       MIN2(old_size_in_words, new_size_in_words));
   if (new_size_in_words > old_size_in_words) {
     clear_range_of_words(old_size_in_words, size_in_words());
   }
 }
 
-// Returns a bit mask for a range of bits [beg, end) within a single word.  Each
-// bit in the mask is 0 if the bit is in the range, 1 if not in the range.  The
-// returned mask can be used directly to clear the range, or inverted to set the
-// range.  Note:  end must not be 0.
-inline BitMap::idx_t
-BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const {
-  assert(end != 0, "does not work when end == 0");
-  assert(beg == end || word_index(beg) == word_index(end - 1),
-         "must be a single-word range");
-  idx_t mask = bit_mask(beg) - 1;       // low (right) bits
-  if (bit_in_word(end) != 0) {
-    mask |= ~(bit_mask(end) - 1);       // high (left) bits
-  }
-  return mask;
-}
-
 void BitMap::set_range_within_word(idx_t beg, idx_t end) {
   // With a valid range (beg <= end), this test ensures that end != 0, as
   // required by inverted_bit_mask_for_range.  Also avoids an unnecessary write.
   if (beg != end) {
-    idx_t mask = inverted_bit_mask_for_range(beg, end);
+    bm_word_t mask = inverted_bit_mask_for_range(beg, end);
     *word_addr(beg) |= ~mask;
   }
 }
@@ -82,7 +87,7 @@
   // With a valid range (beg <= end), this test ensures that end != 0, as
   // required by inverted_bit_mask_for_range.  Also avoids an unnecessary write.
   if (beg != end) {
-    idx_t mask = inverted_bit_mask_for_range(beg, end);
+    bm_word_t mask = inverted_bit_mask_for_range(beg, end);
     *word_addr(beg) &= mask;
   }
 }
@@ -105,20 +110,6 @@
   }
 }
 
-inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) {
-  memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t));
-}
-
-inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) {
-  memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t));
-}
-
-inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const {
-  idx_t bit_rounded_up = bit + (BitsPerWord - 1);
-  // Check for integer arithmetic overflow.
-  return bit_rounded_up > bit ? word_index(bit_rounded_up) : size_in_words();
-}
-
 void BitMap::set_range(idx_t beg, idx_t end) {
   verify_range(beg, end);
 
@@ -187,6 +178,64 @@
   clear_range_within_word(bit_index(end_full_word), end);
 }
 
+void BitMap::mostly_disjoint_range_union(BitMap* from_bitmap,
+                                         idx_t   from_start_index,
+                                         idx_t   to_start_index,
+                                         size_t  word_num) {
+  // Ensure that the parameters are correct.
+  // These shouldn't be that expensive to check, hence I left them as
+  // guarantees.
+  guarantee(from_bitmap->bit_in_word(from_start_index) == 0,
+            "it should be aligned on a word boundary");
+  guarantee(bit_in_word(to_start_index) == 0,
+            "it should be aligned on a word boundary");
+  guarantee(word_num >= 2, "word_num should be at least 2");
+
+  intptr_t* from = (intptr_t*) from_bitmap->word_addr(from_start_index);
+  intptr_t* to   = (intptr_t*) word_addr(to_start_index);
+
+  if (*from != 0) {
+    // if it's 0, then there's no point in doing the CAS
+    while (true) {
+      intptr_t old_value = *to;
+      intptr_t new_value = old_value | *from;
+      intptr_t res       = Atomic::cmpxchg_ptr(new_value, to, old_value);
+      if (res == old_value) break;
+    }
+  }
+  ++from;
+  ++to;
+
+  for (size_t i = 0; i < word_num - 2; ++i) {
+    if (*from != 0) {
+      // if it's 0, then there's no point in doing the copy
+      assert(*to == 0, "nobody else should be writing here");
+      intptr_t new_value = *from;
+      *to = new_value;
+    }
+
+    ++from;
+    ++to;
+  }
+
+  if (*from != 0) {
+    // if it's 0, then there's no point in doing the CAS
+    while (true) {
+      intptr_t old_value = *to;
+      intptr_t new_value = old_value | *from;
+      intptr_t res       = Atomic::cmpxchg_ptr(new_value, to, old_value);
+      if (res == old_value) break;
+    }
+  }
+
+  // the -1 is because we didn't advance them after the final CAS
+  assert(from ==
+           (intptr_t*) from_bitmap->word_addr(from_start_index) + word_num - 1,
+            "invariant");
+  assert(to == (intptr_t*) word_addr(to_start_index) + word_num - 1,
+            "invariant");
+}
+
 void BitMap::at_put(idx_t offset, bool value) {
   if (value) {
     set_bit(offset);
@@ -282,11 +331,11 @@
 
 bool BitMap::contains(const BitMap other) const {
   assert(size() == other.size(), "must have same size");
-  uintptr_t* dest_map = map();
-  uintptr_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
-    uintptr_t word_union = dest_map[index] | other_map[index];
+    bm_word_t word_union = dest_map[index] | other_map[index];
     // If this has more bits set than dest_map[index], then other is not a
     // subset.
     if (word_union != dest_map[index]) return false;
@@ -296,8 +345,8 @@
 
 bool BitMap::intersects(const BitMap other) const {
   assert(size() == other.size(), "must have same size");
-  uintptr_t* dest_map = map();
-  uintptr_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
     if ((dest_map[index] & other_map[index]) != 0) return true;
@@ -308,8 +357,8 @@
 
 void BitMap::set_union(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
     dest_map[index] = dest_map[index] | other_map[index];
@@ -319,8 +368,8 @@
 
 void BitMap::set_difference(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
     dest_map[index] = dest_map[index] & ~(other_map[index]);
@@ -330,8 +379,8 @@
 
 void BitMap::set_intersection(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     dest_map[index]  = dest_map[index] & other_map[index];
@@ -339,11 +388,26 @@
 }
 
 
+void BitMap::set_intersection_at_offset(BitMap other, idx_t offset) {
+  assert(other.size() >= offset, "offset not in range");
+  assert(other.size() - offset >= size(), "other not large enough");
+  // XXX Ideally, we would remove this restriction.
+  guarantee((offset % (sizeof(bm_word_t) * BitsPerByte)) == 0,
+            "Only handle aligned cases so far.");
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
+  idx_t offset_word_ind = word_index(offset);
+  idx_t size = size_in_words();
+  for (idx_t index = 0; index < size; index++) {
+    dest_map[index] = dest_map[index] & other_map[offset_word_ind + index];
+  }
+}
+
 bool BitMap::set_union_with_result(BitMap other) {
   assert(size() == other.size(), "must have same size");
   bool changed = false;
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     idx_t temp = map(index) | other_map[index];
@@ -357,11 +421,11 @@
 bool BitMap::set_difference_with_result(BitMap other) {
   assert(size() == other.size(), "must have same size");
   bool changed = false;
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
-    idx_t temp = dest_map[index] & ~(other_map[index]);
+    bm_word_t temp = dest_map[index] & ~(other_map[index]);
     changed = changed || (temp != dest_map[index]);
     dest_map[index] = temp;
   }
@@ -372,12 +436,12 @@
 bool BitMap::set_intersection_with_result(BitMap other) {
   assert(size() == other.size(), "must have same size");
   bool changed = false;
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
-    idx_t orig = dest_map[index];
-    idx_t temp = orig & other_map[index];
+    bm_word_t orig = dest_map[index];
+    bm_word_t temp = orig & other_map[index];
     changed = changed || (temp != orig);
     dest_map[index]  = temp;
   }
@@ -387,8 +451,8 @@
 
 void BitMap::set_from(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     dest_map[index] = other_map[index];
@@ -398,8 +462,8 @@
 
 bool BitMap::is_same(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     if (dest_map[index] != other_map[index]) return false;
@@ -408,24 +472,24 @@
 }
 
 bool BitMap::is_full() const {
-  uintptr_t* word = map();
+  bm_word_t* word = map();
   idx_t rest = size();
   for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) {
-    if (*word != (uintptr_t) AllBits) return false;
+    if (*word != (bm_word_t) AllBits) return false;
     word++;
   }
-  return rest == 0 || (*word | ~right_n_bits((int)rest)) == (uintptr_t) AllBits;
+  return rest == 0 || (*word | ~right_n_bits((int)rest)) == (bm_word_t) AllBits;
 }
 
 
 bool BitMap::is_empty() const {
-  uintptr_t* word = map();
+  bm_word_t* word = map();
   idx_t rest = size();
   for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) {
-    if (*word != (uintptr_t) NoBits) return false;
+    if (*word != (bm_word_t) NoBits) return false;
     word++;
   }
-  return rest == 0 || (*word & right_n_bits((int)rest)) == (uintptr_t) NoBits;
+  return rest == 0 || (*word & right_n_bits((int)rest)) == (bm_word_t) NoBits;
 }
 
 void BitMap::clear_large() {
@@ -436,7 +500,7 @@
 // then modifications in and to the left of the _bit_ being
 // currently sampled will not be seen. Note also that the
 // interval [leftOffset, rightOffset) is right open.
-void BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) {
+bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) {
   verify_range(leftOffset, rightOffset);
 
   idx_t startIndex = word_index(leftOffset);
@@ -445,106 +509,71 @@
        offset < rightOffset && index < endIndex;
        offset = (++index) << LogBitsPerWord) {
     idx_t rest = map(index) >> (offset & (BitsPerWord - 1));
-    for (; offset < rightOffset && rest != (uintptr_t)NoBits; offset++) {
+    for (; offset < rightOffset && rest != (bm_word_t)NoBits; offset++) {
       if (rest & 1) {
-        blk->do_bit(offset);
+        if (!blk->do_bit(offset)) return false;
         //  resample at each closure application
         // (see, for instance, CMS bug 4525989)
         rest = map(index) >> (offset & (BitsPerWord -1));
-        // XXX debugging: remove
-        // The following assertion assumes that closure application
-        // doesn't clear bits (may not be true in general, e.g. G1).
-        assert(rest & 1,
-               "incorrect shift or closure application can clear bits?");
       }
       rest = rest >> 1;
     }
   }
+  return true;
+}
+
+BitMap::idx_t* BitMap::_pop_count_table = NULL;
+
+void BitMap::init_pop_count_table() {
+  if (_pop_count_table == NULL) {
+    BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256);
+    for (uint i = 0; i < 256; i++) {
+      table[i] = num_set_bits(i);
+    }
+
+    intptr_t res = Atomic::cmpxchg_ptr((intptr_t)  table,
+                                       (intptr_t*) &_pop_count_table,
+                                       (intptr_t)  NULL_WORD);
+    if (res != NULL_WORD) {
+      guarantee( _pop_count_table == (void*) res, "invariant" );
+      FREE_C_HEAP_ARRAY(bm_word_t, table);
+    }
+  }
 }
 
-BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset,
-                                          idx_t r_offset) const {
-  assert(l_offset <= size(), "BitMap index out of bounds");
-  assert(r_offset <= size(), "BitMap index out of bounds");
-  assert(l_offset <= r_offset, "l_offset > r_offset ?");
-
-  if (l_offset == r_offset) {
-    return l_offset;
-  }
-  idx_t   index = word_index(l_offset);
-  idx_t r_index = word_index(r_offset-1) + 1;
-  idx_t res_offset = l_offset;
+BitMap::idx_t BitMap::num_set_bits(bm_word_t w) {
+  idx_t bits = 0;
 
-  // check bits including and to the _left_ of offset's position
-  idx_t pos = bit_in_word(res_offset);
-  idx_t res = map(index) >> pos;
-  if (res != (uintptr_t)NoBits) {
-    // find the position of the 1-bit
-    for (; !(res & 1); res_offset++) {
-      res = res >> 1;
+  while (w != 0) {
+    while ((w & 1) == 0) {
+      w >>= 1;
     }
-    assert(res_offset >= l_offset, "just checking");
-    return MIN2(res_offset, r_offset);
+    bits++;
+    w >>= 1;
   }
-  // skip over all word length 0-bit runs
-  for (index++; index < r_index; index++) {
-    res = map(index);
-    if (res != (uintptr_t)NoBits) {
-      // found a 1, return the offset
-      for (res_offset = index << LogBitsPerWord; !(res & 1);
-           res_offset++) {
-        res = res >> 1;
-      }
-      assert(res & 1, "tautology; see loop condition");
-      assert(res_offset >= l_offset, "just checking");
-      return MIN2(res_offset, r_offset);
-    }
-  }
-  return r_offset;
+  return bits;
 }
 
-BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset,
-                                           idx_t r_offset) const {
-  assert(l_offset <= size(), "BitMap index out of bounds");
-  assert(r_offset <= size(), "BitMap index out of bounds");
-  assert(l_offset <= r_offset, "l_offset > r_offset ?");
-
-  if (l_offset == r_offset) {
-    return l_offset;
-  }
-  idx_t   index = word_index(l_offset);
-  idx_t r_index = word_index(r_offset-1) + 1;
-  idx_t res_offset = l_offset;
-
-  // check bits including and to the _left_ of offset's position
-  idx_t pos = res_offset & (BitsPerWord - 1);
-  idx_t res = (map(index) >> pos) | left_n_bits((int)pos);
+BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) {
+  assert(_pop_count_table != NULL, "precondition");
+  return _pop_count_table[c];
+}
 
-  if (res != (uintptr_t)AllBits) {
-    // find the position of the 0-bit
-    for (; res & 1; res_offset++) {
-      res = res >> 1;
-    }
-    assert(res_offset >= l_offset, "just checking");
-    return MIN2(res_offset, r_offset);
-  }
-  // skip over all word length 1-bit runs
-  for (index++; index < r_index; index++) {
-    res = map(index);
-    if (res != (uintptr_t)AllBits) {
-      // found a 0, return the offset
-      for (res_offset = index << LogBitsPerWord; res & 1;
-           res_offset++) {
-        res = res >> 1;
-      }
-      assert(!(res & 1), "tautology; see loop condition");
-      assert(res_offset >= l_offset, "just checking");
-      return MIN2(res_offset, r_offset);
+BitMap::idx_t BitMap::count_one_bits() const {
+  init_pop_count_table(); // If necessary.
+  idx_t sum = 0;
+  typedef unsigned char uchar;
+  for (idx_t i = 0; i < size_in_words(); i++) {
+    bm_word_t w = map()[i];
+    for (size_t j = 0; j < sizeof(bm_word_t); j++) {
+      sum += num_set_bits_from_table(uchar(w & 255));
+      w >>= 8;
     }
   }
-  return r_offset;
+  return sum;
 }
 
+
 #ifndef PRODUCT
 
 void BitMap::print_on(outputStream* st) const {
@@ -558,7 +587,7 @@
 #endif
 
 
-BitMap2D::BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot)
+BitMap2D::BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot)
   : _bits_per_slot(bits_per_slot)
   , _map(map, size_in_slots * bits_per_slot)
 {
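
count_one_bits() above counts set bits with a lazily built 256-entry table indexed by byte. A self-contained sketch of the same table-driven popcount, using illustrative names rather than the HotSpot ones:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for _pop_count_table / num_set_bits() above.
static uint8_t pop_table[256];

static void init_pop_table() {
  for (unsigned i = 0; i < 256; i++) {
    unsigned w = i, bits = 0;
    while (w != 0) { bits += (w & 1); w >>= 1; }
    pop_table[i] = (uint8_t) bits;
  }
}

static size_t count_one_bits(const uintptr_t* map, size_t words) {
  size_t sum = 0;
  for (size_t i = 0; i < words; i++) {
    uintptr_t w = map[i];
    for (size_t j = 0; j < sizeof(uintptr_t); j++) {   // one table lookup per byte
      sum += pop_table[w & 0xff];
      w >>= 8;
    }
  }
  return sum;
}

int main() {
  init_pop_table();
  uintptr_t bits[2] = { 0xff00ff00u, 0x1u };
  printf("%zu set bits\n", count_one_bits(bits, 2));   // expect 17
  return 0;
}

The real init_pop_count_table() installs its table with a CAS on _pop_count_table, so a racing initializer simply frees its duplicate.
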
--- a/hotspot/src/share/vm/utilities/bitMap.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/bitMap.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -22,25 +22,19 @@
  *
  */
 
-// Closure for iterating over BitMaps
+// Forward decl;
+class BitMapClosure;
 
-class BitMapClosure VALUE_OBJ_CLASS_SPEC {
- public:
-  // Callback when bit in map is set
-  virtual void do_bit(size_t offset) = 0;
-};
-
-
-// Operations for bitmaps represented as arrays of unsigned 32- or 64-bit
-// integers (uintptr_t).
-//
-// Bit offsets are numbered from 0 to size-1
+// Operations for bitmaps represented as arrays of unsigned integers.
+// Bit offsets are numbered from 0 to size-1.
 
 class BitMap VALUE_OBJ_CLASS_SPEC {
   friend class BitMap2D;
 
  public:
   typedef size_t idx_t;         // Type used for bit and word indices.
+  typedef uintptr_t bm_word_t;  // Element type of array that represents
+                                // the bitmap.
 
   // Hints for range sizes.
   typedef enum {
@@ -48,8 +42,8 @@
   } RangeSizeHint;
 
  private:
-  idx_t* _map;     // First word in bitmap
-  idx_t  _size;    // Size of bitmap (in bits)
+  bm_word_t* _map;     // First word in bitmap
+  idx_t      _size;    // Size of bitmap (in bits)
 
   // Puts the given value at the given offset, using resize() to size
   // the bitmap appropriately if needed using factor-of-two expansion.
@@ -62,7 +56,7 @@
 
   // Return a mask that will select the specified bit, when applied to the word
   // containing the bit.
-  static idx_t bit_mask(idx_t bit)    { return (idx_t)1 << bit_in_word(bit); }
+  static bm_word_t bit_mask(idx_t bit) { return (bm_word_t)1 << bit_in_word(bit); }
 
   // Return the index of the word containing the specified bit.
   static idx_t word_index(idx_t bit)  { return bit >> LogBitsPerWord; }
@@ -71,66 +65,68 @@
   static idx_t bit_index(idx_t word)  { return word << LogBitsPerWord; }
 
   // Return the array of bitmap words, or a specific word from it.
-  idx_t* map() const           { return _map; }
-  idx_t  map(idx_t word) const { return _map[word]; }
+  bm_word_t* map() const           { return _map; }
+  bm_word_t  map(idx_t word) const { return _map[word]; }
 
   // Return a pointer to the word containing the specified bit.
-  idx_t* word_addr(idx_t bit) const { return map() + word_index(bit); }
+  bm_word_t* word_addr(idx_t bit) const { return map() + word_index(bit); }
 
   // Set a word to a specified value or to all ones; clear a word.
-  void set_word  (idx_t word, idx_t val) { _map[word] = val; }
+  void set_word  (idx_t word, bm_word_t val) { _map[word] = val; }
   void set_word  (idx_t word)            { set_word(word, ~(uintptr_t)0); }
   void clear_word(idx_t word)            { _map[word] = 0; }
 
   // Utilities for ranges of bits.  Ranges are half-open [beg, end).
 
   // Ranges within a single word.
-  inline idx_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const;
-  inline void  set_range_within_word      (idx_t beg, idx_t end);
-  inline void  clear_range_within_word    (idx_t beg, idx_t end);
-  inline void  par_put_range_within_word  (idx_t beg, idx_t end, bool value);
+  bm_word_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const;
+  void  set_range_within_word      (idx_t beg, idx_t end);
+  void  clear_range_within_word    (idx_t beg, idx_t end);
+  void  par_put_range_within_word  (idx_t beg, idx_t end, bool value);
 
   // Ranges spanning entire words.
-  inline void      set_range_of_words         (idx_t beg, idx_t end);
-  inline void      clear_range_of_words       (idx_t beg, idx_t end);
-  inline void      set_large_range_of_words   (idx_t beg, idx_t end);
-  inline void      clear_large_range_of_words (idx_t beg, idx_t end);
+  void      set_range_of_words         (idx_t beg, idx_t end);
+  void      clear_range_of_words       (idx_t beg, idx_t end);
+  void      set_large_range_of_words   (idx_t beg, idx_t end);
+  void      clear_large_range_of_words (idx_t beg, idx_t end);
 
   // The index of the first full word in a range.
-  inline idx_t word_index_round_up(idx_t bit) const;
+  idx_t word_index_round_up(idx_t bit) const;
 
   // Verification, statistics.
-  void verify_index(idx_t index) const {
-    assert(index < _size, "BitMap index out of bounds");
-  }
+  void verify_index(idx_t index) const;
+  void verify_range(idx_t beg_index, idx_t end_index) const;
 
-  void verify_range(idx_t beg_index, idx_t end_index) const {
-#ifdef ASSERT
-    assert(beg_index <= end_index, "BitMap range error");
-    // Note that [0,0) and [size,size) are both valid ranges.
-    if (end_index != _size)  verify_index(end_index);
-#endif
-  }
+  static idx_t* _pop_count_table;
+  static void init_pop_count_table();
+  static idx_t num_set_bits(bm_word_t w);
+  static idx_t num_set_bits_from_table(unsigned char c);
 
  public:
 
   // Constructs a bitmap with no map, and size 0.
   BitMap() : _map(NULL), _size(0) {}
 
-  // Construction
-  BitMap(idx_t* map, idx_t size_in_bits);
+  // Constructs a bitmap with the given map and size.
+  BitMap(bm_word_t* map, idx_t size_in_bits);
 
-  // Allocates necessary data structure in resource area
-  BitMap(idx_t size_in_bits);
+  // Constructs an empty bitmap of the given size (that is, this clears the
+  // new bitmap).  Allocates the map array in resource area if
+  // "in_resource_area" is true, else in the C heap.
+  BitMap(idx_t size_in_bits, bool in_resource_area = true);
 
-  void set_map(idx_t* map)          { _map = map; }
+  // Set the map and size.
+  void set_map(bm_word_t* map)      { _map = map; }
   void set_size(idx_t size_in_bits) { _size = size_in_bits; }
 
-  // Allocates necessary data structure in resource area.
+  // Allocates necessary data structure, either in the resource area
+  // or in the C heap, as indicated by "in_resource_area."
   // Preserves state currently in bit map by copying data.
   // Zeros any newly-addressable bits.
-  // Does not perform any frees (i.e., of current _map).
-  void resize(idx_t size_in_bits);
+  // If "in_resource_area" is false, frees the current map.
+  // (Note that this assumes that all calls to "resize" on the same BitMap
+  // use the same value for "in_resource_area".)
+  void resize(idx_t size_in_bits, bool in_resource_area = true);
 
   // Accessing
   idx_t size() const                    { return _size; }
@@ -157,11 +153,11 @@
 
   // Set or clear the specified bit.
   inline void set_bit(idx_t bit);
-  inline void clear_bit(idx_t bit);
+  void clear_bit(idx_t bit);
 
   // Atomically set or clear the specified bit.
-  inline bool par_set_bit(idx_t bit);
-  inline bool par_clear_bit(idx_t bit);
+  bool par_set_bit(idx_t bit);
+  bool par_clear_bit(idx_t bit);
 
   // Put the given value at the given offset. The parallel version
   // will CAS the value into the bitmap and is quite a bit slower.
@@ -183,23 +179,61 @@
   // Update a range of bits, using a hint about the size.  Currently only
   // inlines the predominant case of a 1-bit range.  Works best when hint is a
   // compile-time constant.
-  inline void set_range(idx_t beg, idx_t end, RangeSizeHint hint);
-  inline void clear_range(idx_t beg, idx_t end, RangeSizeHint hint);
-  inline void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint);
-  inline void par_clear_range  (idx_t beg, idx_t end, RangeSizeHint hint);
+  void set_range(idx_t beg, idx_t end, RangeSizeHint hint);
+  void clear_range(idx_t beg, idx_t end, RangeSizeHint hint);
+  void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint);
+  void par_clear_range  (idx_t beg, idx_t end, RangeSizeHint hint);
+
+  // It performs the union of two equal-length subsets of two bitmaps
+  // (the target bitmap of the method and the from_bitmap) and stores
+  // the result in the target bitmap.  The from_start_index is the
+  // first bit index of the subrange of the from_bitmap.  The
+  // to_start_index is the equivalent index in the target bitmap.
+  // Both indexes should be word-aligned, i.e. they should correspond
+  // to the first bit of a bitmap word (it is up to the caller to
+  // ensure this; the method does check it).  The length of the subset
+  // is specified with word_num and it is in number of bitmap words.
+  // The caller should ensure that this is at least 2 (smaller ranges
+  // are not supported, to save extra checks).  Again, this is checked
+  // in the method.
+  //
+  // Atomicity concerns: it is assumed that any contention on the
+  // target bitmap with other threads will happen on the first and
+  // last words; the ones in between will be "owned" exclusively by
+  // the calling thread and, in fact, they will already be 0. So, the
+  // method performs a CAS on the first word, copies the next
+  // word_num-2 words, and finally performs a CAS on the last word.
+  void mostly_disjoint_range_union(BitMap* from_bitmap,
+                                   idx_t   from_start_index,
+                                   idx_t   to_start_index,
+                                   size_t  word_num);
+
 
   // Clearing
-  void clear();
   void clear_large();
+  inline void clear();
 
-  // Iteration support
-  void iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex);
-  inline void iterate(BitMapClosure* blk) {
+  // Iteration support.  Returns "true" if the iteration completed, false
+  // if the iteration terminated early (because the closure "blk" returned
+  // false).
+  bool iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex);
+  bool iterate(BitMapClosure* blk) {
     // call the version that takes an interval
-    iterate(blk, 0, size());
+    return iterate(blk, 0, size());
   }
 
-  // Looking for 1's and 0's to the "right"
+  // Looking for 1's and 0's at indices equal to or greater than "l_index",
+  // stopping if none has been found before "r_index", and returning
+  // "r_index" (which must be at most "size") in that case.
+  idx_t get_next_one_offset_inline (idx_t l_index, idx_t r_index) const;
+  idx_t get_next_zero_offset_inline(idx_t l_index, idx_t r_index) const;
+
+  // Like "get_next_one_offset_inline", except requires that "r_index" is
+  // aligned to bitsizeof(bm_word_t).
+  idx_t get_next_one_offset_inline_aligned_right(idx_t l_index,
+                                                        idx_t r_index) const;
+
+  // Non-inline versions of the above.
   idx_t get_next_one_offset (idx_t l_index, idx_t r_index) const;
   idx_t get_next_zero_offset(idx_t l_index, idx_t r_index) const;
 
@@ -210,12 +244,8 @@
     return get_next_zero_offset(offset, size());
   }
 
-
-
-  // Find the next one bit in the range [beg_bit, end_bit), or return end_bit if
-  // no one bit is found.  Equivalent to get_next_one_offset(), but inline for
-  // use in performance-critical code.
-  inline idx_t find_next_one_bit(idx_t beg_bit, idx_t end_bit) const;
+  // Returns the number of bits set in the bitmap.
+  idx_t count_one_bits() const;
 
   // Set operations.
   void set_union(BitMap bits);
@@ -232,6 +262,15 @@
   bool set_difference_with_result(BitMap bits);
   bool set_intersection_with_result(BitMap bits);
 
+  // Requires the submap of "bits" starting at offset to be at least as
+  // large as "this".  Modifies "this" to be the intersection of its
+  // current contents and the submap of "bits" starting at "offset" of the
+  // same length as "this."
+  // (For expedience, currently requires the offset to be aligned to the
+  // bitsize of a uintptr_t.  This should go away in the future though it
+  // will probably remain a good case to optimize.)
+  void set_intersection_at_offset(BitMap bits, idx_t offset);
+
   void set_from(BitMap bits);
 
   bool is_same(BitMap bits);
@@ -248,58 +287,13 @@
 #endif
 };
 
-inline void BitMap::set_bit(idx_t bit) {
-  verify_index(bit);
-  *word_addr(bit) |= bit_mask(bit);
-}
-
-inline void BitMap::clear_bit(idx_t bit) {
-  verify_index(bit);
-  *word_addr(bit) &= ~bit_mask(bit);
-}
-
-inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    set_bit(beg);
-  } else {
-    if (hint == large_range) {
-      set_large_range(beg, end);
-    } else {
-      set_range(beg, end);
-    }
-  }
-}
-
-inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    clear_bit(beg);
-  } else {
-    if (hint == large_range) {
-      clear_large_range(beg, end);
-    } else {
-      clear_range(beg, end);
-    }
-  }
-}
-
-inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    par_at_put(beg, true);
-  } else {
-    if (hint == large_range) {
-      par_at_put_large_range(beg, end, true);
-    } else {
-      par_at_put_range(beg, end, true);
-    }
-  }
-}
-
 
 // Convenience class wrapping BitMap which provides multiple bits per slot.
 class BitMap2D VALUE_OBJ_CLASS_SPEC {
  public:
-  typedef size_t idx_t;         // Type used for bit and word indices.
-
+  typedef BitMap::idx_t idx_t;          // Type used for bit and word indices.
+  typedef BitMap::bm_word_t bm_word_t;  // Element type of array that
+                                        // represents the bitmap.
  private:
   BitMap _map;
   idx_t  _bits_per_slot;
@@ -314,7 +308,7 @@
 
  public:
   // Construction. bits_per_slot must be greater than 0.
-  BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot);
+  BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot);
 
   // Allocates necessary data structure in resource area. bits_per_slot must be greater than 0.
   BitMap2D(idx_t size_in_slots, idx_t bits_per_slot);
@@ -359,38 +353,14 @@
     _map.at_put_grow(bit_index(slot_index, bit_within_slot_index), value);
   }
 
-  void clear() {
-    _map.clear();
-  }
+  void clear();
 };
 
-
-
-inline void BitMap::set_range_of_words(idx_t beg, idx_t end) {
-  uintptr_t* map = _map;
-  for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0;
-}
-
-
-inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) {
-  uintptr_t* map = _map;
-  for (idx_t i = beg; i < end; ++i) map[i] = 0;
-}
-
+// Closure for iterating over BitMaps
 
-inline void BitMap::clear() {
-  clear_range_of_words(0, size_in_words());
-}
-
-
-inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    par_at_put(beg, false);
-  } else {
-    if (hint == large_range) {
-      par_at_put_large_range(beg, end, false);
-    } else {
-      par_at_put_range(beg, end, false);
-    }
-  }
-}
+class BitMapClosure VALUE_OBJ_CLASS_SPEC {
+ public:
+  // Callback when bit in map is set.  Should normally return "true";
+  // return of false indicates that the bitmap iteration should terminate.
+  virtual bool do_bit(BitMap::idx_t offset) = 0;
+};
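
The mostly_disjoint_range_union() comment above notes that only the first and last words of the target range may be contended, so those two are merged with a compare-and-swap loop while the middle words are plain stores. A minimal sketch of that boundary-word CAS, using std::atomic as a stand-in for HotSpot's Atomic::cmpxchg_ptr (illustrative only):

#include <atomic>
#include <cstdint>
#include <cstdio>

// OR a word of source bits into a possibly contended target word with a CAS loop.
static void cas_or_word(std::atomic<uintptr_t>* to, uintptr_t from) {
  if (from == 0) return;                        // nothing to merge
  uintptr_t old_value = to->load();
  while (!to->compare_exchange_weak(old_value, old_value | from)) {
    // old_value is refreshed by compare_exchange_weak; just retry
  }
}

int main() {
  std::atomic<uintptr_t> first(0x0f), last(0xf0);
  cas_or_word(&first, 0x30);                    // contended boundary words
  cas_or_word(&last,  0x0c);
  printf("first=%#lx last=%#lx\n",
         (unsigned long) first.load(), (unsigned long) last.load());
  return 0;
}
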
--- a/hotspot/src/share/vm/utilities/bitMap.inline.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/bitMap.inline.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -22,6 +22,17 @@
  *
  */
 
+
+inline void BitMap::set_bit(idx_t bit) {
+  verify_index(bit);
+  *word_addr(bit) |= bit_mask(bit);
+}
+
+inline void BitMap::clear_bit(idx_t bit) {
+  verify_index(bit);
+  *word_addr(bit) &= ~bit_mask(bit);
+}
+
 inline bool BitMap::par_set_bit(idx_t bit) {
   verify_index(bit);
   volatile idx_t* const addr = word_addr(bit);
@@ -64,42 +75,236 @@
   } while (true);
 }
 
-inline BitMap::idx_t
-BitMap::find_next_one_bit(idx_t beg_bit, idx_t end_bit) const
-{
-  verify_range(beg_bit, end_bit);
-  assert(bit_in_word(end_bit) == 0, "end_bit not word-aligned");
+inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    set_bit(beg);
+  } else {
+    if (hint == large_range) {
+      set_large_range(beg, end);
+    } else {
+      set_range(beg, end);
+    }
+  }
+}
+
+inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    clear_bit(beg);
+  } else {
+    if (hint == large_range) {
+      clear_large_range(beg, end);
+    } else {
+      clear_range(beg, end);
+    }
+  }
+}
+
+inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    par_at_put(beg, true);
+  } else {
+    if (hint == large_range) {
+      par_at_put_large_range(beg, end, true);
+    } else {
+      par_at_put_range(beg, end, true);
+    }
+  }
+}
 
-  if (beg_bit == end_bit) {
-    return beg_bit;
-  }
+inline void BitMap::set_range_of_words(idx_t beg, idx_t end) {
+  bm_word_t* map = _map;
+  for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0;
+}
+
+
+inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) {
+  bm_word_t* map = _map;
+  for (idx_t i = beg; i < end; ++i) map[i] = 0;
+}
+
+
+inline void BitMap::clear() {
+  clear_range_of_words(0, size_in_words());
+}
+
 
-  idx_t   index = word_index(beg_bit);
-  idx_t r_index = word_index(end_bit);
-  idx_t res_bit = beg_bit;
+inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    par_at_put(beg, false);
+  } else {
+    if (hint == large_range) {
+      par_at_put_large_range(beg, end, false);
+    } else {
+      par_at_put_range(beg, end, false);
+    }
+  }
+}
+
+inline BitMap::idx_t
+BitMap::get_next_one_offset_inline(idx_t l_offset, idx_t r_offset) const {
+  assert(l_offset <= size(), "BitMap index out of bounds");
+  assert(r_offset <= size(), "BitMap index out of bounds");
+  assert(l_offset <= r_offset, "l_offset > r_offset ?");
+
+  if (l_offset == r_offset) {
+    return l_offset;
+  }
+  idx_t   index = word_index(l_offset);
+  idx_t r_index = word_index(r_offset-1) + 1;
+  idx_t res_offset = l_offset;
 
   // check bits including and to the _left_ of offset's position
-  idx_t res = map(index) >> bit_in_word(res_bit);
-  if (res != (uintptr_t) NoBits) {
+  idx_t pos = bit_in_word(res_offset);
+  idx_t res = map(index) >> pos;
+  if (res != (uintptr_t)NoBits) {
     // find the position of the 1-bit
-    for (; !(res & 1); res_bit++) {
+    for (; !(res & 1); res_offset++) {
       res = res >> 1;
     }
-    assert(res_bit >= beg_bit && res_bit < end_bit, "just checking");
-    return res_bit;
+    assert(res_offset >= l_offset &&
+           res_offset < r_offset, "just checking");
+    return MIN2(res_offset, r_offset);
   }
   // skip over all word length 0-bit runs
   for (index++; index < r_index; index++) {
     res = map(index);
-    if (res != (uintptr_t) NoBits) {
+    if (res != (uintptr_t)NoBits) {
       // found a 1, return the offset
-      for (res_bit = bit_index(index); !(res & 1); res_bit++) {
+      for (res_offset = bit_index(index); !(res & 1); res_offset++) {
         res = res >> 1;
       }
       assert(res & 1, "tautology; see loop condition");
-      assert(res_bit >= beg_bit && res_bit < end_bit, "just checking");
-      return res_bit;
+      assert(res_offset >= l_offset, "just checking");
+      return MIN2(res_offset, r_offset);
+    }
+  }
+  return r_offset;
+}
+
+inline BitMap::idx_t
+BitMap::get_next_zero_offset_inline(idx_t l_offset, idx_t r_offset) const {
+  assert(l_offset <= size(), "BitMap index out of bounds");
+  assert(r_offset <= size(), "BitMap index out of bounds");
+  assert(l_offset <= r_offset, "l_offset > r_offset ?");
+
+  if (l_offset == r_offset) {
+    return l_offset;
+  }
+  idx_t   index = word_index(l_offset);
+  idx_t r_index = word_index(r_offset-1) + 1;
+  idx_t res_offset = l_offset;
+
+  // check bits including and to the _left_ of offset's position
+  idx_t pos = res_offset & (BitsPerWord - 1);
+  idx_t res = (map(index) >> pos) | left_n_bits((int)pos);
+
+  if (res != (uintptr_t)AllBits) {
+    // find the position of the 0-bit
+    for (; res & 1; res_offset++) {
+      res = res >> 1;
+    }
+    assert(res_offset >= l_offset, "just checking");
+    return MIN2(res_offset, r_offset);
+  }
+  // skip over all word length 1-bit runs
+  for (index++; index < r_index; index++) {
+    res = map(index);
+    if (res != (uintptr_t)AllBits) {
+      // found a 0, return the offset
+      for (res_offset = index << LogBitsPerWord; res & 1;
+           res_offset++) {
+        res = res >> 1;
+      }
+      assert(!(res & 1), "tautology; see loop condition");
+      assert(res_offset >= l_offset, "just checking");
+      return MIN2(res_offset, r_offset);
     }
   }
-  return end_bit;
+  return r_offset;
+}
+
+inline BitMap::idx_t
+BitMap::get_next_one_offset_inline_aligned_right(idx_t l_offset,
+                                                 idx_t r_offset) const
+{
+  verify_range(l_offset, r_offset);
+  assert(bit_in_word(r_offset) == 0, "r_offset not word-aligned");
+
+  if (l_offset == r_offset) {
+    return l_offset;
+  }
+  idx_t   index = word_index(l_offset);
+  idx_t r_index = word_index(r_offset);
+  idx_t res_offset = l_offset;
+
+  // check bits including and to the _left_ of offset's position
+  idx_t res = map(index) >> bit_in_word(res_offset);
+  if (res != (uintptr_t)NoBits) {
+    // find the position of the 1-bit
+    for (; !(res & 1); res_offset++) {
+      res = res >> 1;
+    }
+    assert(res_offset >= l_offset &&
+           res_offset < r_offset, "just checking");
+    return res_offset;
+  }
+  // skip over all word length 0-bit runs
+  for (index++; index < r_index; index++) {
+    res = map(index);
+    if (res != (uintptr_t)NoBits) {
+      // found a 1, return the offset
+      for (res_offset = bit_index(index); !(res & 1); res_offset++) {
+        res = res >> 1;
+      }
+      assert(res & 1, "tautology; see loop condition");
+      assert(res_offset >= l_offset && res_offset < r_offset, "just checking");
+      return res_offset;
+    }
+  }
+  return r_offset;
 }
+
+
+// Returns a bit mask for a range of bits [beg, end) within a single word.  Each
+// bit in the mask is 0 if the bit is in the range, 1 if not in the range.  The
+// returned mask can be used directly to clear the range, or inverted to set the
+// range.  Note:  end must not be 0.
+inline BitMap::bm_word_t
+BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const {
+  assert(end != 0, "does not work when end == 0");
+  assert(beg == end || word_index(beg) == word_index(end - 1),
+         "must be a single-word range");
+  bm_word_t mask = bit_mask(beg) - 1;   // low (right) bits
+  if (bit_in_word(end) != 0) {
+    mask |= ~(bit_mask(end) - 1);       // high (left) bits
+  }
+  return mask;
+}
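
A quick standalone illustration of the mask construction above, using a hypothetical inverted_mask helper over a plain uint64_t instead of bm_word_t (illustrative only, not part of the patch):

#include <cassert>
#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for inverted_bit_mask_for_range on a single 64-bit
// word: bits in [beg, end) are 0 in the mask, every other bit is 1.
static uint64_t inverted_mask(unsigned beg, unsigned end) {
  assert(beg <= end && beg < 64 && end <= 64);
  uint64_t mask = (uint64_t(1) << beg) - 1;        // low (right) bits below beg
  if ((end & 63) != 0) {
    mask |= ~((uint64_t(1) << end) - 1);           // high (left) bits at/above end
  }
  return mask;
}

int main() {
  uint64_t word = ~uint64_t(0);
  uint64_t m = inverted_mask(4, 12);
  printf("mask        = %016" PRIx64 "\n", m);          // fffffffffffff00f
  printf("clear range = %016" PRIx64 "\n", word & m);   // bits 4..11 cleared
  printf("set range   = %016" PRIx64 "\n", ~m);         // only bits 4..11 set
  return 0;
}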
+
+inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) {
+  memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t));
+}
+
+inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) {
+  memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t));
+}
+
+inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const {
+  idx_t bit_rounded_up = bit + (BitsPerWord - 1);
+  // Check for integer arithmetic overflow.
+  return bit_rounded_up > bit ? word_index(bit_rounded_up) : size_in_words();
+}
+
+inline BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset,
+                                          idx_t r_offset) const {
+  return get_next_one_offset_inline(l_offset, r_offset);
+}
+
+inline BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset,
+                                           idx_t r_offset) const {
+  return get_next_zero_offset_inline(l_offset, r_offset);
+}
+
+inline void BitMap2D::clear() {
+  _map.clear();
+}
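
For context, the scan implemented by the two search helpers above follows the usual two-phase pattern: finish the partial first word, then skip whole zero words. A standalone sketch of the same scan over a plain uint64_t array, using a hypothetical next_one helper independent of the BitMap type:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Standalone sketch of "find the next 1-bit in [l, r)" over a vector<uint64_t>.
static size_t next_one(const std::vector<uint64_t>& map, size_t l, size_t r) {
  if (l >= r) return r;
  size_t index   = l / 64;
  size_t r_index = (r - 1) / 64 + 1;
  uint64_t res   = map[index] >> (l % 64);   // finish the partial first word
  size_t offset  = l;
  if (res != 0) {
    while (!(res & 1)) { res >>= 1; ++offset; }
    return offset < r ? offset : r;
  }
  for (++index; index < r_index; ++index) {  // skip word-length 0-bit runs
    res = map[index];
    if (res != 0) {
      offset = index * 64;
      while (!(res & 1)) { res >>= 1; ++offset; }
      return offset < r ? offset : r;
    }
  }
  return r;
}

int main() {
  std::vector<uint64_t> bits(2, 0);
  bits[0] = (uint64_t(1) << 3) | (uint64_t(1) << 40);
  bits[1] = uint64_t(1) << 5;                // global bit 69
  size_t end = bits.size() * 64;
  for (size_t i = next_one(bits, 0, end); i < end; i = next_one(bits, i + 1, end)) {
    printf("set bit at %zu\n", i);           // prints 3, 40, 69
  }
  return 0;
}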
--- a/hotspot/src/share/vm/utilities/debug.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/debug.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -668,7 +668,7 @@
   oop target;
   void do_oop(oop* o) {
     if (o != NULL && *o == target) {
-      tty->print_cr("0x%08x", o);
+      tty->print_cr(INTPTR_FORMAT, o);
     }
   }
   void do_oop(narrowOop* o) { ShouldNotReachHere(); }
@@ -687,13 +687,13 @@
 
 
 static void findref(intptr_t x) {
-  GenCollectedHeap *gch = GenCollectedHeap::heap();
+  CollectedHeap *ch = Universe::heap();
   LookForRefInGenClosure lookFor;
   lookFor.target = (oop) x;
   LookForRefInObjectClosure look_in_object((oop) x);
 
   tty->print_cr("Searching heap:");
-  gch->object_iterate(&look_in_object);
+  ch->object_iterate(&look_in_object);
 
   tty->print_cr("Searching strong roots:");
   Universe::oops_do(&lookFor, false);
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -99,7 +99,7 @@
   friend class VMStructs;
  private:
   char* i;
-#ifdef ASSERT
+#ifndef PRODUCT
  public:
   char* value() { return i; }
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/intHisto.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_intHisto.cpp.incl"
+
+IntHistogram::IntHistogram(int est, int max) : _max(max), _tot(0) {
+  assert(0 <= est && est <= max, "Preconditions");
+  _elements = new (ResourceObj::C_HEAP) GrowableArray<int>(est, true);
+  guarantee(_elements != NULL, "alloc failure");
+}
+
+void IntHistogram::add_entry(int outcome) {
+  if (outcome > _max) outcome = _max;
+  int new_count = _elements->at_grow(outcome) + 1;
+  _elements->at_put(outcome, new_count);
+  _tot++;
+}
+
+int IntHistogram::entries_for_outcome(int outcome) {
+  return _elements->at_grow(outcome);
+}
+
+void IntHistogram::print_on(outputStream* st) const {
+  double tot_d = (double)_tot;
+  st->print_cr("Outcome     # of occurrences   %% of occurrences");
+  st->print_cr("-----------------------------------------------");
+  for (int i=0; i < _elements->length()-2; i++) {
+    int cnt = _elements->at(i);
+    if (cnt != 0) {
+      st->print_cr("%7d        %10d         %8.4f",
+                   i, cnt, (double)cnt/tot_d);
+    }
+  }
+  // Does it have any max entries?
+  if (_elements->length()-1 == _max) {
+    int cnt = _elements->at(_max);
+    st->print_cr(">= %4d        %10d         %8.4f",
+                 _max, cnt, (double)cnt/tot_d);
+  }
+  st->print_cr("-----------------------------------------------");
+  st->print_cr("    All        %10d         %8.4f", _tot, 1.0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/intHisto.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This class implements a simple histogram.
+
+// A histogram summarizes a series of "measurements", each of which is
+// assumed (required in this implementation) to have an outcome that is a
+// non-negative integer.  The histogram efficiently maps measurement outcomes
+ * to the number of measurements that had that outcome.
+
+// To print the results, invoke print() on your Histogram*.
+
+// Note: there is already an existing "Histogram" class, in file
+// histogram.{hpp,cpp}, but to my mind that's not a histogram, it's a table
+// mapping strings to counts.  To be a histogram (IMHO) it needs to map
+// numbers (in fact, integers) to number of occurrences of that number.
+
+// ysr: (i am not sure i agree with the above note.) i suspect we want to have a
+// histogram template that will map an arbitrary type (with a defined order
+// relation) to a count.
+
+
+class IntHistogram : public CHeapObj {
+ protected:
+  int _max;
+  int _tot;
+  GrowableArray<int>* _elements;
+
+public:
+  // Create a new, empty table.  "est" is an estimate of the maximum outcome
+  // that will be added, and "max" is an outcome such that all outcomes at
+  // least that large will be bundled with it.
+  IntHistogram(int est, int max);
+  // Add a measurement with the given outcome to the sequence.
+  void add_entry(int outcome);
+  // Return the number of entries recorded so far with the given outcome.
+  int  entries_for_outcome(int outcome);
+  // Return the total number of entries recorded so far.
+  int  total_entries() { return _tot; }
+  // Return the number of entries recorded so far with the given outcome as
+  // a fraction of the total number recorded so far.
+  double fraction_for_outcome(int outcome) {
+    return
+      (double)entries_for_outcome(outcome)/
+      (double)total_entries();
+  }
+  // Print the histogram on the given output stream.
+  void print_on(outputStream* st) const;
+};
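
A standalone sketch of the bucketing scheme described above, using a hypothetical MiniIntHistogram in place of the real class (which depends on GrowableArray and outputStream): outcomes are clamped to max, so everything at least that large lands in a single overflow bucket.

#include <cstdio>
#include <vector>

// Single bucket per outcome, plus an overflow bucket at index max.
struct MiniIntHistogram {
  int max;
  long total = 0;
  std::vector<long> counts;
  explicit MiniIntHistogram(int max_outcome)
      : max(max_outcome), counts(max_outcome + 1, 0) {}
  void add_entry(int outcome) {
    if (outcome > max) outcome = max;        // fold large outcomes together
    ++counts[outcome];
    ++total;
  }
  void print() const {
    for (int i = 0; i <= max; ++i) {
      if (counts[i] != 0) {
        printf("%s%d  %ld  %.4f\n", i == max ? ">= " : "", i,
               counts[i], (double)counts[i] / (double)total);
      }
    }
  }
};

int main() {
  MiniIntHistogram h(8);
  int samples[] = {1, 1, 2, 3, 3, 3, 12, 40};
  for (int s : samples) h.add_entry(s);
  h.print();                                 // 12 and 40 land in the ">= 8" bucket
  return 0;
}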
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/numberSeq.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,243 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_numberSeq.cpp.incl"
+
+AbsSeq::AbsSeq(double alpha) :
+  _num(0), _sum(0.0), _sum_of_squares(0.0),
+  _davg(0.0), _dvariance(0.0), _alpha(alpha) {
+}
+
+void AbsSeq::add(double val) {
+  if (_num == 0) {
+    // if the sequence is empty, the davg is the same as the value
+    _davg = val;
+    // and the variance is 0
+    _dvariance = 0.0;
+  } else {
+    // otherwise, calculate both
+    _davg = (1.0 - _alpha) * val + _alpha * _davg;
+    double diff = val - _davg;
+    _dvariance = (1.0 - _alpha) * diff * diff + _alpha * _dvariance;
+  }
+}
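
The decaying average and variance above form a standard exponential moving average; a small standalone check of the same recurrence, with alpha = 0.7 as in DEFAULT_ALPHA_VALUE (illustrative only, not part of the patch):

#include <cstdio>

int main() {
  const double alpha = 0.7;            // weight kept by the history
  double davg = 0.0, dvar = 0.0;
  int num = 0;
  const double vals[] = {10.0, 12.0, 8.0, 11.0};
  for (double v : vals) {
    if (num == 0) {
      davg = v;                        // first sample seeds the average
      dvar = 0.0;
    } else {
      davg = (1.0 - alpha) * v + alpha * davg;
      double diff = v - davg;
      dvar = (1.0 - alpha) * diff * diff + alpha * dvar;
    }
    ++num;
    printf("val=%5.2f  davg=%7.4f  dvar=%7.4f\n", v, davg, dvar);
  }
  return 0;
}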
+
+double AbsSeq::avg() const {
+  if (_num == 0)
+    return 0.0;
+  else
+    return _sum / total();
+}
+
+double AbsSeq::variance() const {
+  if (_num <= 1)
+    return 0.0;
+
+  double x_bar = avg();
+  double result = _sum_of_squares / total() - x_bar * x_bar;
+  if (result < 0.0) {
+    // due to loss-of-precision errors, the variance might be negative
+    // by a small bit
+
+    //    guarantee(-0.1 < result && result < 0.0,
+    //        "if variance is negative, it should be very small");
+    result = 0.0;
+  }
+  return result;
+}
+
+double AbsSeq::sd() const {
+  double var = variance();
+  guarantee( var >= 0.0, "variance should not be negative" );
+  return sqrt(var);
+}
+
+double AbsSeq::davg() const {
+  return _davg;
+}
+
+double AbsSeq::dvariance() const {
+  if (_num <= 1)
+    return 0.0;
+
+  double result = _dvariance;
+  if (result < 0.0) {
+    // due to loss-of-precision errors, the variance might be negative
+    // by a small bit
+
+    guarantee(-0.1 < result && result < 0.0,
+               "if variance is negative, it should be very small");
+    result = 0.0;
+  }
+  return result;
+}
+
+double AbsSeq::dsd() const {
+  double var = dvariance();
+  guarantee( var >= 0.0, "variance should not be negative" );
+  return sqrt(var);
+}
+
+NumberSeq::NumberSeq(double alpha) :
+  AbsSeq(alpha), _maximum(0.0), _last(0.0) {
+}
+
+bool NumberSeq::check_nums(NumberSeq *total, int n, NumberSeq **parts) {
+  for (int i = 0; i < n; ++i) {
+    if (parts[i] != NULL && total->num() != parts[i]->num())
+      return false;
+  }
+  return true;
+}
+
+NumberSeq::NumberSeq(NumberSeq *total, int n, NumberSeq **parts) {
+  guarantee(check_nums(total, n, parts), "all seq lengths should match");
+  double sum = total->sum();
+  for (int i = 0; i < n; ++i) {
+    if (parts[i] != NULL)
+      sum -= parts[i]->sum();
+  }
+
+  _num = total->num();
+  _sum = sum;
+
+  // we do not calculate these...
+  _sum_of_squares = -1.0;
+  _maximum = -1.0;
+  _davg = -1.0;
+  _dvariance = -1.0;
+}
+
+void NumberSeq::add(double val) {
+  AbsSeq::add(val);
+
+  _last = val;
+  if (_num == 0) {
+    _maximum = val;
+  } else {
+    if (val > _maximum)
+      _maximum = val;
+  }
+  _sum += val;
+  _sum_of_squares += val * val;
+  ++_num;
+}
+
+
+TruncatedSeq::TruncatedSeq(int length, double alpha):
+  AbsSeq(alpha), _length(length), _next(0) {
+  _sequence = NEW_C_HEAP_ARRAY(double, _length);
+  for (int i = 0; i < _length; ++i)
+    _sequence[i] = 0.0;
+}
+
+void TruncatedSeq::add(double val) {
+  AbsSeq::add(val);
+
+  // get the oldest value in the sequence...
+  double old_val = _sequence[_next];
+  // ...remove it from the sum and sum of squares
+  _sum -= old_val;
+  _sum_of_squares -= old_val * old_val;
+
+  // ...and update them with the new value
+  _sum += val;
+  _sum_of_squares += val * val;
+
+  // now replace the old value with the new one
+  _sequence[_next] = val;
+  _next = (_next + 1) % _length;
+
+  // only increase it if the buffer is not full
+  if (_num < _length)
+    ++_num;
+
+  guarantee( variance() > -1.0, "variance should be >= 0" );
+}
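
TruncatedSeq::add keeps _sum and _sum_of_squares consistent with the ring buffer by subtracting the value it evicts before adding the new one; the same bookkeeping in a standalone form (window length 3, illustrative values):

#include <cstdio>

int main() {
  const int L = 3;                     // window length
  double seq[L] = {0.0, 0.0, 0.0};
  double sum = 0.0, sum_sq = 0.0;
  int next = 0, num = 0;
  const double vals[] = {4.0, 6.0, 2.0, 10.0, 8.0};
  for (double v : vals) {
    double old_val = seq[next];        // value about to be evicted (0 while filling)
    sum    -= old_val;                 // drop it from the running sums...
    sum_sq -= old_val * old_val;
    sum    += v;                       // ...and account for the new value
    sum_sq += v * v;
    seq[next] = v;
    next = (next + 1) % L;
    if (num < L) ++num;
    printf("added %4.1f  window sum=%5.1f  avg=%6.3f\n", v, sum, sum / num);
  }
  return 0;
}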
+
+// can't easily keep track of this incrementally...
+double TruncatedSeq::maximum() const {
+  if (_num == 0)
+    return 0.0;
+  double ret = _sequence[0];
+  for (int i = 1; i < _num; ++i) {
+    double val = _sequence[i];
+    if (val > ret)
+      ret = val;
+  }
+  return ret;
+}
+
+double TruncatedSeq::last() const {
+  if (_num == 0)
+    return 0.0;
+  unsigned last_index = (_next + _length - 1) % _length;
+  return _sequence[last_index];
+}
+
+double TruncatedSeq::oldest() const {
+  if (_num == 0)
+    return 0.0;
+  else if (_num < _length)
+    // index 0 always holds the oldest value until the array is full
+    return _sequence[0];
+  else {
+    // since the array is full, _next points at the oldest value
+    return _sequence[_next];
+  }
+}
+
+double TruncatedSeq::predict_next() const {
+  if (_num == 0)
+    return 0.0;
+
+  double num           = (double) _num;
+  double x_squared_sum = 0.0;
+  double x_sum         = 0.0;
+  double y_sum         = 0.0;
+  double xy_sum        = 0.0;
+  double x_avg         = 0.0;
+  double y_avg         = 0.0;
+
+  int first = (_next + _length - _num) % _length;
+  for (int i = 0; i < _num; ++i) {
+    double x = (double) i;
+    double y =  _sequence[(first + i) % _length];
+
+    x_squared_sum += x * x;
+    x_sum         += x;
+    y_sum         += y;
+    xy_sum        += x * y;
+  }
+  x_avg = x_sum / num;
+  y_avg = y_sum / num;
+
+  double Sxx = x_squared_sum - x_sum * x_sum / num;
+  double Sxy = xy_sum - x_sum * y_sum / num;
+  double b1 = Sxy / Sxx;
+  double b0 = y_avg - b1 * x_avg;
+
+  return b0 + b1 * num;
+}
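
predict_next() fits a least-squares line y = b0 + b1*x through the buffered window (x = 0 .. num-1) and evaluates it at x = num, so on an exactly linear input the prediction reproduces the next term. The following standalone restatement of the arithmetic confirms this (hypothetical predict_next helper, n >= 2 assumed so Sxx is non-zero):

#include <cstdio>

// Standalone restatement of the regression above over a full window of
// n samples y[0..n-1] taken at x = 0..n-1.  Requires n >= 2.
static double predict_next(const double* y, int n) {
  double x_sum = 0, y_sum = 0, xx_sum = 0, xy_sum = 0;
  for (int i = 0; i < n; ++i) {
    double x = (double)i;
    x_sum  += x;      y_sum  += y[i];
    xx_sum += x * x;  xy_sum += x * y[i];
  }
  double num = (double)n;
  double Sxx = xx_sum - x_sum * x_sum / num;
  double Sxy = xy_sum - x_sum * y_sum / num;
  double b1  = Sxy / Sxx;
  double b0  = y_sum / num - b1 * (x_sum / num);
  return b0 + b1 * num;                // evaluate one step past the window
}

int main() {
  double y[] = {5.0, 7.0, 9.0, 11.0};  // exactly linear: 5 + 2*x
  printf("predicted next = %.2f\n", predict_next(y, 4));   // expect 13.00
  return 0;
}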
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/numberSeq.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ **  This file contains a few classes that represent a number sequence,
+ **  x1, x2, x3, ..., xN, and can calculate its avg, max, and sd.
+ **
+ **  Here's a quick description of the classes:
+ **
+ **    AbsSeq: abstract superclass
+ **    NumberSeq: the sequence is assumed to be very long and the
+ **      maximum, avg, sd, davg, and dsd are calculated over all its elements
+ **    TruncatedSeq: this class keeps track of the last L elements
+ **      of the sequence and calculates avg, max, and sd only over them
+ **/
+
+#define DEFAULT_ALPHA_VALUE 0.7
+
+class AbsSeq {
+private:
+  void init(double alpha);
+
+protected:
+  int    _num; // the number of elements in the sequence
+  double _sum; // the sum of the elements in the sequence
+  double _sum_of_squares; // the sum of squares of the elements in the sequence
+
+  double _davg; // decaying average
+  double _dvariance; // decaying variance
+  double _alpha; // factor for the decaying average / variance
+
+  // This is what we divide with to get the average. In a standard
+  // number sequence, this should just be the number of elements in it.
+  virtual double total() const { return (double) _num; };
+
+public:
+  AbsSeq(double alpha = DEFAULT_ALPHA_VALUE);
+
+  virtual void add(double val); // adds a new element to the sequence
+  void add(unsigned val) { add((double) val); }
+  virtual double maximum() const = 0; // maximum element in the sequence
+  virtual double last() const = 0; // last element added in the sequence
+
+  // the number of elements in the sequence
+  int num() const { return _num; }
+  // the sum of the elements in the sequence
+  double sum() const { return _sum; }
+
+  double avg() const; // the average of the sequence
+  double variance() const; // the variance of the sequence
+  double sd() const; // the standard deviation of the sequence
+
+  double davg() const; // decaying average
+  double dvariance() const; // decaying variance
+  double dsd() const; // decaying "standard deviation"
+};
+
+class NumberSeq: public AbsSeq {
+private:
+  bool check_nums(NumberSeq* total, int n, NumberSeq** parts);
+
+protected:
+  double _last;
+  double _maximum; // keep track of maximum value
+
+public:
+  NumberSeq(double alpha = DEFAULT_ALPHA_VALUE);
+  NumberSeq(NumberSeq* total, int n_parts, NumberSeq** parts);
+
+  virtual void add(double val);
+  virtual double maximum() const { return _maximum; }
+  virtual double last() const { return _last; }
+};
+
+class TruncatedSeq: public AbsSeq {
+private:
+  enum PrivateConstants {
+    DefaultSeqLength = 10
+  };
+  void init();
+protected:
+  double *_sequence; // buffers the last L elements in the sequence
+  int     _length; // this is L
+  int     _next;   // oldest slot in the array, i.e. next to be overwritten
+
+public:
+  // accepts a value for L
+  TruncatedSeq(int length = DefaultSeqLength,
+               double alpha = DEFAULT_ALPHA_VALUE);
+  virtual void add(double val);
+  virtual double maximum() const;
+  virtual double last() const; // the last value added to the sequence
+
+  double oldest() const; // the oldest valid value in the sequence
+  double predict_next() const; // prediction based on linear regression
+};
--- a/hotspot/src/share/vm/utilities/ostream.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/ostream.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -188,6 +188,17 @@
   print_raw(buf);
 }
 
+void outputStream::stamp(bool guard,
+                         const char* prefix,
+                         const char* suffix) {
+  if (!guard) {
+    return;
+  }
+  print_raw(prefix);
+  stamp();
+  print_raw(suffix);
+}
+
 void outputStream::date_stamp(bool guard,
                               const char* prefix,
                               const char* suffix) {
--- a/hotspot/src/share/vm/utilities/ostream.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/ostream.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -86,6 +86,10 @@
    // Time stamp
    TimeStamp& time_stamp() { return _stamp; }
    void stamp();
+   void stamp(bool guard, const char* prefix, const char* suffix);
+   void stamp(bool guard) {
+     stamp(guard, "", ": ");
+   }
    // Date stamp
    void date_stamp(bool guard, const char* prefix, const char* suffix);
    // A simplified call that includes a suffix of ": "
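
The new stamp(bool, prefix, suffix) overload simply brackets the timestamp with a prefix and suffix when the guard is on. A standalone analogue of the call pattern, using stdio and time() as stand-ins for outputStream and the VM TimeStamp (the guard here is a hypothetical local flag, not a real VM option):

#include <cstdio>
#include <ctime>

// Standalone analogue of outputStream::stamp(bool, prefix, suffix).
static void stamp(bool guard, const char* prefix, const char* suffix) {
  if (!guard) return;
  fputs(prefix, stdout);
  printf("%ld", (long)time(nullptr));  // stand-in for the VM TimeStamp
  fputs(suffix, stdout);
}

int main() {
  bool print_timestamps = true;        // hypothetical guard flag
  stamp(print_timestamps, "", ": ");   // matches the stamp(guard) shorthand
  printf("pause started\n");
  return 0;
}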
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -65,7 +65,8 @@
   os::sleep(Thread::current(), millis, false);
 }
 
-bool ParallelTaskTerminator::offer_termination() {
+bool
+ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
   Atomic::inc(&_offered_termination);
 
   juint yield_count = 0;
@@ -91,7 +92,8 @@
         sleep(WorkStealingSleepMillis);
       }
 
-      if (peek_in_queue_set()) {
+      if (peek_in_queue_set() ||
+          (terminator != NULL && terminator->should_exit_termination())) {
         Atomic::dec(&_offered_termination);
         return false;
       }
@@ -107,72 +109,72 @@
   }
 }
 
-bool ChunkTaskQueueWithOverflow::is_empty() {
-  return (_chunk_queue.size() == 0) &&
+bool RegionTaskQueueWithOverflow::is_empty() {
+  return (_region_queue.size() == 0) &&
          (_overflow_stack->length() == 0);
 }
 
-bool ChunkTaskQueueWithOverflow::stealable_is_empty() {
-  return _chunk_queue.size() == 0;
+bool RegionTaskQueueWithOverflow::stealable_is_empty() {
+  return _region_queue.size() == 0;
 }
 
-bool ChunkTaskQueueWithOverflow::overflow_is_empty() {
+bool RegionTaskQueueWithOverflow::overflow_is_empty() {
   return _overflow_stack->length() == 0;
 }
 
-void ChunkTaskQueueWithOverflow::initialize() {
-  _chunk_queue.initialize();
+void RegionTaskQueueWithOverflow::initialize() {
+  _region_queue.initialize();
   assert(_overflow_stack == 0, "Creating memory leak");
   _overflow_stack =
-    new (ResourceObj::C_HEAP) GrowableArray<ChunkTask>(10, true);
+    new (ResourceObj::C_HEAP) GrowableArray<RegionTask>(10, true);
 }
 
-void ChunkTaskQueueWithOverflow::save(ChunkTask t) {
-  if (TraceChunkTasksQueuing && Verbose) {
+void RegionTaskQueueWithOverflow::save(RegionTask t) {
+  if (TraceRegionTasksQueuing && Verbose) {
     gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t);
   }
-  if(!_chunk_queue.push(t)) {
+  if(!_region_queue.push(t)) {
     _overflow_stack->push(t);
   }
 }
 
-// Note that using this method will retrieve all chunks
+// Note that using this method will retrieve all regions
 // that have been saved but that it will always check
 // the overflow stack.  It may be more efficient to
 // check the stealable queue and the overflow stack
 // separately.
-bool ChunkTaskQueueWithOverflow::retrieve(ChunkTask& chunk_task) {
-  bool result = retrieve_from_overflow(chunk_task);
+bool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) {
+  bool result = retrieve_from_overflow(region_task);
   if (!result) {
-    result = retrieve_from_stealable_queue(chunk_task);
+    result = retrieve_from_stealable_queue(region_task);
   }
-  if (TraceChunkTasksQueuing && Verbose && result) {
+  if (TraceRegionTasksQueuing && Verbose && result) {
     gclog_or_tty->print_cr("  CTQ: retrieve " PTR_FORMAT, result);
   }
   return result;
 }
 
-bool ChunkTaskQueueWithOverflow::retrieve_from_stealable_queue(
-                                   ChunkTask& chunk_task) {
-  bool result = _chunk_queue.pop_local(chunk_task);
-  if (TraceChunkTasksQueuing && Verbose) {
-    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
+bool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue(
+                                   RegionTask& region_task) {
+  bool result = _region_queue.pop_local(region_task);
+  if (TraceRegionTasksQueuing && Verbose) {
+    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
   }
   return result;
 }
 
-bool ChunkTaskQueueWithOverflow::retrieve_from_overflow(
-                                        ChunkTask& chunk_task) {
+bool
+RegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) {
   bool result;
   if (!_overflow_stack->is_empty()) {
-    chunk_task = _overflow_stack->pop();
+    region_task = _overflow_stack->pop();
     result = true;
   } else {
-    chunk_task = (ChunkTask) NULL;
+    region_task = (RegionTask) NULL;
     result = false;
   }
-  if (TraceChunkTasksQueuing && Verbose) {
-    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
+  if (TraceRegionTasksQueuing && Verbose) {
+    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
   }
   return result;
 }
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -120,6 +120,11 @@
     return dirty_size(_bottom, get_top());
   }
 
+  void set_empty() {
+    _bottom = 0;
+    _age = Age();
+  }
+
   // Maximum number of elements allowed in the queue.  This is two less
   // than the actual queue size, for somewhat complicated reasons.
   juint max_elems() { return n() - 2; }
@@ -155,6 +160,9 @@
   // Delete any resource associated with the queue.
   ~GenericTaskQueue();
 
+  // apply the closure to all elements in the task queue
+  void oops_do(OopClosure* f);
+
 private:
   // Element array.
   volatile E* _elems;
@@ -172,6 +180,24 @@
 }
 
 template<class E>
+void GenericTaskQueue<E>::oops_do(OopClosure* f) {
+  // tty->print_cr("START OopTaskQueue::oops_do");
+  int iters = size();
+  juint index = _bottom;
+  for (int i = 0; i < iters; ++i) {
+    index = decrement_index(index);
+    // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
+    //            index, &_elems[index], _elems[index]);
+    E* t = (E*)&_elems[index];      // cast away volatility
+    oop* p = (oop*)t;
+    assert((*t)->is_oop_or_null(), "Not an oop or null");
+    f->do_oop(p);
+  }
+  // tty->print_cr("END OopTaskQueue::oops_do");
+}
+
+
+template<class E>
 bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) {
   if (dirty_n_elems == n() - 1) {
     // Actually means 0, so do the push.
@@ -383,6 +409,12 @@
   return false;
 }
 
+// When to terminate from the termination protocol.
+class TerminatorTerminator: public CHeapObj {
+public:
+  virtual bool should_exit_termination() = 0;
+};
+
 // A class to aid in the termination of a set of parallel tasks using
 // TaskQueueSet's for work stealing.
 
@@ -407,7 +439,14 @@
   // else is.  If returns "true", all threads are terminated.  If returns
   // "false", available work has been observed in one of the task queues,
   // so the global task is not complete.
-  bool offer_termination();
+  bool offer_termination() {
+    return offer_termination(NULL);
+  }
+
+  // As above, but it also terminates if the should_exit_termination()
+  // method of the terminator parameter returns true. If terminator is
+  // NULL, then it is ignored.
+  bool offer_termination(TerminatorTerminator* terminator);
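
offer_termination(TerminatorTerminator*) polls the supplied predicate on each spin/yield iteration and bails out of the termination protocol as soon as it answers true. A sketch of the shape such a predicate takes, mirroring the interface declared above rather than including the HotSpot headers (DeadlineTerminator is hypothetical and the CHeapObj base is omitted):

#include <cstdio>

// Mirrors the TerminatorTerminator interface declared in this patch.
class TerminatorTerminator {
public:
  virtual ~TerminatorTerminator() {}
  virtual bool should_exit_termination() = 0;
};

// Hypothetical example: abandon the termination protocol once a budget runs out.
class DeadlineTerminator : public TerminatorTerminator {
  int _budget;                         // stand-in for a "time remaining" check
public:
  explicit DeadlineTerminator(int budget) : _budget(budget) {}
  virtual bool should_exit_termination() { return --_budget <= 0; }
};

int main() {
  DeadlineTerminator t(3);
  // offer_termination(&t) would poll this each iteration and return false
  // ("keep working") as soon as it answers true.
  for (int i = 0; i < 5; ++i) {
    printf("iteration %d: exit? %s\n", i,
           t.should_exit_termination() ? "yes" : "no");
  }
  return 0;
}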
 
   // Reset the terminator, so that it may be reused again.
   // The caller is responsible for ensuring that this is done
@@ -518,32 +557,32 @@
 typedef GenericTaskQueue<StarTask>     OopStarTaskQueue;
 typedef GenericTaskQueueSet<StarTask>  OopStarTaskQueueSet;
 
-typedef size_t ChunkTask;  // index for chunk
-typedef GenericTaskQueue<ChunkTask>    ChunkTaskQueue;
-typedef GenericTaskQueueSet<ChunkTask> ChunkTaskQueueSet;
+typedef size_t RegionTask;  // index for region
+typedef GenericTaskQueue<RegionTask>    RegionTaskQueue;
+typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet;
 
-class ChunkTaskQueueWithOverflow: public CHeapObj {
+class RegionTaskQueueWithOverflow: public CHeapObj {
  protected:
-  ChunkTaskQueue              _chunk_queue;
-  GrowableArray<ChunkTask>*   _overflow_stack;
+  RegionTaskQueue              _region_queue;
+  GrowableArray<RegionTask>*   _overflow_stack;
 
  public:
-  ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {}
+  RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {}
   // Initialize both stealable queue and overflow
   void initialize();
   // Save first to stealable queue and then to overflow
-  void save(ChunkTask t);
+  void save(RegionTask t);
   // Retrieve first from overflow and then from stealable queue
-  bool retrieve(ChunkTask& chunk_index);
+  bool retrieve(RegionTask& region_index);
   // Retrieve from stealable queue
-  bool retrieve_from_stealable_queue(ChunkTask& chunk_index);
+  bool retrieve_from_stealable_queue(RegionTask& region_index);
   // Retrieve from overflow
-  bool retrieve_from_overflow(ChunkTask& chunk_index);
+  bool retrieve_from_overflow(RegionTask& region_index);
   bool is_empty();
   bool stealable_is_empty();
   bool overflow_is_empty();
-  juint stealable_size() { return _chunk_queue.size(); }
-  ChunkTaskQueue* task_queue() { return &_chunk_queue; }
+  juint stealable_size() { return _region_queue.size(); }
+  RegionTaskQueue* task_queue() { return &_region_queue; }
 };
 
-#define USE_ChunkTaskQueueWithOverflow
+#define USE_RegionTaskQueueWithOverflow
--- a/hotspot/src/share/vm/utilities/workgroup.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/workgroup.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -28,13 +28,19 @@
 // Definitions of WorkGang methods.
 
 AbstractWorkGang::AbstractWorkGang(const char* name,
-                                   bool  are_GC_threads) :
+                                   bool  are_GC_task_threads,
+                                   bool  are_ConcurrentGC_threads) :
   _name(name),
-  _are_GC_threads(are_GC_threads) {
+  _are_GC_task_threads(are_GC_task_threads),
+  _are_ConcurrentGC_threads(are_ConcurrentGC_threads) {
+
+  assert(!(are_GC_task_threads && are_ConcurrentGC_threads),
+         "They cannot both be STW GC and Concurrent threads" );
+
   // Other initialization.
   _monitor = new Monitor(/* priority */       Mutex::leaf,
                          /* name */           "WorkGroup monitor",
-                         /* allow_vm_block */ are_GC_threads);
+                         /* allow_vm_block */ are_GC_task_threads);
   assert(monitor() != NULL, "Failed to allocate monitor");
   _terminate = false;
   _task = NULL;
@@ -44,16 +50,21 @@
 }
 
 WorkGang::WorkGang(const char* name,
-                   int           workers,
-                   bool          are_GC_threads) :
-  AbstractWorkGang(name, are_GC_threads) {
+                   int         workers,
+                   bool        are_GC_task_threads,
+                   bool        are_ConcurrentGC_threads) :
+  AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads)
+{
   // Save arguments.
   _total_workers = workers;
+
   if (TraceWorkGang) {
     tty->print_cr("Constructing work gang %s with %d threads", name, workers);
   }
   _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers);
-  assert(gang_workers() != NULL, "Failed to allocate gang workers");
+  if (gang_workers() == NULL) {
+    vm_exit_out_of_memory(0, "Cannot create GangWorker array.");
+  }
   for (int worker = 0; worker < total_workers(); worker += 1) {
     GangWorker* new_worker = new GangWorker(this, worker);
     assert(new_worker != NULL, "Failed to allocate GangWorker");
@@ -285,7 +296,11 @@
 }
 
 bool GangWorker::is_GC_task_thread() const {
-  return gang()->are_GC_threads();
+  return gang()->are_GC_task_threads();
+}
+
+bool GangWorker::is_ConcurrentGC_thread() const {
+  return gang()->are_ConcurrentGC_threads();
 }
 
 void GangWorker::print_on(outputStream* st) const {
@@ -312,26 +327,43 @@
 
 WorkGangBarrierSync::WorkGangBarrierSync()
   : _monitor(Mutex::safepoint, "work gang barrier sync", true),
-    _n_workers(0), _n_completed(0) {
+    _n_workers(0), _n_completed(0), _should_reset(false) {
 }
 
 WorkGangBarrierSync::WorkGangBarrierSync(int n_workers, const char* name)
   : _monitor(Mutex::safepoint, name, true),
-    _n_workers(n_workers), _n_completed(0) {
+    _n_workers(n_workers), _n_completed(0), _should_reset(false) {
 }
 
 void WorkGangBarrierSync::set_n_workers(int n_workers) {
   _n_workers   = n_workers;
   _n_completed = 0;
+  _should_reset = false;
 }
 
 void WorkGangBarrierSync::enter() {
   MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag);
+  if (should_reset()) {
+    // The should_reset() was set and we are the first worker to enter
+    // the sync barrier. We will zero the n_completed() count which
+    // effectively resets the barrier.
+    zero_completed();
+    set_should_reset(false);
+  }
   inc_completed();
   if (n_completed() == n_workers()) {
+    // At this point we would like to reset the barrier to be ready in
+    // case it is used again. However, we cannot set n_completed() to
+    // 0, even after the notify_all(), given that some other workers
+    // might still be waiting for n_completed() to become ==
+    // n_workers(). So, if we set n_completed() to 0, those workers
+    // will get stuck (as they will wake up, see that n_completed() !=
+    // n_workers() and go back to sleep). Instead, we raise the
+    // should_reset() flag and the barrier will be reset the first
+    // time a worker enters it again.
+    set_should_reset(true);
     monitor()->notify_all();
-  }
-  else {
+  } else {
     while (n_completed() != n_workers()) {
       monitor()->wait(/* no_safepoint_check */ true);
     }
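
The should_reset trick above (the last worker flags a reset instead of zeroing the count, so late waiters are not stranded) can be reproduced with a plain mutex and condition variable; a standalone sketch of the same barrier behaviour for a single round:

#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>
#include <cstdio>

// Standalone sketch of the reset-on-next-entry barrier used above.
class BarrierSync {
  std::mutex _mu;
  std::condition_variable _cv;
  int _n_workers, _n_completed = 0;
  bool _should_reset = false;
public:
  explicit BarrierSync(int n) : _n_workers(n) {}
  void enter() {
    std::unique_lock<std::mutex> lk(_mu);
    if (_should_reset) {               // first worker of the next round resets
      _n_completed = 0;
      _should_reset = false;
    }
    if (++_n_completed == _n_workers) {
      _should_reset = true;            // defer the reset, as explained above
      _cv.notify_all();
    } else {
      _cv.wait(lk, [this] { return _n_completed == _n_workers; });
    }
  }
};

int main() {
  const int n = 4;
  BarrierSync barrier(n);
  std::vector<std::thread> workers;
  for (int i = 0; i < n; ++i) {
    workers.emplace_back([&barrier, i] {
      printf("worker %d before barrier\n", i);
      barrier.enter();
      printf("worker %d after barrier\n", i);
    });
  }
  for (auto& t : workers) t.join();
  return 0;
}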
@@ -442,3 +474,122 @@
   }
   return false;
 }
+
+bool FreeIdSet::_stat_init = false;
+FreeIdSet* FreeIdSet::_sets[NSets];
+bool FreeIdSet::_safepoint;
+
+FreeIdSet::FreeIdSet(int sz, Monitor* mon) :
+  _sz(sz), _mon(mon), _hd(0), _waiters(0), _index(-1), _claimed(0)
+{
+  _ids = new int[sz];
+  for (int i = 0; i < sz; i++) _ids[i] = i+1;
+  _ids[sz-1] = end_of_list; // end of list.
+  if (!_stat_init) {
+    for (int j = 0; j < NSets; j++) _sets[j] = NULL;
+    _stat_init = true;
+  }
+  // Add to sets.  (This should happen while the system is still single-threaded.)
+  for (int j = 0; j < NSets; j++) {
+    if (_sets[j] == NULL) {
+      _sets[j] = this;
+      _index = j;
+      break;
+    }
+  }
+  guarantee(_index != -1, "Too many FreeIdSets in use!");
+}
+
+FreeIdSet::~FreeIdSet() {
+  _sets[_index] = NULL;
+}
+
+void FreeIdSet::set_safepoint(bool b) {
+  _safepoint = b;
+  if (b) {
+    for (int j = 0; j < NSets; j++) {
+      if (_sets[j] != NULL && _sets[j]->_waiters > 0) {
+        Monitor* mon = _sets[j]->_mon;
+        mon->lock_without_safepoint_check();
+        mon->notify_all();
+        mon->unlock();
+      }
+    }
+  }
+}
+
+#define FID_STATS 0
+
+int FreeIdSet::claim_par_id() {
+#if FID_STATS
+  thread_t tslf = thr_self();
+  tty->print("claim_par_id[%d]: sz = %d, claimed = %d\n", tslf, _sz, _claimed);
+#endif
+  MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag);
+  while (!_safepoint && _hd == end_of_list) {
+    _waiters++;
+#if FID_STATS
+    if (_waiters > 5) {
+      tty->print("claim_par_id waiting[%d]: %d waiters, %d claimed.\n",
+                 tslf, _waiters, _claimed);
+    }
+#endif
+    _mon->wait(Mutex::_no_safepoint_check_flag);
+    _waiters--;
+  }
+  if (_hd == end_of_list) {
+#if FID_STATS
+    tty->print("claim_par_id[%d]: returning EOL.\n", tslf);
+#endif
+    return -1;
+  } else {
+    int res = _hd;
+    _hd = _ids[res];
+    _ids[res] = claimed;  // For debugging.
+    _claimed++;
+#if FID_STATS
+    tty->print("claim_par_id[%d]: returning %d, claimed = %d.\n",
+               tslf, res, _claimed);
+#endif
+    return res;
+  }
+}
+
+bool FreeIdSet::claim_perm_id(int i) {
+  assert(0 <= i && i < _sz, "Out of range.");
+  MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag);
+  int prev = end_of_list;
+  int cur = _hd;
+  while (cur != end_of_list) {
+    if (cur == i) {
+      if (prev == end_of_list) {
+        _hd = _ids[cur];
+      } else {
+        _ids[prev] = _ids[cur];
+      }
+      _ids[cur] = claimed;
+      _claimed++;
+      return true;
+    } else {
+      prev = cur;
+      cur = _ids[cur];
+    }
+  }
+  return false;
+
+}
+
+void FreeIdSet::release_par_id(int id) {
+  MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag);
+  assert(_ids[id] == claimed, "Precondition.");
+  _ids[id] = _hd;
+  _hd = id;
+  _claimed--;
+#if FID_STATS
+  tty->print("[%d] release_par_id(%d), waiters =%d,  claimed = %d.\n",
+             thr_self(), id, _waiters, _claimed);
+#endif
+  if (_waiters > 0)
+    // Notify all would be safer, but this is OK, right?
+    _mon->notify_all();
+}
--- a/hotspot/src/share/vm/utilities/workgroup.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/workgroup.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -72,7 +72,8 @@
   // Here's the public interface to this class.
 public:
   // Constructor and destructor.
-  AbstractWorkGang(const char* name, bool are_GC_threads);
+  AbstractWorkGang(const char* name, bool are_GC_task_threads,
+                   bool are_ConcurrentGC_threads);
   ~AbstractWorkGang();
   // Run a task, returns when the task is done (or terminated).
   virtual void run_task(AbstractGangTask* task) = 0;
@@ -83,7 +84,8 @@
   const char* name() const;
 protected:
   // Initialize only instance data.
-  const bool _are_GC_threads;
+  const bool _are_GC_task_threads;
+  const bool _are_ConcurrentGC_threads;
   // Printing support.
   const char* _name;
   // The monitor which protects these data,
@@ -130,8 +132,11 @@
   int finished_workers() const {
     return _finished_workers;
   }
-  bool are_GC_threads() const {
-    return _are_GC_threads;
+  bool are_GC_task_threads() const {
+    return _are_GC_task_threads;
+  }
+  bool are_ConcurrentGC_threads() const {
+    return _are_ConcurrentGC_threads;
   }
   // Predicates.
   bool is_idle() const {
@@ -190,7 +195,8 @@
 class WorkGang: public AbstractWorkGang {
 public:
   // Constructor
-  WorkGang(const char* name, int workers, bool are_GC_threads);
+  WorkGang(const char* name, int workers,
+           bool are_GC_task_threads, bool are_ConcurrentGC_threads);
   // Run a task, returns when the task is done (or terminated).
   virtual void run_task(AbstractGangTask* task);
 };
@@ -206,6 +212,7 @@
   virtual void run();
   // Predicate for Thread
   virtual bool is_GC_task_thread() const;
+  virtual bool is_ConcurrentGC_thread() const;
   // Printing
   void print_on(outputStream* st) const;
   virtual void print() const { print_on(tty); }
@@ -228,12 +235,17 @@
   Monitor _monitor;
   int     _n_workers;
   int     _n_completed;
+  bool    _should_reset;
 
-  Monitor* monitor()       { return &_monitor; }
-  int      n_workers()     { return _n_workers; }
-  int      n_completed()   { return _n_completed; }
+  Monitor* monitor()        { return &_monitor; }
+  int      n_workers()      { return _n_workers; }
+  int      n_completed()    { return _n_completed; }
+  bool     should_reset()   { return _should_reset; }
 
-  void     inc_completed() { _n_completed++; }
+  void     zero_completed() { _n_completed = 0; }
+  void     inc_completed()  { _n_completed++; }
+
+  void     set_should_reset(bool v) { _should_reset = v; }
 
 public:
   WorkGangBarrierSync();
@@ -343,3 +355,42 @@
   // cleanup if necessary.
   bool all_tasks_completed();
 };
+
+// Represents a set of free small integer ids.
+class FreeIdSet {
+  enum {
+    end_of_list = -1,
+    claimed = -2
+  };
+
+  int _sz;
+  Monitor* _mon;
+
+  int* _ids;
+  int _hd;
+  int _waiters;
+  int _claimed;
+
+  static bool _safepoint;
+  typedef FreeIdSet* FreeIdSetPtr;
+  static const int NSets = 10;
+  static FreeIdSetPtr _sets[NSets];
+  static bool _stat_init;
+  int _index;
+
+public:
+  FreeIdSet(int sz, Monitor* mon);
+  ~FreeIdSet();
+
+  static void set_safepoint(bool b);
+
+  // Attempt to claim the given id permanently.  Returns "true" iff
+  // successful.
+  bool claim_perm_id(int i);
+
+  // Returns an unclaimed parallel id (waiting for one to be released if
+  // necessary).  Returns "-1" if a GC/safepoint wakes up the wait before
+  // an id becomes available.
+  int claim_par_id();
+
+  void release_par_id(int id);
+};
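
FreeIdSet is a blocking free-list of small integer ids, threaded through the _ids array itself. Stripped of the Monitor and safepoint machinery, the list bookkeeping behind claim_par_id/release_par_id looks like this single-threaded sketch (hypothetical MiniFreeIdSet):

#include <cassert>
#include <cstdio>
#include <vector>

// Single-threaded sketch of the free-list bookkeeping inside FreeIdSet.
class MiniFreeIdSet {
  enum { end_of_list = -1, claimed = -2 };
  std::vector<int> _ids;   // _ids[i] is the next free id after i, or a marker
  int _hd;                 // head of the free list
public:
  explicit MiniFreeIdSet(int sz) : _ids(sz), _hd(0) {
    for (int i = 0; i < sz; i++) _ids[i] = i + 1;
    _ids[sz - 1] = end_of_list;
  }
  int claim_par_id() {                 // -1 when no id is free
    if (_hd == end_of_list) return -1;
    int res = _hd;
    _hd = _ids[res];
    _ids[res] = claimed;
    return res;
  }
  void release_par_id(int id) {
    assert(_ids[id] == claimed);
    _ids[id] = _hd;                    // push the id back on the free list
    _hd = id;
  }
};

int main() {
  MiniFreeIdSet ids(3);
  int a = ids.claim_par_id();          // 0
  int b = ids.claim_par_id();          // 1
  ids.release_par_id(a);
  printf("claimed %d, %d, then reclaimed %d\n", a, b, ids.claim_par_id());  // 0 again
  return 0;
}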
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp	Wed Jul 05 16:43:15 2017 +0200
@@ -31,8 +31,8 @@
 class WorkData;
 
 YieldingFlexibleWorkGang::YieldingFlexibleWorkGang(
-  const char* name, int workers, bool are_GC_threads) :
-  AbstractWorkGang(name, are_GC_threads) {
+  const char* name, int workers, bool are_GC_task_threads) :
+  AbstractWorkGang(name, are_GC_task_threads, false) {
   // Save arguments.
   _total_workers = workers;
   assert(_total_workers > 0, "Must have more than 1 worker");
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp	Wed Jul 05 16:43:15 2017 +0200
@@ -143,7 +143,8 @@
   // Here's the public interface to this class.
 public:
   // Constructor and destructor.
-  YieldingFlexibleWorkGang(const char* name, int workers, bool are_GC_threads);
+  YieldingFlexibleWorkGang(const char* name, int workers,
+                           bool are_GC_task_threads);
 
   YieldingFlexibleGangTask* yielding_task() const {
     assert(task() == NULL || task()->is_YieldingFlexibleGang_task(),
--- a/hotspot/test/compiler/6646019/Test.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/test/compiler/6646019/Test.java	Wed Jul 05 16:43:15 2017 +0200
@@ -19,7 +19,6 @@
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
- *
  */
 
 /*
--- a/hotspot/test/compiler/6689060/Test.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/test/compiler/6689060/Test.java	Wed Jul 05 16:43:15 2017 +0200
@@ -19,7 +19,6 @@
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
- *
  */
 
 /*
--- a/hotspot/test/compiler/6695810/Test.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/test/compiler/6695810/Test.java	Wed Jul 05 16:43:15 2017 +0200
@@ -19,7 +19,6 @@
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
- *
  */
 
 /*
--- a/hotspot/test/compiler/6700047/Test6700047.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/hotspot/test/compiler/6700047/Test6700047.java	Wed Jul 05 16:43:15 2017 +0200
@@ -29,6 +29,8 @@
  */
 
 public class Test6700047 {
+    static byte[] dummy = new byte[256];
+
     public static void main(String[] args) {
         for (int i = 0; i < 100000; i++) {
             intToLeftPaddedAsciiBytes();
@@ -53,6 +55,7 @@
         if (offset > 0) {
             for(int j = 0; j < offset; j++) {
                 result++;
+                dummy[i] = 0;
             }
         }
         return result;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6711100/Test.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6711100
+ * @summary 64bit fastdebug server vm crashes with assert(_base == Int,"Not an Int")
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test.<init> Test
+ */
+
+public class Test {
+
+    static byte b;
+
+    // The server compiler chokes on compiling
+    // this method when f() is not inlined
+    public Test() {
+        b = (new byte[1])[(new byte[f()])[-1]];
+    }
+
+    protected static int f() {
+      return 1;
+    }
+
+    public static void main(String[] args) {
+      try {
+        Test t = new Test();
+      } catch (ArrayIndexOutOfBoundsException e) {
+      }
+    }
+}
+
+
--- a/jaxp/.hgtags	Thu Oct 23 21:56:41 2008 -0700
+++ b/jaxp/.hgtags	Wed Jul 05 16:43:15 2017 +0200
@@ -12,3 +12,4 @@
 eac46d1eb7f0935ba04f1c7929ec15423fd0309e jdk7-b35
 c84ca638db42a8b6b227b4e3b63bca192c5ca634 jdk7-b36
 af49591bc486d82aa04b832257de0d18adc9af52 jdk7-b37
+e9f750f0a3a00413a7b77028b2ecdabb7129ae32 jdk7-b38
--- a/jaxws/.hgtags	Thu Oct 23 21:56:41 2008 -0700
+++ b/jaxws/.hgtags	Wed Jul 05 16:43:15 2017 +0200
@@ -12,3 +12,4 @@
 b0f01c2508b690dd225298edfec70b5e8b8dc367 jdk7-b35
 f60187f44a0d62906a5e2f6bd0989b5b24c1ca1e jdk7-b36
 a2a6f9edf761934faf59ea60d7fe7178371302cd jdk7-b37
+9ce439969184c753a9ba3caf8ed277b05230f2e5 jdk7-b38
--- a/jdk/.hgtags	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/.hgtags	Wed Jul 05 16:43:15 2017 +0200
@@ -12,3 +12,4 @@
 cf4894b78ceb966326e93bf221db0c2d14d59218 jdk7-b35
 134fd1a656ea85acd1f97f6700f75029b9b472a0 jdk7-b36
 14f50aee4989b75934d385c56a83da0c23d2f68b jdk7-b37
+cc5f810b5af8a3a83b0df5a29d9e24d7a0ff8086 jdk7-b38
--- a/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java	Wed Jul 05 16:43:15 2017 +0200
@@ -32,6 +32,7 @@
 import java.lang.reflect.GenericArrayType;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.lang.reflect.ParameterizedType;
 import java.lang.reflect.Type;
 import java.util.Map;
 import java.util.WeakHashMap;
@@ -390,7 +391,31 @@
         if (type instanceof Class)
             return ((Class) type).getName();
         else
-            return type.toString();
+            return genericTypeString(type);
+    }
+
+    private static String genericTypeString(Type type) {
+        if (type instanceof Class<?>) {
+            Class<?> c = (Class<?>) type;
+            if (c.isArray())
+                return genericTypeString(c.getComponentType()) + "[]";
+            else
+                return c.getName();
+        } else if (type instanceof GenericArrayType) {
+            GenericArrayType gat = (GenericArrayType) type;
+            return genericTypeString(gat.getGenericComponentType()) + "[]";
+        } else if (type instanceof ParameterizedType) {
+            ParameterizedType pt = (ParameterizedType) type;
+            StringBuilder sb = new StringBuilder();
+            sb.append(genericTypeString(pt.getRawType())).append("<");
+            String sep = "";
+            for (Type t : pt.getActualTypeArguments()) {
+                sb.append(sep).append(genericTypeString(t));
+                sep = ", ";
+            }
+            return sb.append(">").toString();
+        } else
+            return "???";
     }
 
     private final PerInterfaceMap<ConvertingMethod>
--- a/jdk/src/share/classes/java/nio/channels/SelectableChannel.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/java/nio/channels/SelectableChannel.java	Wed Jul 05 16:43:15 2017 +0200
@@ -191,6 +191,9 @@
      * @throws  ClosedChannelException
      *          If this channel is closed
      *
+     * @throws  ClosedSelectorException
+     *          If the selector is closed
+     *
      * @throws  IllegalBlockingModeException
      *          If this channel is in blocking mode
      *
@@ -246,6 +249,9 @@
      * @throws  ClosedChannelException
      *          If this channel is closed
      *
+     * @throws  ClosedSelectorException
+     *          If the selector is closed
+     *
      * @throws  IllegalBlockingModeException
      *          If this channel is in blocking mode
      *
--- a/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java	Wed Jul 05 16:43:15 2017 +0200
@@ -175,6 +175,16 @@
      * the selector is invoked while holding the appropriate locks.  The
      * resulting key is added to this channel's key set before being returned.
      * </p>
+     *
+     * @throws  ClosedSelectorException {@inheritDoc}
+     *
+     * @throws  IllegalBlockingModeException {@inheritDoc}
+     *
+     * @throws  IllegalSelectorException {@inheritDoc}
+     *
+     * @throws  CancelledKeyException {@inheritDoc}
+     *
+     * @throws  IllegalArgumentException {@inheritDoc}
      */
     public final SelectionKey register(Selector sel, int ops,
                                        Object att)
--- a/jdk/src/share/classes/javax/management/event/EventClient.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/javax/management/event/EventClient.java	Wed Jul 05 16:43:15 2017 +0200
@@ -265,12 +265,20 @@
             public ScheduledThreadPoolExecutor createThreadPool(ThreadGroup group) {
                 ThreadFactory daemonThreadFactory = new DaemonThreadFactory(
                         "JMX EventClient lease renewer %d");
-                ScheduledThreadPoolExecutor exec = new ScheduledThreadPoolExecutor(
-                        20, daemonThreadFactory);
-                exec.setKeepAliveTime(1, TimeUnit.SECONDS);
-                exec.allowCoreThreadTimeOut(true);
-                exec.setRemoveOnCancelPolicy(true);
-                return exec;
+                ScheduledThreadPoolExecutor executor =
+                        new ScheduledThreadPoolExecutor(20, daemonThreadFactory);
+                executor.setKeepAliveTime(1, TimeUnit.SECONDS);
+                executor.allowCoreThreadTimeOut(true);
+                executor.setRemoveOnCancelPolicy(true);
+                // By default, a ScheduledThreadPoolExecutor will keep jobs
+                // in its queue even after they have been cancelled.  They
+                // will only be removed when their scheduled time arrives.
+                // Since the job references the LeaseRenewer which references
+                // this EventClient, this can lead to a moderately large number
+                // of objects remaining referenced until the renewal time
+                // arrives.  Hence the above call, which removes the job from
+                // the queue as soon as it is cancelled.
+                return executor;
             }
         };
         return leaseRenewerThreadPool.getThreadPoolExecutor(create);
@@ -381,7 +389,7 @@
             listenerId =
                     eventClientDelegate.addListener(clientId, name, filter);
         } catch (EventClientNotFoundException ecnfe) {
-            final IOException ioe = new IOException();
+            final IOException ioe = new IOException(ecnfe.getMessage());
             ioe.initCause(ecnfe);
             throw ioe;
         }
@@ -488,7 +496,7 @@
             listenerId =
                     eventClientDelegate.addSubscriber(clientId, name, filter);
         } catch (EventClientNotFoundException ecnfe) {
-            final IOException ioe = new IOException();
+            final IOException ioe = new IOException(ecnfe.getMessage());
             ioe.initCause(ecnfe);
             throw ioe;
         }
--- a/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java	Wed Jul 05 16:43:15 2017 +0200
@@ -91,7 +91,7 @@
      * the fetching.
      *
      * @param delegate The {@code EventClientDelegateMBean} to work with.
-     * @param executor Used to do the fetching. A new thread is created if
+     * @param fetchExecutor Used to do the fetching. A new thread is created if
      * {@code null}.
      * @throws IOException If failed to work with the {@code delegate}.
      * @throws MBeanException if unable to add a client to the remote
@@ -101,12 +101,12 @@
      * @throws IllegalArgumentException If {@code delegate} is {@code null}.
      */
     public FetchingEventRelay(EventClientDelegateMBean delegate,
-            Executor executor) throws IOException, MBeanException {
+            Executor fetchExecutor) throws IOException, MBeanException {
         this(delegate,
                 DEFAULT_BUFFER_SIZE,
                 DEFAULT_WAITING_TIMEOUT,
                 DEFAULT_MAX_NOTIFICATIONS,
-                executor);
+                fetchExecutor);
     }
 
     /**
@@ -120,7 +120,7 @@
      * @param timeout The waiting time in millseconds when fetching
      * notifications from an {@code EventClientDelegateMBean}.
      * @param maxNotifs The maximum notifications to fetch every time.
-     * @param executor Used to do the fetching. A new thread is created if
+     * @param fetchExecutor Used to do the fetching. A new thread is created if
      * {@code null}.
      * @throws IOException if failed to communicate with the {@code delegate}.
      * @throws MBeanException if unable to add a client to the remote
@@ -133,12 +133,12 @@
             int bufferSize,
             long timeout,
             int maxNotifs,
-            Executor executor) throws IOException, MBeanException {
+            Executor fetchExecutor) throws IOException, MBeanException {
         this(delegate,
                 bufferSize,
                 timeout,
                 maxNotifs,
-                executor,
+                fetchExecutor,
                 FetchingEventForwarder.class.getName(),
                 new Object[] {bufferSize},
                 new String[] {int.class.getName()});
@@ -155,7 +155,7 @@
      * @param timeout The waiting time in millseconds when fetching
      * notifications from an {@code EventClientDelegateMBean}.
      * @param maxNotifs The maximum notifications to fetch every time.
-     * @param executor Used to do the fetching.
+     * @param fetchExecutor Used to do the fetching.
      * @param forwarderName the class name of a user specific EventForwarder
      * to create in server to forward notifications to this object. The class
      * should be a subclass of the class {@link FetchingEventForwarder}.
@@ -174,7 +174,7 @@
             int bufferSize,
             long timeout,
             int maxNotifs,
-            Executor executor,
+            Executor fetchExecutor,
             String forwarderName,
             Object[] params,
             String[] sig) throws IOException, MBeanException {
@@ -184,11 +184,11 @@
                     bufferSize+" "+
                     timeout+" "+
                     maxNotifs+" "+
-                    executor+" "+
+                    fetchExecutor+" "+
                     forwarderName+" ");
         }
 
-        if(delegate == null) {
+        if (delegate == null) {
             throw new NullPointerException("Null EventClientDelegateMBean!");
         }
 
@@ -212,16 +212,16 @@
         this.timeout = timeout;
         this.maxNotifs = maxNotifs;
 
-        if (executor == null) {
-            ScheduledThreadPoolExecutor stpe = new ScheduledThreadPoolExecutor(1,
-                    daemonThreadFactory);
-            stpe.setKeepAliveTime(1, TimeUnit.SECONDS);
-            stpe.allowCoreThreadTimeOut(true);
-            executor = stpe;
-            this.defaultExecutor = stpe;
+        if (fetchExecutor == null) {
+            ScheduledThreadPoolExecutor executor =
+                    new ScheduledThreadPoolExecutor(1, daemonThreadFactory);
+            executor.setKeepAliveTime(1, TimeUnit.SECONDS);
+            executor.allowCoreThreadTimeOut(true);
+            fetchExecutor = executor;
+            this.defaultExecutor = executor;
         } else
             this.defaultExecutor = null;
-        this.executor = executor;
+        this.fetchExecutor = fetchExecutor;
 
         startSequenceNumber = 0;
         fetchingJob = new MyJob();
@@ -258,7 +258,7 @@
 
     private class MyJob extends RepeatedSingletonJob {
         public MyJob() {
-            super(executor);
+            super(fetchExecutor);
         }
 
         public boolean isSuspended() {
@@ -368,7 +368,7 @@
     private String clientId;
     private boolean stopped = false;
 
-    private final Executor executor;
+    private final Executor fetchExecutor;
     private final ExecutorService defaultExecutor;
     private final MyJob fetchingJob;
 
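A minimal sketch of the default-executor fallback shown above: when fetchExecutor is null, the relay builds a single-threaded, daemon ScheduledThreadPoolExecutor whose core thread retires after one idle second. The thread factory below is an assumed stand-in for the internal daemonThreadFactory referenced in the hunk.

import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;

final class DefaultFetchExecutorSketch {
    // Assumed stand-in for the internal daemonThreadFactory used in the hunk.
    private static final ThreadFactory DAEMON = new ThreadFactory() {
        public Thread newThread(Runnable r) {
            Thread t = new Thread(r, "FetchingEventRelay-fetcher");
            t.setDaemon(true);
            return t;
        }
    };

    static Executor newDefaultFetchExecutor() {
        ScheduledThreadPoolExecutor executor =
                new ScheduledThreadPoolExecutor(1, DAEMON);
        // Let the single core thread retire after one idle second so the
        // relay holds no thread while no fetching is in progress.
        executor.setKeepAliveTime(1, TimeUnit.SECONDS);
        executor.allowCoreThreadTimeOut(true);
        return executor;
    }
}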
--- a/jdk/src/share/classes/javax/management/monitor/Monitor.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/javax/management/monitor/Monitor.java	Wed Jul 05 16:43:15 2017 +0200
@@ -181,7 +181,7 @@
     /**
      * Executor Service.
      */
-    private static final ExecutorService executor;
+    private static final ThreadPoolExecutor executor;
     static {
         final String maximumPoolSizeSysProp = "jmx.x.monitor.maximum.pool.size";
         final String maximumPoolSizeStr = AccessController.doPrivileged(
@@ -218,7 +218,7 @@
                 TimeUnit.SECONDS,
                 new LinkedBlockingQueue<Runnable>(),
                 new DaemonThreadFactory("Executor"));
-        ((ThreadPoolExecutor)executor).allowCoreThreadTimeOut(true);
+        executor.allowCoreThreadTimeOut(true);
     }
 
     /**
--- a/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java	Wed Jul 05 16:43:15 2017 +0200
@@ -71,9 +71,8 @@
 import java.util.Properties;
 import java.util.Set;
 import java.util.WeakHashMap;
-import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Executor;
-import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
@@ -421,12 +420,12 @@
             public ThreadPoolExecutor createThreadPool(ThreadGroup group) {
                 ThreadFactory daemonThreadFactory = new DaemonThreadFactory(
                         "JMX RMIConnector listener dispatch %d");
-                ThreadPoolExecutor exec = new ThreadPoolExecutor(
+                ThreadPoolExecutor executor = new ThreadPoolExecutor(
                         1, 10, 1, TimeUnit.SECONDS,
-                        new LinkedBlockingDeque<Runnable>(),
+                        new LinkedBlockingQueue<Runnable>(),
                         daemonThreadFactory);
-                exec.allowCoreThreadTimeOut(true);
-                return exec;
+                executor.allowCoreThreadTimeOut(true);
+                return executor;
             }
         };
         return listenerDispatchThreadPool.getThreadPoolExecutor(create);
@@ -1503,7 +1502,7 @@
             super(period);
         }
 
-        public void gotIOException (IOException ioe) throws IOException {
+        public void gotIOException(IOException ioe) throws IOException {
             if (ioe instanceof NoSuchObjectException) {
                 // need to restart
                 super.gotIOException(ioe);
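For reference, a rough sketch of the listener dispatch pool as configured after the hunk above: the work queue is a plain LinkedBlockingQueue rather than a deque, and the local variable is typed as ThreadPoolExecutor so allowCoreThreadTimeOut needs no cast. Class and thread names here are illustrative only.

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

final class ListenerDispatchPoolSketch {
    static ThreadPoolExecutor createThreadPool() {
        ThreadFactory daemonThreadFactory = new ThreadFactory() {
            public Thread newThread(Runnable r) {
                Thread t = new Thread(r, "listener-dispatch");
                t.setDaemon(true);
                return t;
            }
        };
        // Core size 1, nominal maximum 10, idle threads retired after 1s;
        // tasks wait on an unbounded FIFO queue.
        ThreadPoolExecutor executor = new ThreadPoolExecutor(
                1, 10, 1, TimeUnit.SECONDS,
                new LinkedBlockingQueue<Runnable>(),
                daemonThreadFactory);
        executor.allowCoreThreadTimeOut(true);
        return executor;
    }
}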
--- a/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java	Wed Jul 05 16:43:15 2017 +0200
@@ -80,7 +80,7 @@
 import static sun.management.AgentConfigurationError.*;
 import sun.management.ConnectorAddressLink;
 import sun.management.FileSystem;
-import sun.management.snmp.util.MibLogger;
+import com.sun.jmx.remote.util.ClassLogger;
 
 import com.sun.jmx.remote.internal.RMIExporter;
 import com.sun.jmx.remote.security.JMXPluggableAuthenticator;
@@ -99,6 +99,7 @@
         public static final String PORT = "0";
         public static final String CONFIG_FILE_NAME = "management.properties";
         public static final String USE_SSL = "true";
+        public static final String USE_LOCAL_ONLY = "true";
         public static final String USE_REGISTRY_SSL = "false";
         public static final String USE_AUTHENTICATION = "true";
         public static final String PASSWORD_FILE_NAME = "jmxremote.password";
@@ -115,6 +116,8 @@
                 "com.sun.management.jmxremote.port";
         public static final String CONFIG_FILE_NAME =
                 "com.sun.management.config.file";
+        public static final String USE_LOCAL_ONLY =
+                "com.sun.management.jmxremote.local.only";
         public static final String USE_SSL =
                 "com.sun.management.jmxremote.ssl";
         public static final String USE_REGISTRY_SSL =
@@ -384,7 +387,7 @@
             checkAccessFile(accessFileName);
         }
 
-        if (log.isDebugOn()) {
+        if (log.debugOn()) {
             log.debug("initialize",
                     Agent.getText("jmxremote.ConnectorBootstrap.initialize") +
                     "\n\t" + PropertyNames.PORT + "=" + port +
@@ -477,6 +480,18 @@
         MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
         try {
             JMXServiceURL url = new JMXServiceURL("rmi", localhost, 0);
+            // Do we accept connections from local interfaces only?
+            Properties props = Agent.getManagementProperties();
+            if (props ==  null) {
+                props = new Properties();
+            }
+            String useLocalOnlyStr = props.getProperty(
+                    PropertyNames.USE_LOCAL_ONLY, DefaultValues.USE_LOCAL_ONLY);
+            boolean useLocalOnly = Boolean.valueOf(useLocalOnlyStr).booleanValue();
+            if (useLocalOnly) {
+                env.put(RMIConnectorServer.RMI_SERVER_SOCKET_FACTORY_ATTRIBUTE,
+                        new LocalRMIServerSocketFactory());
+            }
             JMXConnectorServer server =
                     JMXConnectorServerFactory.newJMXConnectorServer(url, env, mbs);
             server.start();
@@ -764,7 +779,7 @@
     private ConnectorBootstrap() {
     }
 
-    // XXX Revisit: should probably clone this MibLogger....
-    private static final MibLogger log =
-            new MibLogger(ConnectorBootstrap.class);
+    private static final ClassLogger log =
+        new ClassLogger(ConnectorBootstrap.class.getPackage().getName(),
+                        "ConnectorBootstrap");
 }
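To illustrate the new local-only wiring, a hedged sketch of restricting a standalone RMI connector server through the environment map, matching the com.sun.management.jmxremote.local.only=true default above; everything outside the env.put call is assumed scaffolding.

import java.lang.management.ManagementFactory;
import java.util.HashMap;
import java.util.Map;
import javax.management.MBeanServer;
import javax.management.remote.JMXConnectorServer;
import javax.management.remote.JMXConnectorServerFactory;
import javax.management.remote.JMXServiceURL;
import javax.management.remote.rmi.RMIConnectorServer;
import sun.management.jmxremote.LocalRMIServerSocketFactory;

public class LocalOnlyConnectorSketch {
    public static void main(String[] args) throws Exception {
        MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
        Map<String, Object> env = new HashMap<String, Object>();
        // Only clients running on this host can connect to the connector.
        env.put(RMIConnectorServer.RMI_SERVER_SOCKET_FACTORY_ATTRIBUTE,
                new LocalRMIServerSocketFactory());
        JMXServiceURL url = new JMXServiceURL("rmi", "localhost", 0);
        JMXConnectorServer server =
                JMXConnectorServerFactory.newJMXConnectorServer(url, env, mbs);
        server.start();
        System.out.println("Local-only connector at " + server.getAddress());
        server.stop();
    }
}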
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/management/jmxremote/LocalRMIServerSocketFactory.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.management.jmxremote;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.NetworkInterface;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.net.SocketException;
+import java.rmi.server.RMIServerSocketFactory;
+import java.util.Enumeration;
+
+/**
+ * This RMI server socket factory creates server sockets that
+ * will only accept connection requests from clients running
+ * on the host where the RMI remote objects have been exported.
+ */
+public final class LocalRMIServerSocketFactory implements RMIServerSocketFactory {
+    /**
+     * Creates a server socket that only accepts connection requests from
+     * clients running on the host where the RMI remote objects have been
+     * exported.
+     */
+    public ServerSocket createServerSocket(int port) throws IOException {
+        return new ServerSocket(port) {
+            @Override
+            public Socket accept() throws IOException {
+                Socket socket = super.accept();
+                InetAddress remoteAddr = socket.getInetAddress();
+                final String msg = "The server sockets created using the " +
+                        "LocalRMIServerSocketFactory only accept connections " +
+                        "from clients running on the host where the RMI " +
+                        "remote objects have been exported.";
+                if (remoteAddr.isAnyLocalAddress()) {
+                    // local address: accept the connection.
+                    return socket;
+                }
+                // Retrieve all the network interfaces on this host.
+                Enumeration<NetworkInterface> nis;
+                try {
+                    nis = NetworkInterface.getNetworkInterfaces();
+                } catch (SocketException e) {
+                    try {
+                        socket.close();
+                    } catch (IOException ioe) {
+                        // Ignore...
+                    }
+                    throw new IOException(msg, e);
+                }
+                // Walk through the network interfaces to see
+                // if any of them matches the client's address.
+                // If true, then the client's address is local.
+                while (nis.hasMoreElements()) {
+                    NetworkInterface ni = nis.nextElement();
+                    Enumeration<InetAddress> addrs = ni.getInetAddresses();
+                    while (addrs.hasMoreElements()) {
+                        InetAddress localAddr = addrs.nextElement();
+                        if (localAddr.equals(remoteAddr)) {
+                            return socket;
+                        }
+                    }
+                }
+                // The client's address is remote so refuse the connection.
+                try {
+                    socket.close();
+                } catch (IOException ioe) {
+                    // Ignore...
+                }
+                throw new IOException(msg);
+            }
+        };
+    }
+
+    /**
+     * Two LocalRMIServerSocketFactory objects
+     * are equal if they are of the same type.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        return (obj instanceof LocalRMIServerSocketFactory);
+    }
+
+    /**
+     * Returns a hash code value for this LocalRMIServerSocketFactory.
+     */
+    @Override
+    public int hashCode() {
+        return getClass().hashCode();
+    }
+}
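The factory is not tied to JMX: anything that accepts an RMIServerSocketFactory could use it in the same way. A brief, assumed sketch with LocateRegistry, where the null argument selects the default client socket factory.

import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
import sun.management.jmxremote.LocalRMIServerSocketFactory;

public class LocalRegistrySketch {
    public static void main(String[] args) throws Exception {
        // Server sockets created by the factory refuse connections whose
        // source address does not belong to one of this host's interfaces.
        Registry registry = LocateRegistry.createRegistry(
                1099,                                // conventional registry port
                null,                                // default client socket factory
                new LocalRMIServerSocketFactory());
        System.out.println("Local-only registry exported: " + registry);
    }
}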
--- a/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java	Wed Jul 05 16:43:15 2017 +0200
@@ -58,6 +58,9 @@
     // True if this Selector has been closed
     private boolean closed = false;
 
+    // Lock for close and cleanup
+    private Object closeLock = new Object();
+
     AbstractPollSelectorImpl(SelectorProvider sp, int channels, int offset) {
         super(sp);
         this.totalChannels = channels;
@@ -65,7 +68,11 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
-        pollWrapper.putEventOps(sk.getIndex(), ops);
+        synchronized (closeLock) {
+            if (closed)
+                throw new ClosedSelectorException();
+            pollWrapper.putEventOps(sk.getIndex(), ops);
+        }
     }
 
     public Selector wakeup() {
@@ -76,7 +83,9 @@
     protected abstract int doSelect(long timeout) throws IOException;
 
     protected void implClose() throws IOException {
-        if (!closed) {
+        synchronized (closeLock) {
+            if (closed)
+                return;
             closed = true;
             // Deregister channels
             for(int i=channelOffset; i<totalChannels; i++) {
@@ -129,23 +138,28 @@
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
-        // Check to see if the array is large enough
-        if (channelArray.length == totalChannels) {
-            // Make a larger array
-            int newSize = pollWrapper.totalChannels * 2;
-            SelectionKeyImpl temp[] = new SelectionKeyImpl[newSize];
-            // Copy over
-            for (int i=channelOffset; i<totalChannels; i++)
-                temp[i] = channelArray[i];
-            channelArray = temp;
-            // Grow the NativeObject poll array
-            pollWrapper.grow(newSize);
+        synchronized (closeLock) {
+            if (closed)
+                throw new ClosedSelectorException();
+
+            // Check to see if the array is large enough
+            if (channelArray.length == totalChannels) {
+                // Make a larger array
+                int newSize = pollWrapper.totalChannels * 2;
+                SelectionKeyImpl temp[] = new SelectionKeyImpl[newSize];
+                // Copy over
+                for (int i=channelOffset; i<totalChannels; i++)
+                    temp[i] = channelArray[i];
+                channelArray = temp;
+                // Grow the NativeObject poll array
+                pollWrapper.grow(newSize);
+            }
+            channelArray[totalChannels] = ski;
+            ski.setIndex(totalChannels);
+            pollWrapper.addEntry(ski.channel);
+            totalChannels++;
+            keys.add(ski);
         }
-        channelArray[totalChannels] = ski;
-        ski.setIndex(totalChannels);
-        pollWrapper.addEntry(ski.channel);
-        totalChannels++;
-        keys.add(ski);
     }
 
     protected void implDereg(SelectionKeyImpl ski) throws IOException {
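The selector changes in this changeset share one idea: operations that mutate selector state first check a closed flag, here under a dedicated closeLock, and throw ClosedSelectorException once the selector is closed. A stripped-down sketch of that idiom with invented names, not taken from the real selector classes:

import java.nio.channels.ClosedSelectorException;

final class CloseGuardSketch {
    private final Object closeLock = new Object();
    private boolean closed = false;

    void register(Object key) {
        synchronized (closeLock) {
            if (closed)
                throw new ClosedSelectorException();
            // ... grow the poll array and record the key here ...
        }
    }

    void close() {
        synchronized (closeLock) {
            if (closed)
                return;          // idempotent: only the first close cleans up
            closed = true;
            // ... deregister channels and release native resources here ...
        }
    }
}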
--- a/jdk/src/share/classes/sun/security/provider/certpath/BasicChecker.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/classes/sun/security/provider/certpath/BasicChecker.java	Wed Jul 05 16:43:15 2017 +0200
@@ -162,7 +162,7 @@
             throw new CertPathValidatorException
                 (msg + " check failed", e, null, -1,
                  BasicReason.INVALID_SIGNATURE);
-        } catch (GeneralSecurityException e) {
+        } catch (Exception e) {
             throw new CertPathValidatorException(msg + " check failed", e);
         }
 
--- a/jdk/src/share/lib/management/management.properties	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/share/lib/management/management.properties	Wed Jul 05 16:43:15 2017 +0200
@@ -82,7 +82,7 @@
 #
 # com.sun.management.snmp.interface=<InetAddress>
 #      Specifies the local interface on which the SNMP agent will bind.
-#      This is usefull when running on machines which have several
+#      This is useful when running on machines which have several
 #      interfaces defined. It makes it possible to listen to a specific
 #      subnet accessible through that interface.
 #      Default for this property is "localhost".
@@ -144,6 +144,26 @@
 #
 
 #
+# ########## RMI connector settings for local management ##########
+#
+# com.sun.management.jmxremote.local.only=true|false
+#      Default for this property is true. (Case for true/false ignored)
+#      If this property is specified as true then the local JMX RMI connector
+#      server will only accept connection requests from clients running on
+#      the host where the out-of-the-box JMX management agent is running.
+#      In order to ensure backwards compatibility, this property can be
+#      set to false. However, deploying the local management agent in this
+#      way is discouraged because the local JMX RMI connector server will
+#      accept connection requests from any client, either local or remote.
+#      For remote management, the remote JMX RMI connector server should
+#      be used instead, with authentication and SSL/TLS encryption enabled.
+#
+
+# To allow the local management agent to accept both local
+# and remote connection requests, uncomment the following line:
+# com.sun.management.jmxremote.local.only=false
+
+#
 # ###################### RMI SSL #############################
 #
 # com.sun.management.jmxremote.ssl=true|false
--- a/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java	Wed Jul 05 16:43:15 2017 +0200
@@ -46,15 +46,15 @@
     // The poll object
     DevPollArrayWrapper pollWrapper;
 
-    // The number of valid channels in this Selector's poll array
-    private int totalChannels;
-
     // Maps from file descriptors to keys
     private Map<Integer,SelectionKeyImpl> fdToKey;
 
     // True if this Selector has been closed
     private boolean closed = false;
 
+    // Lock for close/cleanup
+    private Object closeLock = new Object();
+
     // Lock for interrupt triggering and clearing
     private Object interruptLock = new Object();
     private boolean interruptTriggered = false;
@@ -72,7 +72,6 @@
         pollWrapper = new DevPollArrayWrapper();
         pollWrapper.initInterrupt(fd0, fd1);
         fdToKey = new HashMap<Integer,SelectionKeyImpl>();
-        totalChannels = 1;
     }
 
     protected int doSelect(long timeout)
@@ -131,45 +130,39 @@
     }
 
     protected void implClose() throws IOException {
-        if (!closed) {
-            closed = true;
-
-            // prevent further wakeup
-            synchronized (interruptLock) {
-                interruptTriggered = true;
-            }
+        if (closed)
+            return;
+        closed = true;
 
-            FileDispatcher.closeIntFD(fd0);
-            FileDispatcher.closeIntFD(fd1);
-            if (pollWrapper != null) {
+        // prevent further wakeup
+        synchronized (interruptLock) {
+            interruptTriggered = true;
+        }
 
-                pollWrapper.release(fd0);
-                pollWrapper.closeDevPollFD();
-                pollWrapper = null;
-                selectedKeys = null;
+        FileDispatcher.closeIntFD(fd0);
+        FileDispatcher.closeIntFD(fd1);
 
-                // Deregister channels
-                Iterator i = keys.iterator();
-                while (i.hasNext()) {
-                    SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
-                    deregister(ski);
-                    SelectableChannel selch = ski.channel();
-                    if (!selch.isOpen() && !selch.isRegistered())
-                        ((SelChImpl)selch).kill();
-                    i.remove();
-                }
-                totalChannels = 0;
+        pollWrapper.release(fd0);
+        pollWrapper.closeDevPollFD();
+        selectedKeys = null;
 
-            }
-            fd0 = -1;
-            fd1 = -1;
+        // Deregister channels
+        Iterator i = keys.iterator();
+        while (i.hasNext()) {
+            SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
+            deregister(ski);
+            SelectableChannel selch = ski.channel();
+            if (!selch.isOpen() && !selch.isRegistered())
+                ((SelChImpl)selch).kill();
+            i.remove();
         }
+        fd0 = -1;
+        fd1 = -1;
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
         int fd = IOUtil.fdVal(ski.channel.getFD());
         fdToKey.put(Integer.valueOf(fd), ski);
-        totalChannels++;
         keys.add(ski);
     }
 
@@ -179,7 +172,6 @@
         int fd = ski.channel.getFDVal();
         fdToKey.remove(Integer.valueOf(fd));
         pollWrapper.release(fd);
-        totalChannels--;
         ski.setIndex(-1);
         keys.remove(ski);
         selectedKeys.remove(ski);
@@ -190,6 +182,8 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
+        if (closed)
+            throw new ClosedSelectorException();
         int fd = IOUtil.fdVal(sk.channel.getFD());
         pollWrapper.setInterest(fd, ops);
     }
--- a/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java	Wed Jul 05 16:43:15 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2005-2007 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,7 +31,6 @@
 import java.util.*;
 import sun.misc.*;
 
-
 /**
  * An implementation of Selector for Linux 2.6+ kernels that uses
  * the epoll event notification facility.
@@ -51,7 +50,7 @@
     private Map<Integer,SelectionKeyImpl> fdToKey;
 
     // True if this Selector has been closed
-    private boolean closed = false;
+    private volatile boolean closed = false;
 
     // Lock for interrupt triggering and clearing
     private Object interruptLock = new Object();
@@ -128,40 +127,41 @@
     }
 
     protected void implClose() throws IOException {
-        if (!closed) {
-            closed = true;
+        if (closed)
+            return;
+        closed = true;
 
-            // prevent further wakeup
-            synchronized (interruptLock) {
-                interruptTriggered = true;
-            }
+        // prevent further wakeup
+        synchronized (interruptLock) {
+            interruptTriggered = true;
+        }
 
-            FileDispatcher.closeIntFD(fd0);
-            FileDispatcher.closeIntFD(fd1);
-            if (pollWrapper != null) {
+        FileDispatcher.closeIntFD(fd0);
+        FileDispatcher.closeIntFD(fd1);
 
-                pollWrapper.release(fd0);
-                pollWrapper.closeEPollFD();
-                pollWrapper = null;
-                selectedKeys = null;
+        pollWrapper.release(fd0);
+        pollWrapper.closeEPollFD();
+        // it is possible
+        selectedKeys = null;
 
-                // Deregister channels
-                Iterator i = keys.iterator();
-                while (i.hasNext()) {
-                    SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
-                    deregister(ski);
-                    SelectableChannel selch = ski.channel();
-                    if (!selch.isOpen() && !selch.isRegistered())
-                        ((SelChImpl)selch).kill();
-                    i.remove();
-                }
-            }
-            fd0 = -1;
-            fd1 = -1;
+        // Deregister channels
+        Iterator i = keys.iterator();
+        while (i.hasNext()) {
+            SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
+            deregister(ski);
+            SelectableChannel selch = ski.channel();
+            if (!selch.isOpen() && !selch.isRegistered())
+                ((SelChImpl)selch).kill();
+            i.remove();
         }
+
+        fd0 = -1;
+        fd1 = -1;
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
+        if (closed)
+            throw new ClosedSelectorException();
         int fd = IOUtil.fdVal(ski.channel.getFD());
         fdToKey.put(Integer.valueOf(fd), ski);
         pollWrapper.add(fd);
@@ -183,6 +183,8 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
+        if (closed)
+            throw new ClosedSelectorException();
         int fd = IOUtil.fdVal(sk.channel.getFD());
         pollWrapper.setInterest(fd, ops);
     }
--- a/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java	Thu Oct 23 21:56:41 2008 -0700
+++ b/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java	Wed Jul 05 16:43:15 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright 2002-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2002-2007 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -80,6 +80,9 @@
     // File descriptors corresponding to source and sink
     private final int wakeupSourceFd, wakeupSinkFd;
 
+    // Lock for close cleanup
+    private Object closeLock = new Object();
+
     // Maps file descriptors to their indices in  pollArray
     private final static class FdMap extends HashMap<Integer, MapEntry> {
         static final long serialVersionUID = 0L;
@@ -473,42 +476,48 @@
     }
 
     protected void implClose() throws IOException {
-        if (channelArray != null) {
-            if (pollWrapper != null) {
-                // prevent further wakeup
-                synchronized (interruptLock) {
-                    interruptTriggered = true;
-                }
-                wakeupPipe.sink().close();
-                wakeupPipe.source().close();
-                for(int i = 1; i < totalChannels; i++) { // Deregister channels
-                    if (i % MAX_SELECTABLE_FDS != 0) { // skip wakeupEvent
-                        deregister(channelArray[i]);
-                        SelectableChannel selch = channelArray[i].channel();
-                        if (!selch.isOpen() && !selch.isRegistered())
-                            ((SelChImpl)selch).kill();
+        synchronized (closeLock) {
+            if (channelArray != null) {
+                if (pollWrapper != null) {
+                    // prevent further wakeup
+                    synchronized (interruptLock) {
+                        interruptTriggered = true;
                     }
-                }
-                pollWrapper.free();
-                pollWrapper = null;
-                selectedKeys = null;
-                channelArray = null;
-                threads.clear();
-                // Call startThreads. All remaining helper threads now exit,
-                // since threads.size() = 0;
-                startLock.startThreads();
+                    wakeupPipe.sink().close();
+                    wakeupPipe.source().close();
+                    for(int i = 1; i < totalChannels; i++) { // Deregister channels
+                        if (i % MAX_SELECTABLE_FDS != 0) { // skip wakeupEvent
+                            deregister(channelArray[i]);
+                            SelectableChannel selch = channelArray[i].channel();
+                            if (!selch.isOpen() && !selch.isRegistered())
+                                ((SelChImpl)selch).kill();
+                        }
+                    }
+                    pollWrapper.free();
+                    pollWrapper = null;
+                    selectedKeys = null;
+                    channelArray = null;
+                    threads.clear();
+                    // Call startThreads. All remaining helper threads now exit,
+                    // since threads.size() = 0;
+                    startLock.startThreads();
+                }
             }
         }
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
-        growIfNeeded();
-        channelArray[totalChannels] = ski;
-        ski.setIndex(totalChannels);
-        fdMap.put(ski);
-        keys.add(ski);
-        pollWrapper.addEntry(totalChannels, ski);
-        totalChannels++;
+        synchronized (closeLock) {
+            if (pollWrapper == null)
+                throw new ClosedSelectorException();
+            growIfNeeded();
+            channelArray[totalChannels] = ski;
+            ski.setIndex(totalChannels);
+            fdMap.put(ski);
+            keys.add(ski);
+            pollWrapper.addEntry(totalChannels, ski);
+            totalChannels++;
+        }
     }
 
     private void growIfNeeded() {
@@ -554,7 +563,11 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
-        pollWrapper.putEventOps(sk.getIndex(), ops);
+        synchronized (closeLock) {
+            if (pollWrapper == null)
+                throw new ClosedSelectorException();
+            pollWrapper.putEventOps(sk.getIndex(), ops);
+        }
     }
 
     public Selector wakeup() {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/nio/channels/Selector/CloseThenRegister.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/* @test
+ * @bug 5025260
+ * @summary ClosedSelectorException is expected when registering after close
+ */
+
+import java.net.*;
+import java.nio.channels.*;
+
+public class CloseThenRegister {
+
+    public static void main (String [] args) throws Exception {
+        try {
+            Selector s = Selector.open();
+            s.close();
+            ServerSocketChannel c = ServerSocketChannel.open();
+            c.socket().bind(new InetSocketAddress(40000));
+            c.configureBlocking(false);
+            c.register(s, SelectionKey.OP_ACCEPT);
+        } catch (ClosedSelectorException cse) {
+            return;
+        }
+        throw new RuntimeException("register after close does not cause CSE!");
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/javax/management/mxbean/TypeNameTest.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6757225
+ * @summary Test that type names in MXBeans match their spec.
+ * @author Eamonn McManus
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import java.util.List;
+import java.util.Map;
+import javax.management.MBeanAttributeInfo;
+import javax.management.MBeanInfo;
+import javax.management.MBeanServer;
+import javax.management.MBeanServerFactory;
+import javax.management.ObjectName;
+import javax.management.StandardMBean;
+
+public class TypeNameTest {
+    public static interface TestMXBean {
+        public int getInt();
+        public String IntName = "int";
+
+        public Map<String, Integer> getMapSI();
+        public String MapSIName = "java.util.Map<java.lang.String, java.lang.Integer>";
+
+        public Map<String, int[]> getMapSInts();
+        public String MapSIntsName = "java.util.Map<java.lang.String, int[]>";
+
+        public List<List<int[]>> getListListInts();
+        public String ListListIntsName = "java.util.List<java.util.List<int[]>>";
+    }
+
+    private static InvocationHandler nullIH = new InvocationHandler() {
+        public Object invoke(Object proxy, Method method, Object[] args)
+                throws Throwable {
+            return null;
+        }
+    };
+
+    static String failure;
+
+    public static void main(String[] args) throws Exception {
+        TestMXBean testImpl = (TestMXBean) Proxy.newProxyInstance(
+                TestMXBean.class.getClassLoader(), new Class<?>[] {TestMXBean.class}, nullIH);
+        Object mxbean = new StandardMBean(testImpl, TestMXBean.class, true);
+        MBeanServer mbs = MBeanServerFactory.newMBeanServer();
+        ObjectName name = new ObjectName("a:b=c");
+        mbs.registerMBean(mxbean, name);
+        MBeanInfo mbi = mbs.getMBeanInfo(name);
+        MBeanAttributeInfo[] mbais = mbi.getAttributes();
+        for (MBeanAttributeInfo mbai : mbais) {
+            String attrName = mbai.getName();
+            String attrTypeName = (String) mbai.getDescriptor().getFieldValue("originalType");
+            String fieldName = attrName + "Name";
+            Field nameField = TestMXBean.class.getField(fieldName);
+            String expectedTypeName = (String) nameField.get(null);
+            if (expectedTypeName.equals(attrTypeName)) {
+                System.out.println("OK: " + attrName + ": " + attrTypeName);
+            } else {
+                failure = "For attribute " + attrName + " expected type name \"" +
+                        expectedTypeName + "\", found type name \"" + attrTypeName +
+                        "\"";
+                System.out.println("FAIL: " + failure);
+            }
+        }
+        if (failure == null)
+            System.out.println("TEST PASSED");
+        else
+            throw new Exception("TEST FAILED: " + failure);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/Action.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * Action used in Context.doAs
+ */
+public interface Action {
+    /**
+     * This method always reads a byte block and emits another one
+     */
+    byte[] run(Context s, byte[] input) throws Exception;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/BasicKrb5Test.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6706974
+ * @summary Add krb5 test infrastructure
+ */
+
+import org.ietf.jgss.GSSName;
+import sun.security.jgss.GSSUtil;
+import sun.security.krb5.Config;
+import sun.security.krb5.internal.crypto.EType;
+
+/**
+ * Basic JGSS/krb5 test with 3 parties: client, server, backend server. Each
+ * party uses JAAS login to get subjects and executes JGSS calls using
+ * Subject.doAs.
+ */
+public class BasicKrb5Test {
+
+    /**
+     * @param args empty or etype
+     */
+    public static void main(String[] args)
+            throws Exception {
+
+        String etype = null;
+        if (args.length > 0) {
+            etype = args[0];
+        }
+
+        // Creates and starts the KDC. This line must be put ahead of etype check
+        // since the check needs a krb5.conf.
+        new OneKDC(etype).writeJAASConf();
+
+        System.out.println("Testing etype " + etype);
+        if (etype != null && !EType.isSupported(Config.getInstance().getType(etype))) {
+            System.out.println("Not supported.");
+            System.exit(0);
+        }
+
+        new BasicKrb5Test().go(OneKDC.SERVER, OneKDC.BACKEND);
+    }
+
+    void go(final String server, final String backend) throws Exception {
+        Context c, s, s2, b;
+        c = Context.fromJAAS("client");
+        s = Context.fromJAAS("server");
+        b = Context.fromJAAS("backend");
+
+        c.startAsClient(server, GSSUtil.GSS_KRB5_MECH_OID);
+        c.x().requestCredDeleg(true);
+        s.startAsServer(GSSUtil.GSS_KRB5_MECH_OID);
+
+        c.status();
+        s.status();
+
+        Context.handshake(c, s);
+        GSSName client = c.x().getSrcName();
+
+        c.status();
+        s.status();
+
+        Context.transmit("i say high --", c, s);
+        Context.transmit("   you say low", s, c);
+
+        s2 = s.delegated();
+        s.dispose();
+        s = null;
+
+        s2.startAsClient(backend, GSSUtil.GSS_KRB5_MECH_OID);
+        b.startAsServer(GSSUtil.GSS_KRB5_MECH_OID);
+
+        s2.status();
+        b.status();
+
+        Context.handshake(s2, b);
+        GSSName client2 = b.x().getSrcName();
+
+        if (!client.equals(client2)) {
+            throw new Exception("Delegation failed");
+        }
+
+        s2.status();
+        b.status();
+
+        Context.transmit("you say hello --", s2, b);
+        Context.transmit("   i say goodbye", b, s2);
+
+        s2.dispose();
+        b.dispose();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/CleanState.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6716534
+ * @summary Krb5LoginModule has not cleaned temp info between authentication attempts
+ */
+import com.sun.security.auth.module.Krb5LoginModule;
+import java.util.HashMap;
+import java.util.Map;
+import javax.security.auth.Subject;
+import javax.security.auth.callback.Callback;
+import javax.security.auth.callback.CallbackHandler;
+import javax.security.auth.callback.NameCallback;
+import javax.security.auth.callback.PasswordCallback;
+
+public class CleanState {
+    public static void main(String[] args) throws Exception {
+        CleanState x = new CleanState();
+        new OneKDC(null);
+        x.go();
+    }
+
+    void go() throws Exception {
+        Krb5LoginModule krb5 = new Krb5LoginModule();
+
+        final String name = OneKDC.USER;
+        final char[] password = OneKDC.PASS;
+        char[] badpassword = "hellokitty".toCharArray();
+
+        Map<String,String> map = new HashMap<String,String>();
+        map.put("useTicketCache", "false");
+        map.put("doNotPrompt", "false");
+        map.put("tryFirstPass", "true");
+        Map<String,Object> shared = new HashMap<String,Object>();
+        shared.put("javax.security.auth.login.name", name);
+        shared.put("javax.security.auth.login.password", badpassword);
+
+        krb5.initialize(new Subject(), new CallbackHandler() {
+            @Override
+            public void handle(Callback[] callbacks) {
+                for(Callback callback: callbacks) {
+                    if (callback instanceof NameCallback) {
+                        ((NameCallback)callback).setName(name);
+                    }
+                    if (callback instanceof PasswordCallback) {
+                        ((PasswordCallback)callback).setPassword(password);
+                    }
+                }
+            }
+        }, shared, map);
+        krb5.login();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/Context.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,386 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+import com.sun.security.auth.module.Krb5LoginModule;
+import java.security.PrivilegedActionException;
+import java.security.PrivilegedExceptionAction;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import javax.security.auth.Subject;
+import javax.security.auth.kerberos.KerberosKey;
+import javax.security.auth.kerberos.KerberosTicket;
+import javax.security.auth.login.LoginContext;
+import org.ietf.jgss.GSSContext;
+import org.ietf.jgss.GSSCredential;
+import org.ietf.jgss.GSSException;
+import org.ietf.jgss.GSSManager;
+import org.ietf.jgss.GSSName;
+import org.ietf.jgss.MessageProp;
+import org.ietf.jgss.Oid;
+
+/**
+ * Context of a JGSS subject, encapsulating Subject and GSSContext.
+ *
+ * Three "constructors", which acquire the (private) credentials and fill
+ * it into the Subject:
+ *
+ * 1. static fromJAAS(): Creates a Context using a JAAS login config entry
+ * 2. static fromUserPass(): Creates a Context using a username and a password
+ * 3. delegated(): A new context which uses the delegated credentials from a
+ *    previously established acceptor Context
+ *
+ * Two context initiators, which create the GSSContext object inside:
+ *
+ * 1. startAsClient()
+ * 2. startAsServer()
+ *
+ * Privileged action:
+ *    doAs(): Performs an action in the name of the Subject
+ *
+ * Handshake process:
+ *    static handshake(initiator, acceptor)
+ *
+ * A typical four-phase data communication which includes all four GSS
+ * actions (wrap, unwrap, getMIC and verifyMIC):
+ *    static transmit(message, from, to)
+ */
+public class Context {
+
+    private Subject s;
+    private GSSContext x;
+    private boolean f;      // context established?
+    private String name;
+    private GSSCredential cred;     // see static method delegated().
+
+    private Context() {}
+
+    /**
+     * Using the delegated credentials from a previous acceptor
+     * @param c
+     */
+    public Context delegated() throws Exception {
+        Context out = new Context();
+        out.s = s;
+        out.cred = x.getDelegCred();
+        out.name = name + " as " + out.cred.getName().toString();
+        return out;
+    }
+
+    /**
+     * Logs in using a JAAS login config entry name
+     */
+    public static Context fromJAAS(final String name) throws Exception {
+        Context out = new Context();
+        out.name = name;
+        LoginContext lc = new LoginContext(name);
+        lc.login();
+        out.s = lc.getSubject();
+        return out;
+    }
+
+    /**
+     * Logs in with a username and a password, using Krb5LoginModule directly
+     * @param storeKey true if key should be saved, used on acceptor side
+     */
+    public static Context fromUserPass(String user, char[] pass, boolean storeKey) throws Exception {
+        Context out = new Context();
+        out.name = user;
+        out.s = new Subject();
+        Krb5LoginModule krb5 = new Krb5LoginModule();
+        Map<String, String> map = new HashMap<String, String>();
+        map.put("tryFirstPass", "true");
+        if (storeKey) {
+            map.put("storeKey", "true");
+        }
+        Map<String, Object> shared = new HashMap<String, Object>();
+        shared.put("javax.security.auth.login.name", user);
+        shared.put("javax.security.auth.login.password", pass);
+
+        krb5.initialize(out.s, null, shared, map);
+        krb5.login();
+        krb5.commit();
+        return out;
+    }
+
+    /**
+     * Starts as a client
+     * @param target communication peer
+     * @param mech GSS mech
+     * @throws java.lang.Exception
+     */
+    public void startAsClient(final String target, final Oid mech) throws Exception {
+        doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] dummy) throws Exception {
+                GSSManager m = GSSManager.getInstance();
+                me.x = m.createContext(
+                        target.indexOf('@') < 0 ?
+                            m.createName(target, null) :
+                            m.createName(target, GSSName.NT_HOSTBASED_SERVICE),
+                        mech,
+                        cred,
+                        GSSContext.DEFAULT_LIFETIME);
+                return null;
+            }
+        }, null);
+        f = false;
+    }
+
+    /**
+     * Starts as a server
+     * @param mech GSS mech
+     * @throws java.lang.Exception
+     */
+    public void startAsServer(final Oid mech) throws Exception {
+        doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] dummy) throws Exception {
+                GSSManager m = GSSManager.getInstance();
+                me.x = m.createContext(m.createCredential(
+                        null,
+                        GSSCredential.INDEFINITE_LIFETIME,
+                        mech,
+                        GSSCredential.ACCEPT_ONLY));
+                return null;
+            }
+        }, null);
+        f = false;
+    }
+
+    /**
+     * Accesses the internal GSSContext object. Currently it's used for --
+     *
+     * 1. calling requestXXX() before handshake
+     * 2. accessing source name
+     *
+     * Note: If the application needs to do any privileged call on this
+     * object, please use doAs(). Otherwise, it can be done directly. The
+     * methods listed above are all non-privileged calls.
+     *
+     * @return the GSSContext object
+     */
+    public GSSContext x() {
+        return x;
+    }
+
+    /**
+     * Disposes the GSSContext within
+     * @throws org.ietf.jgss.GSSException
+     */
+    public void dispose() throws GSSException {
+        x.dispose();
+    }
+
+    /**
+     * Does something using the Subject inside
+     * @param action the action
+     * @param in the input byte
+     * @return the output byte
+     * @throws java.lang.Exception
+     */
+    public byte[] doAs(final Action action, final byte[] in) throws Exception {
+        try {
+            return Subject.doAs(s, new PrivilegedExceptionAction<byte[]>() {
+
+                @Override
+                public byte[] run() throws Exception {
+                    return action.run(Context.this, in);
+                }
+            });
+        } catch (PrivilegedActionException pae) {
+            throw pae.getException();
+        }
+    }
+
+    /**
+     * Prints status of GSSContext and Subject
+     * @throws java.lang.Exception
+     */
+    public void status() throws Exception {
+        System.out.println("STATUS OF " + name.toUpperCase());
+        try {
+            StringBuffer sb = new StringBuffer();
+            if (x.getAnonymityState()) {
+                sb.append("anon, ");
+            }
+            if (x.getConfState()) {
+                sb.append("conf, ");
+            }
+            if (x.getCredDelegState()) {
+                sb.append("deleg, ");
+            }
+            if (x.getIntegState()) {
+                sb.append("integ, ");
+            }
+            if (x.getMutualAuthState()) {
+                sb.append("mutual, ");
+            }
+            if (x.getReplayDetState()) {
+                sb.append("rep det, ");
+            }
+            if (x.getSequenceDetState()) {
+                sb.append("seq det, ");
+            }
+            System.out.println("Context status of " + name + ": " + sb.toString());
+            System.out.println(x.getSrcName() + " -> " + x.getTargName());
+        } catch (Exception e) {
+            ;// Don't care
+        }
+        System.out.println("=====================================");
+        for (Object o : s.getPrivateCredentials()) {
+            System.out.println("    " + o.getClass());
+            if (o instanceof KerberosTicket) {
+                KerberosTicket kt = (KerberosTicket) o;
+                System.out.println("        " + kt.getServer() + " for " + kt.getClient());
+            } else if (o instanceof KerberosKey) {
+                KerberosKey kk = (KerberosKey) o;
+                System.out.print("        " + kk.getKeyType() + " " + kk.getVersionNumber() + " " + kk.getAlgorithm() + " ");
+                for (byte b : kk.getEncoded()) {
+                    System.out.printf("%02X", b & 0xff);
+                }
+                System.out.println();
+            } else if (o instanceof Map) {
+                Map map = (Map) o;
+                for (Object k : map.keySet()) {
+                    System.out.println("        " + k + ": " + map.get(k));
+                }
+            }
+        }
+    }
+
+    /**
+     * Transmits a message from one Context to another. The sender wraps the
+     * message and sends it to the receiver. The receiver unwraps it, creates
+     * a MIC of the clear text and sends it back to the sender. The sender
+     * verifies the MIC against the message sent earlier.
+     * @param message the message
+     * @param s1 the sender
+     * @param s2 the receiver
+     * @throws java.lang.Exception If anything goes wrong
+     */
+    static public void transmit(final String message, final Context s1,
+            final Context s2) throws Exception {
+        final byte[] messageBytes = message.getBytes();
+        System.out.printf("-------------------- TRANSMIT from %s to %s------------------------\n",
+                s1.name, s2.name);
+
+        byte[] t = s1.doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] dummy) throws Exception {
+                System.out.println("wrap");
+                MessageProp p1 = new MessageProp(0, true);
+                byte[] out = me.x.wrap(messageBytes, 0, messageBytes.length, p1);
+                System.out.println(printProp(p1));
+                return out;
+            }
+        }, null);
+
+        t = s2.doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] input) throws Exception {
+                MessageProp p1 = new MessageProp(0, true);
+                byte[] bytes = me.x.unwrap(input, 0, input.length, p1);
+                if (!Arrays.equals(messageBytes, bytes))
+                    throw new Exception("wrap/unwrap mismatch");
+                System.out.println("unwrap");
+                System.out.println(printProp(p1));
+                p1 = new MessageProp(0, true);
+                System.out.println("getMIC");
+                bytes = me.x.getMIC(bytes, 0, bytes.length, p1);
+                System.out.println(printProp(p1));
+                return bytes;
+            }
+        }, t);
+        // The sender verifies the MIC against the message sent earlier
+        s1.doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] input) throws Exception {
+                MessageProp p1 = new MessageProp(0, true);
+                System.out.println("verifyMIC");
+                me.x.verifyMIC(input, 0, input.length,
+                        messageBytes, 0, messageBytes.length,
+                        p1);
+                System.out.println(printProp(p1));
+                return null;
+            }
+        }, t);
+    }
+
+    /**
+     * Returns a string description of a MessageProp object
+     * @param prop the object
+     * @return the description
+     */
+    static public String printProp(MessageProp prop) {
+        StringBuffer sb = new StringBuffer();
+        sb.append("MessagePop: ");
+        sb.append("QOP="+ prop.getQOP() + ", ");
+        sb.append(prop.getPrivacy()?"privacy, ":"");
+        sb.append(prop.isDuplicateToken()?"dup, ":"");
+        sb.append(prop.isGapToken()?"gap, ":"");
+        sb.append(prop.isOldToken()?"old, ":"");
+        sb.append(prop.isUnseqToken()?"unseq, ":"");
+        sb.append(prop.getMinorString()+ "(" + prop.getMinorStatus()+")");
+        return sb.toString();
+    }
+
+    /**
+     * Handshake (security context establishment process) between two Contexts
+     * @param c the initiator
+     * @param s the acceptor
+     * @throws java.lang.Exception
+     */
+    static public void handshake(final Context c, final Context s) throws Exception {
+        byte[] t = new byte[0];
+        while (!c.f || !s.f) {
+            t = c.doAs(new Action() {
+                @Override
+                public byte[] run(Context me, byte[] input) throws Exception {
+                    if (me.x.isEstablished()) {
+                        me.f = true;
+                        System.out.println(c.name + " side established");
+                        return null;
+                    } else {
+                        System.out.println(c.name + " call initSecContext");
+                        return me.x.initSecContext(input, 0, input.length);
+                    }
+                }
+            }, t);
+
+            t = s.doAs(new Action() {
+                @Override
+                public byte[] run(Context me, byte[] input) throws Exception {
+                    if (me.x.isEstablished()) {
+                        me.f = true;
+                        System.out.println(s.name + " side established");
+                        return null;
+                    } else {
+                        System.out.println(s.name + " called acceptSecContext");
+                        return me.x.acceptSecContext(input, 0, input.length);
+                    }
+                }
+            }, t);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/CrossRealm.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6706974
+ * @summary Add krb5 test infrastructure
+ */
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.security.Security;
+import javax.security.auth.callback.Callback;
+import javax.security.auth.callback.CallbackHandler;
+import javax.security.auth.callback.NameCallback;
+import javax.security.auth.callback.PasswordCallback;
+import javax.security.auth.callback.UnsupportedCallbackException;
+import org.ietf.jgss.GSSContext;
+import org.ietf.jgss.GSSManager;
+import org.ietf.jgss.GSSName;
+import sun.security.jgss.GSSUtil;
+
+public class CrossRealm implements CallbackHandler {
+    public static void main(String[] args) throws Exception {
+        startKDCs();
+        xRealmAuth();
+    }
+
+    static void startKDCs() throws Exception {
+        // Create and start the KDC
+        KDC kdc1 = KDC.create("RABBIT.HOLE");
+        kdc1.addPrincipal("dummy", "bogus".toCharArray());
+        kdc1.addPrincipalRandKey("krbtgt/RABBIT.HOLE");
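+        // Cross-realm trust: the inter-realm principals krbtgt/SNAKE.HOLE
+        // (in RABBIT.HOLE, here) and krbtgt/RABBIT.HOLE (in SNAKE.HOLE, below)
+        // share the same password.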
+        kdc1.addPrincipal("krbtgt/SNAKE.HOLE", "sharedsec".toCharArray());
+
+        KDC kdc2 = KDC.create("SNAKE.HOLE");
+        kdc2.addPrincipalRandKey("krbtgt/SNAKE.HOLE");
+        kdc2.addPrincipal("krbtgt/RABBIT.HOLE", "sharedsec".toCharArray());
+        kdc2.addPrincipalRandKey("host/www.snake.hole");
+
+        KDC.saveConfig("krb5-localkdc.conf", kdc1, kdc2,
+                "forwardable=true",
+                "[domain_realm]",
+                ".snake.hole=SNAKE.HOLE");
+        System.setProperty("java.security.krb5.conf", "krb5-localkdc.conf");
+    }
+
+    static void xRealmAuth() throws Exception {
+        Security.setProperty("auth.login.defaultCallbackHandler", "CrossRealm");
+        System.setProperty("java.security.auth.login.config", "jaas-localkdc.conf");
+        System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
+        FileOutputStream fos = new FileOutputStream("jaas-localkdc.conf");
+        fos.write(("com.sun.security.jgss.krb5.initiate {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule\n" +
+                "    required\n" +
+                "    principal=dummy\n" +
+                "    doNotPrompt=false\n" +
+                "    useTicketCache=false\n" +
+                "    ;\n" +
+                "};").getBytes());
+        fos.close();
+
+        GSSManager m = GSSManager.getInstance();
+        m.createContext(
+                m.createName("host@www.snake.hole", GSSName.NT_HOSTBASED_SERVICE),
+                GSSUtil.GSS_KRB5_MECH_OID,
+                null,
+                GSSContext.DEFAULT_LIFETIME).initSecContext(new byte[0], 0, 0);
+    }
+
+    @Override
+    public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException {
+        for (Callback callback : callbacks) {
+            if (callback instanceof NameCallback) {
+                ((NameCallback) callback).setName("dummy");
+            }
+            if (callback instanceof PasswordCallback) {
+                ((PasswordCallback) callback).setPassword("bogus".toCharArray());
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/KDC.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,969 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.net.*;
+import java.io.*;
+import java.lang.reflect.Method;
+import java.security.SecureRandom;
+import java.util.*;
+import java.util.concurrent.*;
+import sun.security.krb5.*;
+import sun.security.krb5.internal.*;
+import sun.security.krb5.internal.crypto.KeyUsage;
+import sun.security.krb5.internal.ktab.KeyTab;
+import sun.security.util.DerInputStream;
+import sun.security.util.DerOutputStream;
+import sun.security.util.DerValue;
+
+/**
+ * A KDC server.
+ * <p>
+ * Features:
+ * <ol>
+ * <li> Supports TCP and UDP
+ * <li> Supports AS-REQ and TGS-REQ
+ * <li> Principal db and other settings hard coded in application
+ * <li> Options, say, request preauth or not
+ * </ol>
+ * Side effects:
+ * <ol>
+ * <li> The Sun-internal class <code>sun.security.krb5.Config</code> is a
+ * singleton and initialized according to Kerberos settings (krb5.conf and
+ * java.security.krb5.* system properties). This means once it's initialized
+ * it will not automatically notice any changes to these settings (or file
+ * changes of krb5.conf). The KDC class normally does not touch these
+ * settings (except for the <code>writeKtab()</code> method). However, to make
+ * sure nothing ever goes wrong, if you want to make any changes to these
+ * settings after calling a KDC method, call <code>Config.refresh()</code> to
+ * make sure your changes are reflected in the <code>Config</code> object.
+ * </ol>
+ * Issues and TODOs:
+ * <ol>
+ * <li> Generate a krb5.conf usable on another machine; currently the kdc is
+ * always localhost
+ * <li> More options to KDC, say, error output, or a response nonce !=
+ * request nonce
+ * </ol>
+ * Note: This program uses internal krb5 classes (including reflection to
+ * access private fields and methods).
+ * <p>
+ * Usages:
+ * <p>
+ * 1. Init and start the KDC:
+ * <pre>
+ * KDC kdc = KDC.create("REALM.NAME", port, isDaemon);
+ * KDC kdc = KDC.create("REALM.NAME");
+ * </pre>
+ * Here, <code>port</code> is the UDP and TCP port number the KDC server
+ * listens on. If zero, a random port is chosen, which you can retrieve later
+ * with <code>getPort()</code>.
+ * <p>
+ * If <code>isDaemon</code> is true, the KDC worker threads will be daemons.
+ * <p>
+ * The shortcut <code>KDC.create("REALM.NAME")</code> has port=0 and
+ * isDaemon=true, and is commonly used in an embedded KDC.
+ * <p>
+ * 2. Adding users:
+ * <pre>
+ * kdc.addPrincipal(String principal_name, char[] password);
+ * kdc.addPrincipalRandKey(String principal_name);
+ * </pre>
+ * A service principal's name should look like "host/f.q.d.n". The second form
+ * generates a random key. To expose this key, call <code>writeKtab()</code> to
+ * save the keys into a keytab file.
+ * <p>
+ * Note that you need to add the principal name krbtgt/REALM.NAME yourself.
+ * <p>
+ * Note that you can safely add a principal at any time after the KDC is
+ * started and before a user requests info on this principal.
+ * <p>
+ * 3. Other public methods:
+ * <ul>
+ * <li> <code>getPort</code>: Returns the port number the KDC uses
+ * <li> <code>getRealm</code>: Returns the realm name
+ * <li> <code>writeKtab</code>: Writes all principals' keys into a keytab file
+ * <li> <code>saveConfig</code>: Saves a krb5.conf file to access this KDC
+ * <li> <code>setOption</code>: Sets various options
+ * </ul>
+ * Read the javadoc for details. A lazy developer can use <code>OneKDC</code>
+ * directly.
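+ * <p>
+ * For example, a minimal embedded setup (a sketch only, combining the calls
+ * above; all names are arbitrary) might look like:
+ * <pre>
+ * KDC kdc = KDC.create("RABBIT.HOLE");
+ * kdc.addPrincipal("dummy", "bogus".toCharArray());
+ * kdc.addPrincipalRandKey("krbtgt/RABBIT.HOLE");
+ * kdc.addPrincipalRandKey("host/www.rabbit.hole");
+ * KDC.saveConfig("krb5.conf", kdc);
+ * System.setProperty("java.security.krb5.conf", "krb5.conf");
+ * kdc.writeKtab("krb5.keytab");
+ * </pre>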
+ */
+public class KDC {
+
+    // Under the hood.
+
+    // The random generator to generate random keys (including session keys)
+    private static SecureRandom secureRandom = new SecureRandom();
+    // Principal db
+    private Map<String,char[]> passwords = new HashMap<String,char[]>();
+    // Realm name
+    private String realm;
+    // The request/response job queue
+    private BlockingQueue<Job> q = new ArrayBlockingQueue<Job>(100);
+    // Service port number
+    private int port;
+    // Options
+    private Map<Option,Object> options = new HashMap<Option,Object>();
+
+    /**
+     * Option names, to be expanded forever.
+     */
+    public static enum Option {
+        /**
+         * Whether pre-authentication is required. Default Boolean.TRUE
+         */
+        PREAUTH_REQUIRED,
+    };
+
+    /**
+     * A standalone KDC server.
+     * @param args
+     * @throws java.lang.Exception
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length > 0) {
+            if (args[0].equals("-help") || args[0].equals("--help")) {
+                System.out.println("Usage:");
+                System.out.println("   java " + KDC.class + "       " +
+                        "Start KDC on port 8888");
+                return;
+            }
+        }
+        String localhost = "localhost";
+        try {
+            localhost = InetAddress.getByName(localhost)
+                    .getCanonicalHostName();
+        } catch (UnknownHostException uhe) {
+            ;   // Ignore, localhost is still "localhost"
+        }
+        KDC kdc = create("RABBIT.HOLE", 8888, false);
+        kdc.addPrincipal("dummy", "bogus".toCharArray());
+        kdc.addPrincipal("foo", "bar".toCharArray());
+        kdc.addPrincipalRandKey("krbtgt/" + kdc.realm);
+        kdc.addPrincipalRandKey("server/" + localhost);
+        kdc.addPrincipalRandKey("backend/" + localhost);
+    }
+
+    /**
+     * Creates and starts a KDC running as a daemon on a random port.
+     * @param realm the realm name
+     * @return the running KDC instance
+     * @throws java.io.IOException for any socket creation error
+     */
+    public static KDC create(String realm) throws IOException {
+        return create(realm, 0, true);
+    }
+
+    /**
+     * Creates and starts a KDC server.
+     * @param realm the realm name
+     * @param port the TCP and UDP port to listen to. A random port will be
+     *        chosen if zero.
+     * @param asDaemon if true, KDC threads will be daemon threads
+     * @return the running KDC instance
+     * @throws java.io.IOException for any socket creation error
+     */
+    public static KDC create(String realm, int port, boolean asDaemon) throws IOException {
+        return new KDC(realm, port, asDaemon);
+    }
+
+    /**
+     * Sets an option
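+     * For example (a sketch only; allows AS-REQ without pre-authentication):
+     * <pre>
+     * kdc.setOption(KDC.Option.PREAUTH_REQUIRED, false);
+     * </pre>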
+     * @param key the option name
+     * @param value the value
+     */
+    public void setOption(Option key, Object value) {
+        options.put(key, value);
+    }
+
+    /**
+     * Writes all principals' keys into a keytab file. Note that the keys for
+     * the krbtgt principal for this realm will not be written.
+     * <p>
+     * Attention: This method references krb5.conf settings. If you need to
+     * set up krb5.conf later, please call <code>Config.refresh()</code> after
+     * applying the new settings. For example:
+     * <pre>
+     * kdc.writeKtab("/etc/kdc/ktab");  // Config is initialized,
+     * System.setProperty("java.security.krb5.conf", "/home/mykrb5.conf");
+     * Config.refresh();
+     * </pre>
+     *
+     * Inside this method there are 2 places krb5.conf is used:
+     * <ol>
+     * <li> (Fatal) Generating keys: EncryptionKey.acquireSecretKeys
+     * <li> (Has workaround) Creating PrincipalName
+     * </ol>
+     * @param tab The keytab filename to write to.
+     * @throws java.io.IOException for any file output error
+     * @throws sun.security.krb5.KrbException for any realm and/or principal
+     *         name error.
+     */
+    public void writeKtab(String tab) throws IOException, KrbException {
+        KeyTab ktab = KeyTab.create(tab);
+        for (String name : passwords.keySet()) {
+            if (name.equals("krbtgt/" + realm)) {
+                continue;
+            }
+            ktab.addEntry(new PrincipalName(name + "@" + realm,
+                    name.indexOf('/') < 0 ?
+                        PrincipalName.KRB_NT_UNKNOWN :
+                        PrincipalName.KRB_NT_SRV_HST), passwords.get(name));
+        }
+        ktab.save();
+    }
+
+    /**
+     * Adds a new principal to this realm with a given password.
+     * @param user the principal's name. For a service principal, use the
+     *        form of host/f.q.d.n
+     * @param pass the password for the principal
+     */
+    public void addPrincipal(String user, char[] pass) {
+        passwords.put(user, pass);
+    }
+
+    /**
+     * Adds a new principal to this realm with a random password
+     * @param user the principal's name. For a service principal, use the
+     *        form of host/f.q.d.n
+     */
+    public void addPrincipalRandKey(String user) {
+        passwords.put(user, randomPassword());
+    }
+
+    /**
+     * Returns the name of this realm
+     * @return the name of this realm
+     */
+    public String getRealm() {
+        return realm;
+    }
+
+    /**
+     * Writes a krb5.conf for one or more KDC that includes KDC locations for
+     * each realm and the default realm name. You can also add extra strings
+     * into the file. The method should be called like:
+     * <pre>
+     *   KDC.saveConfig("krb5.conf", kdc1, kdc2, ..., line1, line2, ...);
+     * </pre>
+     * Here you can provide one or more kdc# and zero or more line# arguments.
+     * The line# will be put after [libdefaults] and before [realms]. Therefore
+     * you can append new lines into [libdefaults] and/or create your new
+     * stanzas as well. Note that a newline character will be appended to
+     * each line# argument.
+     * <p>
+     * For example:
+     * <pre>
+     * KDC.saveConfig("krb5.conf", this);
+     * </pre>
+     * generates:
+     * <pre>
+     * [libdefaults]
+     * default_realm = REALM.NAME
+     *
+     * [realms]
+     *   REALM.NAME = {
+     *     kdc = localhost:port_number
+     *   }
+     * </pre>
+     *
+     * Another example:
+     * <pre>
+     * KDC.saveConfig("krb5.conf", kdc1, kdc2, "forwardable = true", "",
+     *         "[domain_realm]",
+     *         ".kdc1.com = KDC1.NAME");
+     * </pre>
+     * generates:
+     * <pre>
+     * [libdefaults]
+     * default_realm = KDC1.NAME
+     * forwardable = true
+     *
+     * [domain_realm]
+     * .kdc1.com = KDC1.NAME
+     *
+     * [realms]
+     *   KDC1.NAME = {
+     *     kdc = localhost:port1
+     *   }
+     *   KDC2.NAME = {
+     *     kdc = localhost:port2
+     *   }
+     * </pre>
+     * @param file the name of the file to write into
+     * @param kdc the first (and default) KDC
+     * @param more more KDCs or extra lines (in the order they appear) to
+     * insert into the krb5.conf file. This method reads each argument's type
+     * to determine what it's for. This argument can be empty.
+     * @throws java.io.IOException for any file output error
+     */
+    public static void saveConfig(String file, KDC kdc, Object... more)
+            throws IOException {
+        File f = new File(file);
+        StringBuffer sb = new StringBuffer();
+        sb.append("[libdefaults]\ndefault_realm = ");
+        sb.append(kdc.realm);
+        sb.append("\n");
+        for (Object o: more) {
+            if (o instanceof String) {
+                sb.append(o);
+                sb.append("\n");
+            }
+        }
+        sb.append("\n[realms]\n");
+        sb.append(realmLineForKDC(kdc));
+        for (Object o: more) {
+            if (o instanceof KDC) {
+                sb.append(realmLineForKDC((KDC)o));
+            }
+        }
+        FileOutputStream fos = new FileOutputStream(f);
+        fos.write(sb.toString().getBytes());
+        fos.close();
+    }
+
+    /**
+     * Returns the service port of the KDC server.
+     * @return the KDC service port
+     */
+    public int getPort() {
+        return port;
+    }
+
+    // Private helper methods
+
+    /**
+     * Private constructor, cannot be called outside.
+     * @param realm
+     */
+    private KDC(String realm) {
+        this.realm = realm;
+    }
+
+    /**
+     * A constructor that starts the KDC service also.
+     */
+    protected KDC(String realm, int port, boolean asDaemon)
+            throws IOException {
+        this(realm);
+        startServer(port, asDaemon);
+    }
+    /**
+     * Generates a 32-char random password
+     * @return the password
+     */
+    private static char[] randomPassword() {
+        char[] pass = new char[32];
+        for (int i=0; i<32; i++)
+            pass[i] = (char)secureRandom.nextInt();
+        return pass;
+    }
+
+    /**
+     * Generates a random key for the given encryption type.
+     * @param eType the encryption type
+     * @return the generated key
+     * @throws sun.security.krb5.KrbException for unknown/unsupported etype
+     */
+    private static EncryptionKey generateRandomKey(int eType)
+            throws KrbException  {
+        // Is 32 enough for AES256? I should have generated the keys directly
+        // but different cryptos have different rules on what keys are valid.
+        char[] pass = randomPassword();
+        String algo;
+        switch (eType) {
+            case EncryptedData.ETYPE_DES_CBC_MD5: algo = "DES"; break;
+            case EncryptedData.ETYPE_DES3_CBC_HMAC_SHA1_KD: algo = "DESede"; break;
+            case EncryptedData.ETYPE_AES128_CTS_HMAC_SHA1_96: algo = "AES128"; break;
+            case EncryptedData.ETYPE_ARCFOUR_HMAC: algo = "ArcFourHMAC"; break;
+            case EncryptedData.ETYPE_AES256_CTS_HMAC_SHA1_96: algo = "AES256"; break;
+            default: algo = "DES"; break;
+        }
+        return new EncryptionKey(pass, "NOTHING", algo);    // Silly
+    }
+
+    /**
+     * Returns the password for a given principal
+     * @param p principal
+     * @return the password
+     * @throws sun.security.krb5.KrbException when the principal is not inside
+     *         the database.
+     */
+    private char[] getPassword(PrincipalName p) throws KrbException {
+        char[] pass = passwords.get(p.getNameString());
+        if (pass == null) {
+            throw new KrbException(Krb5.KDC_ERR_C_PRINCIPAL_UNKNOWN);
+        }
+        return pass;
+    }
+
+    /**
+     * Returns the salt string for the principal. For a normal user, it is the
+     * concatenation of the realm name and the components of the principal;
+     * for krbtgt/A@B and krbtgt/B@A, it is always the two realm names joined
+     * in a fixed order (so that the inter-realm principals share the same key).
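+     * <p>
+     * For example (illustration only, using the realms from these tests):
+     * <pre>
+     * dummy@RABBIT.HOLE              -> "RABBIT.HOLEdummy"
+     * host/www.snake.hole@SNAKE.HOLE -> "SNAKE.HOLEhostwww.snake.hole"
+     * krbtgt/SNAKE.HOLE@RABBIT.HOLE  -> "RABBIT.HOLESNAKE.HOLE"
+     * krbtgt/RABBIT.HOLE@SNAKE.HOLE  -> "RABBIT.HOLESNAKE.HOLE"   (same salt)
+     * </pre>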
+     * @param p principal
+     * @return the salt
+     */
+    private String getSalt(PrincipalName p) {
+        String[] ns = p.getNameStrings();
+        if (ns[0].equals("krbtgt") && ns.length > 1) {
+            // Shared cross-realm keys must be the same
+            if (ns[1].compareTo(realm) < 0) {
+                return ns[1] + realm;
+            } else {
+                return realm + ns[1];
+            }
+        } else {
+            String s = getRealm();
+            for (String n: p.getNameStrings()) {
+                s += n;
+            }
+            return s;
+        }
+    }
+
+    /**
+     * Returns the key for a given principal of the given encryption type
+     * @param p the principal
+     * @param etype the encryption type
+     * @return the key
+     * @throws sun.security.krb5.KrbException for unknown/unsupported etype
+     */
+    private EncryptionKey keyForUser(PrincipalName p, int etype) throws KrbException {
+        try {
+            // Do not call EncryptionKey.acquireSecretKeys(), otherwise
+            // the krb5.conf config file would be loaded.
+            Method stringToKey = EncryptionKey.class.getDeclaredMethod("stringToKey", char[].class, String.class, byte[].class, Integer.TYPE);
+            stringToKey.setAccessible(true);
+            return new EncryptionKey((byte[]) stringToKey.invoke(null, getPassword(p), getSalt(p), null, etype), etype, null);
+        } catch (InvocationTargetException ex) {
+            KrbException ke = (KrbException)ex.getCause();
+            throw ke;
+        } catch (Exception e) {
+            throw new RuntimeException(e);  // should not happen
+        }
+    }
+
+    /**
+     * Processes an incoming request and generates a response.
+     * @param in the request
+     * @return the response
+     * @throws java.lang.Exception for various errors
+     */
+    private byte[] processMessage(byte[] in) throws Exception {
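+        // The low 5 bits of the first DER byte are the ASN.1 APPLICATION tag
+        // number, which is the krb5 message type (AS-REQ = 10, TGS-REQ = 12).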
+        if ((in[0] & 0x1f) == Krb5.KRB_AS_REQ)
+            return processAsReq(in);
+        else
+            return processTgsReq(in);
+    }
+
+    /**
+     * Processes a TGS_REQ and generates a TGS_REP (or KRB_ERROR)
+     * @param in the request
+     * @return the response
+     * @throws java.lang.Exception for various errors
+     */
+    private byte[] processTgsReq(byte[] in) throws Exception {
+        TGSReq tgsReq = new TGSReq(in);
+        try {
+            System.out.println(realm + "> " + tgsReq.reqBody.cname +
+                    " sends TGS-REQ for " +
+                    tgsReq.reqBody.sname);
+            KDCReqBody body = tgsReq.reqBody;
+            int etype = 0;
+
+            // Reflection: PAData[] pas = tgsReq.pAData;
+            Field f = KDCReq.class.getDeclaredField("pAData");
+            f.setAccessible(true);
+            PAData[] pas = (PAData[])f.get(tgsReq);
+
+            Ticket tkt = null;
+            EncTicketPart etp = null;
+            if (pas == null || pas.length == 0) {
+                throw new KrbException(Krb5.KDC_ERR_PADATA_TYPE_NOSUPP);
+            } else {
+                for (PAData pa: pas) {
+                    if (pa.getType() == Krb5.PA_TGS_REQ) {
+                        APReq apReq = new APReq(pa.getValue());
+                        EncryptedData ed = apReq.authenticator;
+                        tkt = apReq.ticket;
+                        etype = tkt.encPart.getEType();
+                        EncryptionKey kkey = null;
+                        if (!tkt.realm.toString().equals(realm)) {
+                            if (tkt.sname.getNameString().equals("krbtgt/" + realm)) {
+                                kkey = keyForUser(new PrincipalName("krbtgt/" + tkt.realm.toString(), realm), etype);
+                            }
+                        } else {
+                            kkey = keyForUser(tkt.sname, etype);
+                        }
+                        byte[] bb = tkt.encPart.decrypt(kkey, KeyUsage.KU_TICKET);
+                        DerInputStream derIn = new DerInputStream(bb);
+                        DerValue der = derIn.getDerValue();
+                        etp = new EncTicketPart(der.toByteArray());
+                    }
+                }
+                if (tkt == null) {
+                    throw new KrbException(Krb5.KDC_ERR_PADATA_TYPE_NOSUPP);
+                }
+            }
+            EncryptionKey skey = keyForUser(body.sname, etype);
+            if (skey == null) {
+                throw new KrbException(Krb5.KDC_ERR_SUMTYPE_NOSUPP); // TODO
+            }
+
+            // Session key for original ticket, TGT
+            EncryptionKey ckey = etp.key;
+
+            // Session key for session with the service
+            EncryptionKey key = generateRandomKey(etype);
+
+            // Check time, TODO
+            KerberosTime till = body.till;
+            if (till == null) {
+                throw new KrbException(Krb5.KDC_ERR_NEVER_VALID); // TODO
+            } else if (till.isZero()) {
+                till = new KerberosTime(new Date().getTime() + 1000 * 3600 * 11);
+            }
+
+            boolean[] bFlags = new boolean[Krb5.TKT_OPTS_MAX+1];
+            if (body.kdcOptions.get(KDCOptions.FORWARDABLE)) {
+                bFlags[Krb5.TKT_OPTS_FORWARDABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.FORWARDED) ||
+                    etp.flags.get(Krb5.TKT_OPTS_FORWARDED)) {
+                bFlags[Krb5.TKT_OPTS_FORWARDED] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.RENEWABLE)) {
+                bFlags[Krb5.TKT_OPTS_RENEWABLE] = true;
+                //renew = new KerberosTime(new Date().getTime() + 1000 * 3600 * 24 * 7);
+            }
+            if (body.kdcOptions.get(KDCOptions.PROXIABLE)) {
+                bFlags[Krb5.TKT_OPTS_PROXIABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.POSTDATED)) {
+                bFlags[Krb5.TKT_OPTS_POSTDATED] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.ALLOW_POSTDATE)) {
+                bFlags[Krb5.TKT_OPTS_MAY_POSTDATE] = true;
+            }
+            bFlags[Krb5.TKT_OPTS_INITIAL] = true;
+
+            TicketFlags tFlags = new TicketFlags(bFlags);
+            EncTicketPart enc = new EncTicketPart(
+                    tFlags,
+                    key,
+                    etp.crealm,
+                    etp.cname,
+                    new TransitedEncoding(1, new byte[0]),  // TODO
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.addresses,
+                    null);
+            Ticket t = new Ticket(
+                    body.crealm,
+                    body.sname,
+                    new EncryptedData(skey, enc.asn1Encode(), KeyUsage.KU_TICKET)
+            );
+            EncTGSRepPart enc_part = new EncTGSRepPart(
+                    key,
+                    new LastReq(new LastReqEntry[]{
+                        new LastReqEntry(0, new KerberosTime(new Date().getTime() - 10000))
+                    }),
+                    body.getNonce(),    // TODO: detect replay
+                    new KerberosTime(new Date().getTime() + 1000 * 3600 * 24),
+                    // Next 5 and last MUST be the same as in the ticket
+                    tFlags,
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.crealm,
+                    body.sname,
+                    body.addresses
+                    );
+            EncryptedData edata = new EncryptedData(ckey, enc_part.asn1Encode(), KeyUsage.KU_ENC_TGS_REP_PART_SESSKEY);
+            TGSRep tgsRep = new TGSRep(null,
+                    etp.crealm,
+                    etp.cname,
+                    t,
+                    edata);
+            System.out.println("     Return " + tgsRep.cname
+                    + " ticket for " + tgsRep.ticket.sname);
+
+            DerOutputStream out = new DerOutputStream();
+            out.write(DerValue.createTag(DerValue.TAG_APPLICATION,
+                    true, (byte)Krb5.KRB_TGS_REP), tgsRep.asn1Encode());
+            return out.toByteArray();
+        } catch (KrbException ke) {
+            ke.printStackTrace(System.out);
+            KRBError kerr = ke.getError();
+            KDCReqBody body = tgsReq.reqBody;
+            System.out.println("     Error " + ke.returnCode()
+                    + " " +ke.returnCodeMessage());
+            if (kerr == null) {
+                kerr = new KRBError(null, null, null,
+                        new KerberosTime(new Date()),
+                        0,
+                        ke.returnCode(),
+                        body.crealm, body.cname,
+                        new Realm(getRealm()), body.sname,
+                        KrbException.errorMessage(ke.returnCode()),
+                        null);
+            }
+            return kerr.asn1Encode();
+        }
+    }
+
+    /**
+     * Processes an AS_REQ and generates an AS_REP (or KRB_ERROR)
+     * @param in the request
+     * @return the response
+     * @throws java.lang.Exception for various errors
+     */
+    private byte[] processAsReq(byte[] in) throws Exception {
+        ASReq asReq = new ASReq(in);
+        int[] eTypes = null;
+        try {
+            System.out.println(realm + "> " + asReq.reqBody.cname +
+                    " sends AS-REQ for " +
+                    asReq.reqBody.sname);
+
+            KDCReqBody body = asReq.reqBody;
+
+            // Reflection: int[] eType = body.eType;
+            Field f = KDCReqBody.class.getDeclaredField("eType");
+            f.setAccessible(true);
+            eTypes = (int[])f.get(body);
+            int eType = eTypes[0];
+
+            EncryptionKey ckey = keyForUser(body.cname, eType);
+            EncryptionKey skey = keyForUser(body.sname, eType);
+            if (ckey == null) {
+                throw new KrbException(Krb5.KDC_ERR_ETYPE_NOSUPP);
+            }
+            if (skey == null) {
+                throw new KrbException(Krb5.KDC_ERR_SUMTYPE_NOSUPP); // TODO
+            }
+
+            // Session key
+            EncryptionKey key = generateRandomKey(eType);
+            // Check time, TODO
+            KerberosTime till = body.till;
+            if (till == null) {
+                throw new KrbException(Krb5.KDC_ERR_NEVER_VALID); // TODO
+            } else if (till.isZero()) {
+                till = new KerberosTime(new Date().getTime() + 1000 * 3600 * 11);
+            }
+            //body.from
+            boolean[] bFlags = new boolean[Krb5.TKT_OPTS_MAX+1];
+            if (body.kdcOptions.get(KDCOptions.FORWARDABLE)) {
+                bFlags[Krb5.TKT_OPTS_FORWARDABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.RENEWABLE)) {
+                bFlags[Krb5.TKT_OPTS_RENEWABLE] = true;
+                //renew = new KerberosTime(new Date().getTime() + 1000 * 3600 * 24 * 7);
+            }
+            if (body.kdcOptions.get(KDCOptions.PROXIABLE)) {
+                bFlags[Krb5.TKT_OPTS_PROXIABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.POSTDATED)) {
+                bFlags[Krb5.TKT_OPTS_POSTDATED] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.ALLOW_POSTDATE)) {
+                bFlags[Krb5.TKT_OPTS_MAY_POSTDATE] = true;
+            }
+            bFlags[Krb5.TKT_OPTS_INITIAL] = true;
+
+            f = KDCReq.class.getDeclaredField("pAData");
+            f.setAccessible(true);
+            PAData[] pas = (PAData[])f.get(asReq);
+            if (pas == null || pas.length == 0) {
+                Object preauth = options.get(Option.PREAUTH_REQUIRED);
+                if (preauth == null || preauth.equals(Boolean.TRUE)) {
+                    throw new KrbException(Krb5.KDC_ERR_PREAUTH_REQUIRED);
+                }
+            } else {
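+                // PA-ENC-TIMESTAMP check: decrypt the pre-auth data with the
+                // client's key; failure means the pre-authentication is bad.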
+                try {
+                    Constructor<EncryptedData> ctor = EncryptedData.class.getDeclaredConstructor(DerValue.class);
+                    ctor.setAccessible(true);
+                    EncryptedData data = ctor.newInstance(new DerValue(pas[0].getValue()));
+                    data.decrypt(ckey, KeyUsage.KU_PA_ENC_TS);
+                } catch (Exception e) {
+                    throw new KrbException(Krb5.KDC_ERR_PREAUTH_FAILED);
+                }
+                bFlags[Krb5.TKT_OPTS_PRE_AUTHENT] = true;
+            }
+
+            TicketFlags tFlags = new TicketFlags(bFlags);
+            EncTicketPart enc = new EncTicketPart(
+                    tFlags,
+                    key,
+                    body.crealm,
+                    body.cname,
+                    new TransitedEncoding(1, new byte[0]),
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.addresses,
+                    null);
+            Ticket t = new Ticket(
+                    body.crealm,
+                    body.sname,
+                    new EncryptedData(skey, enc.asn1Encode(), KeyUsage.KU_TICKET)
+            );
+            EncASRepPart enc_part = new EncASRepPart(
+                    key,
+                    new LastReq(new LastReqEntry[]{
+                        new LastReqEntry(0, new KerberosTime(new Date().getTime() - 10000))
+                    }),
+                    body.getNonce(),    // TODO: detect replay?
+                    new KerberosTime(new Date().getTime() + 1000 * 3600 * 24),
+                    // Next 5 and last MUST be the same as in the ticket
+                    tFlags,
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.crealm,
+                    body.sname,
+                    body.addresses
+                    );
+            EncryptedData edata = new EncryptedData(ckey, enc_part.asn1Encode(), KeyUsage.KU_ENC_AS_REP_PART);
+            ASRep asRep = new ASRep(null,
+                    body.crealm,
+                    body.cname,
+                    t,
+                    edata);
+
+            System.out.println("     Return " + asRep.cname
+                    + " ticket for " + asRep.ticket.sname);
+
+            DerOutputStream out = new DerOutputStream();
+            out.write(DerValue.createTag(DerValue.TAG_APPLICATION,
+                    true, (byte)Krb5.KRB_AS_REP), asRep.asn1Encode());
+            return out.toByteArray();
+        } catch (KrbException ke) {
+            ke.printStackTrace(System.out);
+            KRBError kerr = ke.getError();
+            KDCReqBody body = asReq.reqBody;
+            System.out.println("     Error " + ke.returnCode()
+                    + " " +ke.returnCodeMessage());
+            byte[] eData = null;
+            if (kerr == null) {
+                if (ke.returnCode() == Krb5.KDC_ERR_PREAUTH_REQUIRED ||
+                        ke.returnCode() == Krb5.KDC_ERR_PREAUTH_FAILED) {
+                    PAData pa;
+
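+                    // Build the e-data of the KRB-ERROR: advertise
+                    // PA-ENC-TIMESTAMP plus ETYPE-INFO2 (and ETYPE-INFO when
+                    // only pre-AES etypes were requested) so the client knows
+                    // how to pre-authenticate.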
+                    ETypeInfo2 ei2 = new ETypeInfo2(eTypes[0], null, null);
+                    DerOutputStream eid = new DerOutputStream();
+                    eid.write(DerValue.tag_Sequence, ei2.asn1Encode());
+
+                    pa = new PAData(Krb5.PA_ETYPE_INFO2, eid.toByteArray());
+
+                    DerOutputStream bytes = new DerOutputStream();
+                    bytes.write(new PAData(Krb5.PA_ENC_TIMESTAMP, new byte[0]).asn1Encode());
+                    bytes.write(pa.asn1Encode());
+
+                    boolean allOld = true;
+                    for (int i: eTypes) {
+                        if (i == EncryptedData.ETYPE_AES128_CTS_HMAC_SHA1_96 ||
+                                i == EncryptedData.ETYPE_AES256_CTS_HMAC_SHA1_96) {
+                            allOld = false;
+                            break;
+                        }
+                    }
+                    if (allOld) {
+                        ETypeInfo ei = new ETypeInfo(eTypes[0], null);
+                        eid = new DerOutputStream();
+                        eid.write(DerValue.tag_Sequence, ei.asn1Encode());
+                        pa = new PAData(Krb5.PA_ETYPE_INFO, eid.toByteArray());
+                        bytes.write(pa.asn1Encode());
+                    }
+                    DerOutputStream temp = new DerOutputStream();
+                    temp.write(DerValue.tag_Sequence, bytes);
+                    eData = temp.toByteArray();
+                }
+                kerr = new KRBError(null, null, null,
+                        new KerberosTime(new Date()),
+                        0,
+                        ke.returnCode(),
+                        body.crealm, body.cname,
+                        new Realm(getRealm()), body.sname,
+                        KrbException.errorMessage(ke.returnCode()),
+                        eData);
+            }
+            return kerr.asn1Encode();
+        }
+    }
+
+    /**
+     * Generates a line for a KDC to put inside [realms] of krb5.conf
+     * @param kdc the KDC
+     * @return REALM.NAME = { kdc = localhost:port }
+     */
+    private static String realmLineForKDC(KDC kdc) {
+        return String.format("  %s = {\n    kdc = localhost:%d\n  }\n", kdc.realm, kdc.port);
+    }
+
+    /**
+     * Start the KDC service. This server listens on both UDP and TCP using
+     * the same port number. It uses three threads to deal with requests.
+     * They can be set to daemon threads if requested.
+     * @param port the port number to listen to. If zero, a random available
+     *  port no less than 8000 will be chosen and used.
+     * @param asDaemon true if the KDC threads should be daemons
+     * @throws java.io.IOException for any communication error
+     */
+    protected void startServer(int port, boolean asDaemon) throws IOException {
+        DatagramSocket u1 = null;
+        ServerSocket t1 = null;
+        if (port > 0) {
+            u1 = new DatagramSocket(port, InetAddress.getByName("127.0.0.1"));
+            t1 = new ServerSocket(port);
+        } else {
+            while (true) {
+                // Try to find a port number that's both TCP and UDP free
+                try {
+                    port = 8000 + new java.util.Random().nextInt(10000);
+                    u1 = null;
+                    u1 = new DatagramSocket(port, InetAddress.getByName("127.0.0.1"));
+                    t1 = new ServerSocket(port);
+                    break;
+                } catch (Exception e) {
+                    if (u1 != null) u1.close();
+                }
+            }
+        }
+        final DatagramSocket udp = u1;
+        final ServerSocket tcp = t1;
+        System.out.println("Start KDC on " + port);
+
+        this.port = port;
+
+        // The UDP consumer
+        Thread thread = new Thread() {
+            public void run() {
+                while (true) {
+                    try {
+                        byte[] inbuf = new byte[8192];
+                        DatagramPacket p = new DatagramPacket(inbuf, inbuf.length);
+                        udp.receive(p);
+                        System.out.println("-----------------------------------------------");
+                        System.out.println(">>>>> UDP packet received");
+                        q.put(new Job(processMessage(Arrays.copyOf(inbuf, p.getLength())), udp, p));
+                    } catch (Exception e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        };
+        thread.setDaemon(asDaemon);
+        thread.start();
+
+        // The TCP consumer
+        thread = new Thread() {
+            public void run() {
+                while (true) {
+                    try {
+                        Socket socket = tcp.accept();
+                        System.out.println("-----------------------------------------------");
+                        System.out.println(">>>>> TCP connection established");
+                        DataInputStream in = new DataInputStream(socket.getInputStream());
+                        DataOutputStream out = new DataOutputStream(socket.getOutputStream());
+                        byte[] token = new byte[in.readInt()];
+                        in.readFully(token);
+                        q.put(new Job(processMessage(token), socket, out));
+                    } catch (Exception e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        };
+        thread.setDaemon(asDaemon);
+        thread.start();
+
+        // The dispatcher
+        thread = new Thread() {
+            public void run() {
+                while (true) {
+                    try {
+                        q.take().send();
+                    } catch (Exception e) {
+                    }
+                }
+            }
+        };
+        thread.setDaemon(true);
+        thread.start();
+    }
+
+    /**
+     * Helper class to encapsulate a job in a KDC.
+     */
+    private static class Job {
+        byte[] token;           // The received request at creation time and
+                                // the response at send time
+        Socket s;               // The TCP socket from where the request comes
+        DataOutputStream out;   // The OutputStream of the TCP socket
+        DatagramSocket s2;      // The UDP socket from where the request comes
+        DatagramPacket dp;      // The incoming UDP datagram packet
+        boolean useTCP;         // Whether TCP or UDP is used
+
+        // Creates a job object for TCP
+        Job(byte[] token, Socket s, DataOutputStream out) {
+            useTCP = true;
+            this.token = token;
+            this.s = s;
+            this.out = out;
+        }
+
+        // Creates a job object for UDP
+        Job(byte[] token, DatagramSocket s2, DatagramPacket dp) {
+            useTCP = false;
+            this.token = token;
+            this.s2 = s2;
+            this.dp = dp;
+        }
+
+        // Sends the output back to the client
+        void send() {
+            try {
+                if (useTCP) {
+                    System.out.println(">>>>> TCP request honored");
+                    out.writeInt(token.length);
+                    out.write(token);
+                    s.close();
+                } else {
+                    System.out.println(">>>>> UDP request honored");
+                    s2.send(new DatagramPacket(token, token.length, dp.getAddress(), dp.getPort()));
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/KerberosHashEqualsTest.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 4641821
+ * @summary hashCode() and equals() for KerberosKey and KerberosTicket
+ */
+
+import java.net.InetAddress;
+import java.util.Date;
+import javax.security.auth.kerberos.KerberosKey;
+import javax.security.auth.kerberos.KerberosPrincipal;
+import javax.security.auth.kerberos.KerberosTicket;
+
+public class KerberosHashEqualsTest {
+    public static void main(String[] args) throws Exception {
+        new OneKDC(null);
+        new KerberosHashEqualsTest().check();
+    }
+
+    void checkSame(Object o1, Object o2) {
+        if(!o1.equals(o2)) {
+            throw new RuntimeException("equals() fails");
+        }
+        if(o1.hashCode() != o2.hashCode()) {
+            throw new RuntimeException("hashCode() not same");
+        }
+    }
+
+    void checkNotSame(Object o1, Object o2) {
+        if(o1.equals(o2)) {
+            throw new RuntimeException("equals() succeeds");
+        }
+    }
+
+    void check() throws Exception {
+
+        // The key part:
+        // new KerberosKey(principal, bytes, keyType, version)
+
+        KerberosKey k1, k2;
+        KerberosPrincipal CLIENT = new KerberosPrincipal("client");
+        KerberosPrincipal SERVER = new KerberosPrincipal("server");
+        byte[] PASS = "pass".getBytes();
+
+        k1 = new KerberosKey(CLIENT, PASS, 1, 1);
+        k2 = new KerberosKey(CLIENT, PASS, 1, 1);
+        checkSame(k1, k1);  // me is me
+        checkSame(k1, k2);  // same
+
+        // A destroyed key doesn't equal any key
+        k2.destroy();
+        checkNotSame(k1, k2);
+        checkNotSame(k2, k1);
+        k1.destroy();
+        checkNotSame(k1, k2);   // even if they are both destroyed
+        checkNotSame(k2, k1);
+        checkSame(k2, k2);
+
+        // a little difference means not equal
+        k1 = new KerberosKey(CLIENT, PASS, 1, 1);
+        k2 = new KerberosKey(SERVER, PASS, 1, 1);
+        checkNotSame(k1, k2);   // Different principal name
+
+        k2 = new KerberosKey(CLIENT, "ssap".getBytes(), 1, 1);
+        checkNotSame(k1, k2);   // Different password
+
+        k2 = new KerberosKey(CLIENT, PASS, 2, 1);
+        checkNotSame(k1, k2);   // Different keytype
+
+        k2 = new KerberosKey(CLIENT, PASS, 1, 2);
+        checkNotSame(k1, k2);   // Different version
+
+        k2 = new KerberosKey(null, PASS, 1, 2);
+        checkNotSame(k1, k2);   // null is not non-null
+
+        k1 = new KerberosKey(null, PASS, 1, 2);
+        checkSame(k1, k2);      // null is null
+
+        checkNotSame(k1, "Another Object");
+
+        // The ticket part:
+        // new KerberosTicket(asn1 bytes, client, server, session key, type, flags,
+        //      auth, start, end, renewUntil times, address)
+
+        KerberosTicket t1, t2;
+
+        byte[] ASN1 = "asn1".getBytes();
+        boolean[] FORWARDABLE = new boolean[] {true, true};
+        boolean[] ALLTRUE = new boolean[] {true, true, true, true, true, true, true, true, true, true};
+        Date D0 = new Date(0);
+
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkSame(t1, t1);
+        checkSame(t1, t2);
+
+        // destroyed tickets don't equal each other
+        t1.destroy();
+        checkNotSame(t1, t2);
+        checkNotSame(t2, t1);
+
+        t2.destroy();
+        checkNotSame(t1, t2);   // even if they are both destroyed
+        checkNotSame(t2, t1);
+
+        checkSame(t2, t2);  // unless they are the same object
+
+        // a little difference means not equal
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        t2 = new KerberosTicket("asn11".getBytes(), CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different ASN1 encoding
+
+        t2 = new KerberosTicket(ASN1, new KerberosPrincipal("client1"), SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different client
+
+        t2 = new KerberosTicket(ASN1, CLIENT, new KerberosPrincipal("server1"), PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different server
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, "pass1".getBytes(), 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different session key
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 2, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different key type
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, new boolean[] {true, false}, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different flags, not FORWARDABLE
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, new Date(1), D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different authtime
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, new Date(1), D0, D0, null);
+        checkNotSame(t1, t2);   // Different starttime
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, new Date(1), D0, null);
+        checkNotSame(t1, t2);   // Different endtime
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, new InetAddress[2]);
+        checkNotSame(t1, t2);   // Different client addresses
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, new Date(1), null);
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, new Date(2), null);
+        checkSame(t1, t2);      // renewtill is ignored when RENEWABLE ticket flag is not set.
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, ALLTRUE, D0, D0, D0, new Date(1), null);
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, ALLTRUE, D0, D0, D0, new Date(2), null);
+        checkNotSame(t1, t2);   // renewtill is used when RENEWABLE is set.
+
+        checkNotSame(t1, "Another Object");
+        System.out.println("Good!");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/OneKDC.java	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.security.Security;
+import javax.security.auth.callback.Callback;
+import javax.security.auth.callback.CallbackHandler;
+import javax.security.auth.callback.NameCallback;
+import javax.security.auth.callback.PasswordCallback;
+import sun.security.krb5.Config;
+
+/**
+ * This class starts a simple KDC with one realm, several typical principal
+ * names, generates delete-on-exit krb5.conf and keytab files, and sets up
+ * system properties for them. There's also a helper method to generate a
+ * JAAS login config file that can be used for JAAS or JGSS apps.
+ * <p>
+ * Just call this line to start everything:
+ * <pre>
+ * new OneKDC(null).writeJAASConf();
+ * </pre>
+ */
+public class OneKDC extends KDC {
+
+    // The krb5 code canonicalizes hostnames before creating a service
+    // principal name, so find out the canonical form of localhost first.
+    // The following code mimics the process inside
+    // PrincipalName.java.
+    static String localhost = "localhost";
+    static {
+        try {
+            localhost = InetAddress.getByName(localhost)
+                    .getCanonicalHostName();
+        } catch (UnknownHostException uhe) {
+            ;   // Ignore, localhost is still "localhost"
+        }
+    }
+    public static final String USER = "dummy";
+    public static final char[] PASS = "bogus".toCharArray();
+    public static String SERVER = "server/" + localhost;
+    public static String BACKEND = "backend/" + localhost;
+    public static final String KRB5_CONF = "localkdc-krb5.conf";
+    public static final String KTAB = "localkdc.ktab";
+    public static final String JAAS_CONF = "localkdc-jaas.conf";
+    public static final String REALM = "RABBIT.HOLE";
+
+    /**
+     * Creates the KDC and starts it.
+     * @param etype Encryption type, null if not specified
+     * @throws java.lang.Exception if there's anything wrong
+     */
+    public OneKDC(String etype) throws Exception {
+        super(REALM, 0, true);
+        addPrincipal(USER, PASS);
+        addPrincipalRandKey("krbtgt/" + REALM);
+        addPrincipalRandKey(SERVER);
+        addPrincipalRandKey(BACKEND);
+        KDC.saveConfig(KRB5_CONF, this,
+                "forwardable = true",
+                "default_keytab_name = " + KTAB,
+                etype == null ? "" : "default_tkt_enctypes=" + etype + "\ndefault_tgs_enctypes=" + etype);
+        System.setProperty("java.security.krb5.conf", KRB5_CONF);
+        // Whatever krb5.conf had been loaded before, we reload ours now.
+        Config.refresh();
+
+        writeKtab(KTAB);
+        new File(KRB5_CONF).deleteOnExit();
+        new File(KTAB).deleteOnExit();
+    }
+
+    /**
+     * Writes a JAAS login config file that contains as many useful entries
+     * as possible, including JGSS-style initiator/acceptor entries and normal
+     * JAAS entries, with names using existing OneKDC principals.
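+     * The generated entries are com.sun.security.jgss.krb5.initiate,
+     * com.sun.security.jgss.krb5.accept, client, server and backend.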
+     * @throws java.io.IOException if the config file cannot be written
+     */
+    public void writeJAASConf() throws IOException {
+        System.setProperty("java.security.auth.login.config", JAAS_CONF);
+        File f = new File(JAAS_CONF);
+        FileOutputStream fos = new FileOutputStream(f);
+        fos.write((
+                "com.sun.security.jgss.krb5.initiate {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required;\n};\n" +
+                "com.sun.security.jgss.krb5.accept {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required\n" +
+                "    principal=\"" + SERVER + "\"\n" +
+                "    useKeyTab=true\n" +
+                "    isInitiator=false\n" +
+                "    storeKey=true;\n};\n" +
+                "client {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required;\n};\n" +
+                "server {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required\n" +
+                "    principal=\"" + SERVER + "\"\n" +
+                "    useKeyTab=true\n" +
+                "    storeKey=true;\n};\n" +
+                "backend {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required\n" +
+                "    principal=\"" + BACKEND + "\"\n" +
+                "    useKeyTab=true\n" +
+                "    storeKey=true\n" +
+                "    isInitiator=false;\n};\n"
+                ).getBytes());
+        fos.close();
+        f.deleteOnExit();
+        Security.setProperty("auth.login.defaultCallbackHandler", "OneKDC$CallbackForClient");
+    }
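+
+    // Illustrative sketch only -- not used by the tests. Once
+    // writeJAASConf() has run, the "client" entry written above plus the
+    // auth.login.defaultCallbackHandler security property are enough for
+    // a plain JAAS login without an explicit CallbackHandler.
+    private static void demoClientLogin() throws Exception {
+        javax.security.auth.login.LoginContext lc =
+                new javax.security.auth.login.LoginContext("client");
+        lc.login();   // name/password supplied by CallbackForClient
+        System.out.println(lc.getSubject());
+    }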
+
+    /**
+     * The default callback handler for JAAS login. Note that this handler
+     * is hard coded to supply only the name and password of OneKDC.USER.
+     * If you need to provide info for another principal, use
+     * Context.fromUserPass() instead.
+     */
+    public static class CallbackForClient implements CallbackHandler {
+        public void handle(Callback[] callbacks) {
+            String user = OneKDC.USER;
+            char[] pass = OneKDC.PASS;
+            for (Callback callback : callbacks) {
+                if (callback instanceof NameCallback) {
+                    System.out.println("Callback for name: " + user);
+                    ((NameCallback) callback).setName(user);
+                }
+                if (callback instanceof PasswordCallback) {
+                    System.out.println("Callback for pass: "
+                            + new String(pass));
+                    ((PasswordCallback) callback).setPassword(pass);
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/basic.sh	Wed Jul 05 16:43:15 2017 +0200
@@ -0,0 +1,65 @@
+#
+# Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+#
+
+# @test
+# @bug 6706974
+# @summary Add krb5 test infrastructure
+# @run shell/timeout=300 basic.sh
+#
+
+if [ "${TESTSRC}" = "" ] ; then
+  TESTSRC="."
+fi
+if [ "${TESTJAVA}" = "" ] ; then
+  echo "TESTJAVA not set.  Test cannot execute."
+  echo "FAILED!!!"
+  exit 1
+fi
+
+# set platform-dependent variables
+OS=`uname -s`
+case "$OS" in
+  Windows_* )
+    FS="\\"
+    ;;
+  * )
+    FS="/"
+    ;;
+esac
+
+${TESTJAVA}${FS}bin${FS}javac -d . \
+    ${TESTSRC}${FS}BasicKrb5Test.java \
+    ${TESTSRC}${FS}KDC.java \
+    ${TESTSRC}${FS}OneKDC.java \
+    ${TESTSRC}${FS}Action.java \
+    ${TESTSRC}${FS}Context.java \
+    || exit 10
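+
+# Run the test once with the default enctypes, then once per enctype.
+# Each failure exit code below matches the Kerberos enctype number
+# (des-cbc-crc=1, des-cbc-md5=3, des3-cbc-sha1=16, aes128-cts=17,
+# aes256-cts=18, rc4-hmac=23), so a failing run identifies its enctype.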
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test || exit 100
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des-cbc-crc || exit 1
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des-cbc-md5 || exit 3
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des3-cbc-sha1 || exit 16
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test aes128-cts || exit 17
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test aes256-cts || exit 18
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test rc4-hmac || exit 23
+
+exit 0