Merge jdk7-b39
author duke
Wed, 05 Jul 2017 16:43:17 +0200
changeset 1489 126f365cec6c
parent 1488 ec72429f79c4 (diff)
parent 1369 fc432de066d9 (current diff)
child 1490 68aa3af7feda
child 1491 4e9ce4796705
child 1492 69062295a58c
child 1494 ef0a3bbff58c
child 1507 adb3d133aa0a
child 1509 89e3d8869c94
child 1520 14c451a86ec1
child 1522 116996236e31
child 1524 a0bb1aca37c4
child 1526 1b144d394c43
child 1536 8aba8469c7ca
Merge
--- a/.hgtags-top-repo	Wed Jul 05 16:42:40 2017 +0200
+++ b/.hgtags-top-repo	Wed Jul 05 16:43:17 2017 +0200
@@ -12,3 +12,4 @@
 143c1abedb7d3095eff0f9ee5fec9bf48e3490fc jdk7-b35
 4b4f5fea8d7d0743f0c30d91fcd9bf9d96e5d2ad jdk7-b36
 744554f5a3290e11c71cd2ddb1aff49e431f9ed0 jdk7-b37
+cc47a76899ed33a2c513cb688348244c9b5a1288 jdk7-b38
--- a/corba/.hgtags	Wed Jul 05 16:42:40 2017 +0200
+++ b/corba/.hgtags	Wed Jul 05 16:43:17 2017 +0200
@@ -12,3 +12,4 @@
 3867c4d14a5bfdbb37c97b4874ccb0ee5343111c jdk7-b35
 0723891eb8d1c27e67c54163af0b4cea05a4e036 jdk7-b36
 59d5848bdedebe91cc2753acce78911bcb4a66db jdk7-b37
+08be802754b0296c91a7713b6d85a015dbcd5349 jdk7-b38
--- a/hotspot/.hgtags	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/.hgtags	Wed Jul 05 16:43:17 2017 +0200
@@ -12,3 +12,4 @@
 5fa96a5a7e76da7c8dad12486293a0456c2c116c jdk7-b35
 e91159f921a58af3698e6479ea1fc5818da66d09 jdk7-b36
 9ee9cf798b59e7d51f8c0a686959f313867a55d6 jdk7-b37
+d9bc824aa078573829bb66572af847e26e1bd12e jdk7-b38
--- a/hotspot/make/hotspot_distro	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/make/hotspot_distro	Wed Jul 05 16:43:17 2017 +0200
@@ -1,4 +1,4 @@
-#
+# 
 # Copyright 2006-2008 Sun Microsystems, Inc.  All Rights Reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
@@ -19,7 +19,7 @@
 # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 # CA 95054 USA or visit www.sun.com if you need additional information or
 # have any questions.
-#
+# 
 
 #
 # This file format must remain compatible with both
--- a/hotspot/make/hotspot_version	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/make/hotspot_version	Wed Jul 05 16:43:17 2017 +0200
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=14
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=05
+HS_BUILD_NUMBER=06
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/hotspot/make/linux/makefiles/top.make	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/make/linux/makefiles/top.make	Wed Jul 05 16:43:17 2017 +0200
@@ -64,6 +64,7 @@
                           $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
                           $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
                           $(VM)/gc_implementation/includeDB_gc_parNew \
+                          $(VM)/gc_implementation/includeDB_gc_g1     \
                           $(VM)/gc_implementation/includeDB_gc_serial \
                           $(VM)/gc_implementation/includeDB_gc_shared
 
--- a/hotspot/make/solaris/makefiles/top.make	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/make/solaris/makefiles/top.make	Wed Jul 05 16:43:17 2017 +0200
@@ -54,6 +54,7 @@
                      $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
                      $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
                      $(VM)/gc_implementation/includeDB_gc_parNew \
+                     $(VM)/gc_implementation/includeDB_gc_g1 \
                      $(VM)/gc_implementation/includeDB_gc_serial \
                      $(VM)/gc_implementation/includeDB_gc_shared
 
--- a/hotspot/make/windows/makefiles/generated.make	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/make/windows/makefiles/generated.make	Wed Jul 05 16:43:17 2017 +0200
@@ -50,7 +50,8 @@
            $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \
            $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \
            $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \
-           $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
+           $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \
+           $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1
 
 IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \
                 $(WorkSpace)/src/share/vm/includeDB_features
--- a/hotspot/make/windows/makefiles/makedeps.make	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/make/windows/makefiles/makedeps.make	Wed Jul 05 16:43:17 2017 +0200
@@ -64,6 +64,7 @@
         -relativeInclude src\share\vm\gc_implementation\shared \
         -relativeInclude src\share\vm\gc_implementation\parNew \
         -relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \
+        -relativeInclude src\share\vm\gc_implementation\g1 \
         -relativeInclude src\share\vm\gc_interface \
         -relativeInclude src\share\vm\asm \
         -relativeInclude src\share\vm\memory \
@@ -115,6 +116,7 @@
         -additionalFile includeDB_gc_parallel \
         -additionalFile includeDB_gc_parallelScavenge \
         -additionalFile includeDB_gc_concurrentMarkSweep \
+        -additionalFile includeDB_gc_g1 \
         -additionalFile includeDB_gc_parNew \
         -additionalFile includeDB_gc_shared \
         -additionalFile includeDB_gc_serial \
--- a/hotspot/make/windows/makefiles/vm.make	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/make/windows/makefiles/vm.make	Wed Jul 05 16:43:17 2017 +0200
@@ -117,6 +117,7 @@
   /I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\
+  /I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\
   /I "$(WorkSpace)\src\share\vm\gc_interface"\
   /I "$(WorkSpace)\src\share\vm\asm"         \
   /I "$(WorkSpace)\src\share\vm\memory"      \
@@ -146,6 +147,7 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep
+VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory
@@ -222,6 +224,9 @@
 {$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
         $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
 
+{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj::
+        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+
 {$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj::
         $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
 
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -130,6 +130,20 @@
   return 0x00;                  // illegal instruction 0x00000000
 }
 
+Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
+  switch (in) {
+  case rc_z:   return equal;
+  case rc_lez: return lessEqual;
+  case rc_lz:  return less;
+  case rc_nz:  return notEqual;
+  case rc_gz:  return greater;
+  case rc_gez: return greaterEqual;
+  default:
+    ShouldNotReachHere();
+  }
+  return equal;
+}
+
 // Generate a bunch 'o stuff (including v9's
 #ifndef PRODUCT
 void Assembler::test_v9() {
@@ -1213,31 +1227,19 @@
 }
 
 
-void MacroAssembler::store_check(Register tmp, Register obj) {
-  // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
-
-  /* $$$ This stuff needs to go into one of the BarrierSet generator
-     functions.  (The particular barrier sets will have to be friends of
-     MacroAssembler, I guess.) */
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+void MacroAssembler::card_table_write(jbyte* byte_map_base,
+                                      Register tmp, Register obj) {
 #ifdef _LP64
   srlx(obj, CardTableModRefBS::card_shift, obj);
 #else
   srl(obj, CardTableModRefBS::card_shift, obj);
 #endif
   assert( tmp != obj, "need separate temp reg");
-  Address rs(tmp, (address)ct->byte_map_base);
+  Address rs(tmp, (address)byte_map_base);
   load_address(rs);
   stb(G0, rs.base(), obj);
 }
 
-void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
-  store_check(tmp, obj);
-}
-
 // %%% Note:  The following six instructions have been moved,
 //            unchanged, from assembler_sparc.inline.hpp.
 //            They will be refactored at a later date.
@@ -1663,11 +1665,21 @@
 
   if (reg == G0)  return;       // always NULL, which is always an oop
 
-  char buffer[16];
+  char buffer[64];
+#ifdef COMPILER1
+  if (CommentedAssembly) {
+    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+    block_comment(buffer);
+  }
+#endif
+
+  int len = strlen(file) + strlen(msg) + 1 + 4;
   sprintf(buffer, "%d", line);
-  int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
+  len += strlen(buffer);
+  sprintf(buffer, " at offset %d ", offset());
+  len += strlen(buffer);
   char * real_msg = new char[len];
-  sprintf(real_msg, "%s (%s:%d)", msg, file, line);
+  sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
 
   // Call indirectly to solve generation ordering problem
   Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
@@ -2059,6 +2071,27 @@
 #endif
 }
 
+void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+                                     Register s1, address d,
+                                     relocInfo::relocType rt ) {
+  if (VM_Version::v9_instructions_work()) {
+    bpr(rc, a, p, s1, d, rt);
+  } else {
+    tst(s1);
+    br(reg_cond_to_cc_cond(rc), a, p, d, rt);
+  }
+}
+
+void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+                                     Register s1, Label& L ) {
+  if (VM_Version::v9_instructions_work()) {
+    bpr(rc, a, p, s1, L);
+  } else {
+    tst(s1);
+    br(reg_cond_to_cc_cond(rc), a, p, L);
+  }
+}
+
 
 // instruction sequences factored across compiler & interpreter
 
@@ -3241,68 +3274,74 @@
   assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
 
-  // get eden boundaries
-  // note: we need both top & top_addr!
-  const Register top_addr = t1;
-  const Register end      = t2;
-
-  CollectedHeap* ch = Universe::heap();
-  set((intx)ch->top_addr(), top_addr);
-  intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
-  ld_ptr(top_addr, delta, end);
-  ld_ptr(top_addr, 0, obj);
-
-  // try to allocate
-  Label retry;
-  bind(retry);
-#ifdef ASSERT
-  // make sure eden top is properly aligned
-  {
-    Label L;
-    btst(MinObjAlignmentInBytesMask, obj);
-    br(Assembler::zero, false, Assembler::pt, L);
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    // No allocation in the shared eden.
+    br(Assembler::always, false, Assembler::pt, slow_case);
     delayed()->nop();
-    stop("eden top is not properly aligned");
-    bind(L);
-  }
+  } else {
+    // get eden boundaries
+    // note: we need both top & top_addr!
+    const Register top_addr = t1;
+    const Register end      = t2;
+
+    CollectedHeap* ch = Universe::heap();
+    set((intx)ch->top_addr(), top_addr);
+    intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
+    ld_ptr(top_addr, delta, end);
+    ld_ptr(top_addr, 0, obj);
+
+    // try to allocate
+    Label retry;
+    bind(retry);
+#ifdef ASSERT
+    // make sure eden top is properly aligned
+    {
+      Label L;
+      btst(MinObjAlignmentInBytesMask, obj);
+      br(Assembler::zero, false, Assembler::pt, L);
+      delayed()->nop();
+      stop("eden top is not properly aligned");
+      bind(L);
+    }
 #endif // ASSERT
-  const Register free = end;
-  sub(end, obj, free);                                   // compute amount of free space
-  if (var_size_in_bytes->is_valid()) {
-    // size is unknown at compile time
-    cmp(free, var_size_in_bytes);
-    br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
-    delayed()->add(obj, var_size_in_bytes, end);
-  } else {
-    // size is known at compile time
-    cmp(free, con_size_in_bytes);
-    br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
-    delayed()->add(obj, con_size_in_bytes, end);
-  }
-  // Compare obj with the value at top_addr; if still equal, swap the value of
-  // end with the value at top_addr. If not equal, read the value at top_addr
-  // into end.
-  casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
-  // if someone beat us on the allocation, try again, otherwise continue
-  cmp(obj, end);
-  brx(Assembler::notEqual, false, Assembler::pn, retry);
-  delayed()->mov(end, obj);                              // nop if successfull since obj == end
+    const Register free = end;
+    sub(end, obj, free);                                   // compute amount of free space
+    if (var_size_in_bytes->is_valid()) {
+      // size is unknown at compile time
+      cmp(free, var_size_in_bytes);
+      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+      delayed()->add(obj, var_size_in_bytes, end);
+    } else {
+      // size is known at compile time
+      cmp(free, con_size_in_bytes);
+      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+      delayed()->add(obj, con_size_in_bytes, end);
+    }
+    // Compare obj with the value at top_addr; if still equal, swap the value of
+    // end with the value at top_addr. If not equal, read the value at top_addr
+    // into end.
+    casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+    // if someone beat us on the allocation, try again, otherwise continue
+    cmp(obj, end);
+    brx(Assembler::notEqual, false, Assembler::pn, retry);
+    delayed()->mov(end, obj);                              // nop if successfull since obj == end
 
 #ifdef ASSERT
-  // make sure eden top is properly aligned
-  {
-    Label L;
-    const Register top_addr = t1;
-
-    set((intx)ch->top_addr(), top_addr);
-    ld_ptr(top_addr, 0, top_addr);
-    btst(MinObjAlignmentInBytesMask, top_addr);
-    br(Assembler::zero, false, Assembler::pt, L);
-    delayed()->nop();
-    stop("eden top is not properly aligned");
-    bind(L);
+    // make sure eden top is properly aligned
+    {
+      Label L;
+      const Register top_addr = t1;
+
+      set((intx)ch->top_addr(), top_addr);
+      ld_ptr(top_addr, 0, top_addr);
+      btst(MinObjAlignmentInBytesMask, top_addr);
+      br(Assembler::zero, false, Assembler::pt, L);
+      delayed()->nop();
+      stop("eden top is not properly aligned");
+      bind(L);
+    }
+#endif // ASSERT
   }
-#endif // ASSERT
 }
 
 
@@ -3554,6 +3593,468 @@
   }
 }
 
+///////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+static uint num_stores = 0;
+static uint num_null_pre_stores = 0;
+
+static void count_null_pre_vals(void* pre_val) {
+  num_stores++;
+  if (pre_val == NULL) num_null_pre_stores++;
+  if ((num_stores % 1000000) == 0) {
+    tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
+                  num_stores, num_null_pre_stores,
+                  100.0*(float)num_null_pre_stores/(float)num_stores);
+  }
+}
+
+static address satb_log_enqueue_with_frame = 0;
+static u_char* satb_log_enqueue_with_frame_end = 0;
+
+static address satb_log_enqueue_frameless = 0;
+static u_char* satb_log_enqueue_frameless_end = 0;
+
+static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
+
+// The calls to this don't work.  We'd need to do a fair amount of work to
+// make it work.
+static void check_index(int ind) {
+  assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
+         "Invariants.")
+}
+
+static void generate_satb_log_enqueue(bool with_frame) {
+  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
+  CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
+  MacroAssembler masm(&buf);
+  address start = masm.pc();
+  Register pre_val;
+
+  Label refill, restart;
+  if (with_frame) {
+    masm.save_frame(0);
+    pre_val = I0;  // Was O0 before the save.
+  } else {
+    pre_val = O0;
+  }
+  int satb_q_index_byte_offset =
+    in_bytes(JavaThread::satb_mark_queue_offset() +
+             PtrQueue::byte_offset_of_index());
+  int satb_q_buf_byte_offset =
+    in_bytes(JavaThread::satb_mark_queue_offset() +
+             PtrQueue::byte_offset_of_buf());
+  assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
+         in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
+         "check sizes in assembly below");
+
+  masm.bind(restart);
+  masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
+
+  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
+  // If the branch is taken, no harm in executing this in the delay slot.
+  masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
+  masm.sub(L0, oopSize, L0);
+
+  masm.st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
+  if (!with_frame) {
+    // Use return-from-leaf
+    masm.retl();
+    masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  } else {
+    // Not delayed.
+    masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  }
+  if (with_frame) {
+    masm.ret();
+    masm.delayed()->restore();
+  }
+  masm.bind(refill);
+
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &SATBMarkQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  masm.mov(G1_scratch, L0);
+  masm.mov(G3_scratch, L1);
+  masm.mov(G4, L2);
+  // We need the value of O0 above (for the write into the buffer), so we
+  // save and restore it.
+  masm.mov(O0, L3);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  masm.mov(O7, L4);
+  masm.call_VM_leaf(L5, handle_zero, G2_thread);
+  masm.mov(L0, G1_scratch);
+  masm.mov(L1, G3_scratch);
+  masm.mov(L2, G4);
+  masm.mov(L3, O0);
+  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  masm.delayed()->mov(L4, O7);
+
+  if (with_frame) {
+    satb_log_enqueue_with_frame = start;
+    satb_log_enqueue_with_frame_end = masm.pc();
+  } else {
+    satb_log_enqueue_frameless = start;
+    satb_log_enqueue_frameless_end = masm.pc();
+  }
+}
+
+static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
+  if (with_frame) {
+    if (satb_log_enqueue_with_frame == 0) {
+      generate_satb_log_enqueue(with_frame);
+      assert(satb_log_enqueue_with_frame != 0, "postcondition.");
+      if (G1SATBPrintStubs) {
+        tty->print_cr("Generated with-frame satb enqueue:");
+        Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
+                             satb_log_enqueue_with_frame_end,
+                             tty);
+      }
+    }
+  } else {
+    if (satb_log_enqueue_frameless == 0) {
+      generate_satb_log_enqueue(with_frame);
+      assert(satb_log_enqueue_frameless != 0, "postcondition.");
+      if (G1SATBPrintStubs) {
+        tty->print_cr("Generated frameless satb enqueue:");
+        Disassembler::decode((u_char*)satb_log_enqueue_frameless,
+                             satb_log_enqueue_frameless_end,
+                             tty);
+      }
+    }
+  }
+}
+
+void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
+  assert(offset == 0 || index == noreg, "choose one");
+
+  if (G1DisablePreBarrier) return;
+  // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
+  Label filtered;
+  // satb_log_barrier_work0(tmp, filtered);
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    ld(G2,
+       in_bytes(JavaThread::satb_mark_queue_offset() +
+                PtrQueue::byte_offset_of_active()),
+       tmp);
+  } else {
+    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
+              "Assumption");
+    ldsb(G2,
+         in_bytes(JavaThread::satb_mark_queue_offset() +
+                  PtrQueue::byte_offset_of_active()),
+         tmp);
+  }
+  // Check on whether to annul.
+  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
+  delayed() -> nop();
+
+  // satb_log_barrier_work1(tmp, offset);
+  if (index == noreg) {
+    if (Assembler::is_simm13(offset)) {
+      ld_ptr(obj, offset, tmp);
+    } else {
+      set(offset, tmp);
+      ld_ptr(obj, tmp, tmp);
+    }
+  } else {
+    ld_ptr(obj, index, tmp);
+  }
+
+  // satb_log_barrier_work2(obj, tmp, offset);
+
+  // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
+
+  const Register pre_val = tmp;
+
+  if (G1SATBBarrierPrintNullPreVals) {
+    save_frame(0);
+    mov(pre_val, O0);
+    // Save G-regs that target may use.
+    mov(G1, L1);
+    mov(G2, L2);
+    mov(G3, L3);
+    mov(G4, L4);
+    mov(G5, L5);
+    call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
+    delayed()->nop();
+    // Restore G-regs that target may have used.
+    mov(L1, G1);
+    mov(L2, G2);
+    mov(L3, G3);
+    mov(L4, G4);
+    mov(L5, G5);
+    restore(G0, G0, G0);
+  }
+
+  // Check on whether to annul.
+  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
+  delayed() -> nop();
+
+  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
+  // case, pre_val will be a scratch G-reg, but there's some cases in which
+  // it's an O-reg.  In the first case, do a normal call.  In the latter,
+  // do a save here and call the frameless version.
+
+  guarantee(pre_val->is_global() || pre_val->is_out(),
+            "Or we need to think harder.");
+  if (pre_val->is_global() && !preserve_o_regs) {
+    generate_satb_log_enqueue_if_necessary(true); // with frame.
+    call(satb_log_enqueue_with_frame);
+    delayed()->mov(pre_val, O0);
+  } else {
+    generate_satb_log_enqueue_if_necessary(false); // with frameless.
+    save_frame(0);
+    call(satb_log_enqueue_frameless);
+    delayed()->mov(pre_val->after_save(), O0);
+    restore();
+  }
+
+  bind(filtered);
+}
+
+static jint num_ct_writes = 0;
+static jint num_ct_writes_filtered_in_hr = 0;
+static jint num_ct_writes_filtered_null = 0;
+static jint num_ct_writes_filtered_pop = 0;
+static G1CollectedHeap* g1 = NULL;
+
+static Thread* count_ct_writes(void* filter_val, void* new_val) {
+  Atomic::inc(&num_ct_writes);
+  if (filter_val == NULL) {
+    Atomic::inc(&num_ct_writes_filtered_in_hr);
+  } else if (new_val == NULL) {
+    Atomic::inc(&num_ct_writes_filtered_null);
+  } else {
+    if (g1 == NULL) {
+      g1 = G1CollectedHeap::heap();
+    }
+    if ((HeapWord*)new_val < g1->popular_object_boundary()) {
+      Atomic::inc(&num_ct_writes_filtered_pop);
+    }
+  }
+  if ((num_ct_writes % 1000000) == 0) {
+    jint num_ct_writes_filtered =
+      num_ct_writes_filtered_in_hr +
+      num_ct_writes_filtered_null +
+      num_ct_writes_filtered_pop;
+
+    tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
+                  "   (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
+                  num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered_in_hr/
+                  (float)num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered_null/
+                  (float)num_ct_writes,
+                  100.0*(float)num_ct_writes_filtered_pop/
+                  (float)num_ct_writes);
+  }
+  return Thread::current();
+}
+
+static address dirty_card_log_enqueue = 0;
+static u_char* dirty_card_log_enqueue_end = 0;
+
+// This gets to assume that o0 contains the object address.
+static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
+  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
+  CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
+  MacroAssembler masm(&buf);
+  address start = masm.pc();
+
+  Label not_already_dirty, restart, refill;
+
+#ifdef _LP64
+  masm.srlx(O0, CardTableModRefBS::card_shift, O0);
+#else
+  masm.srl(O0, CardTableModRefBS::card_shift, O0);
+#endif
+  Address rs(O1, (address)byte_map_base);
+  masm.load_address(rs); // O1 := <card table base>
+  masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
+
+  masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
+                      O2, not_already_dirty);
+  // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
+  // case, harmless if not.
+  masm.delayed()->add(O0, O1, O3);
+
+  // We didn't take the branch, so we're already dirty: return.
+  // Use return-from-leaf
+  masm.retl();
+  masm.delayed()->nop();
+
+  // Not dirty.
+  masm.bind(not_already_dirty);
+  // First, dirty it.
+  masm.stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
+  int dirty_card_q_index_byte_offset =
+    in_bytes(JavaThread::dirty_card_queue_offset() +
+             PtrQueue::byte_offset_of_index());
+  int dirty_card_q_buf_byte_offset =
+    in_bytes(JavaThread::dirty_card_queue_offset() +
+             PtrQueue::byte_offset_of_buf());
+  masm.bind(restart);
+  masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
+
+  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
+                      L0, refill);
+  // If the branch is taken, no harm in executing this in the delay slot.
+  masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
+  masm.sub(L0, oopSize, L0);
+
+  masm.st_ptr(O3, L1, L0);  // [_buf + index] := I0
+  // Use return-from-leaf
+  masm.retl();
+  masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
+
+  masm.bind(refill);
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &DirtyCardQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  masm.mov(G1_scratch, L3);
+  masm.mov(G3_scratch, L5);
+  // We need the value of O3 above (for the write into the buffer), so we
+  // save and restore it.
+  masm.mov(O3, L6);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  masm.mov(O7, L4);
+
+  masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
+  masm.mov(L3, G1_scratch);
+  masm.mov(L5, G3_scratch);
+  masm.mov(L6, O3);
+  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  masm.delayed()->mov(L4, O7);
+
+  dirty_card_log_enqueue = start;
+  dirty_card_log_enqueue_end = masm.pc();
+  // XXX Should have a guarantee here about not going off the end!
+  // Does it already do so?  Do an experiment...
+}
+
+static inline void
+generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
+  if (dirty_card_log_enqueue == 0) {
+    generate_dirty_card_log_enqueue(byte_map_base);
+    assert(dirty_card_log_enqueue != 0, "postcondition.");
+    if (G1SATBPrintStubs) {
+      tty->print_cr("Generated dirty_card enqueue:");
+      Disassembler::decode((u_char*)dirty_card_log_enqueue,
+                           dirty_card_log_enqueue_end,
+                           tty);
+    }
+  }
+}
+
+
+void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+
+  Label filtered;
+  MacroAssembler* post_filter_masm = this;
+
+  if (new_val == G0) return;
+  if (G1DisablePostBarrier) return;
+
+  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::G1SATBCT ||
+         bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
+  if (G1RSBarrierRegionFilter) {
+    xor3(store_addr, new_val, tmp);
+#ifdef _LP64
+    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#else
+    srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#endif
+    if (G1PrintCTFilterStats) {
+      guarantee(tmp->is_global(), "Or stats won't work...");
+      // This is a sleazy hack: I'm temporarily hijacking G2, which I
+      // promise to restore.
+      mov(new_val, G2);
+      save_frame(0);
+      mov(tmp, O0);
+      mov(G2, O1);
+      // Save G-regs that target may use.
+      mov(G1, L1);
+      mov(G2, L2);
+      mov(G3, L3);
+      mov(G4, L4);
+      mov(G5, L5);
+      call(CAST_FROM_FN_PTR(address, &count_ct_writes));
+      delayed()->nop();
+      mov(O0, G2);
+      // Restore G-regs that target may have used.
+      mov(L1, G1);
+      mov(L3, G3);
+      mov(L4, G4);
+      mov(L5, G5);
+      restore(G0, G0, G0);
+    }
+    // XXX Should I predict this taken or not?  Does it matter?
+    br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
+    delayed()->nop();
+  }
+
+  // Now we decide how to generate the card table write.  If we're
+  // enqueueing, we call out to a generated function.  Otherwise, we do it
+  // inline here.
+
+  if (G1RSBarrierUseQueue) {
+    // If the "store_addr" register is an "in" or "local" register, move it to
+    // a scratch reg so we can pass it as an argument.
+    bool use_scr = !(store_addr->is_global() || store_addr->is_out());
+    // Pick a scratch register different from "tmp".
+    Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
+    // Make sure we use up the delay slot!
+    if (use_scr) {
+      post_filter_masm->mov(store_addr, scr);
+    } else {
+      post_filter_masm->nop();
+    }
+    generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
+    save_frame(0);
+    call(dirty_card_log_enqueue);
+    if (use_scr) {
+      delayed()->mov(scr, O0);
+    } else {
+      delayed()->mov(store_addr->after_save(), O0);
+    }
+    restore();
+
+  } else {
+
+#ifdef _LP64
+    post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
+#else
+    post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
+#endif
+    assert( tmp != store_addr, "need separate temp reg");
+    Address rs(tmp, (address)bs->byte_map_base);
+    load_address(rs);
+    stb(G0, rs.base(), store_addr);
+  }
+
+  bind(filtered);
+
+}
+
+#endif  // SERIALGC
+///////////////////////////////////////////////////////////////////////////////////
+
+void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+  // If we're writing constant NULL, we can skip the write barrier.
+  if (new_val == G0) return;
+  CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef ||
+         bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
+  card_table_write(bs->byte_map_base, tmp, store_addr);
+}
+
 void MacroAssembler::load_klass(Register src_oop, Register klass) {
   // The number of bytes in this code is used by
   // MachCallDynamicJavaNode::ret_addr_offset()
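
Aside for review: the stubs and barriers added to assembler_sparc.cpp above implement G1's snapshot-at-the-beginning (SATB) pre-barrier and its dirty-card post-barrier in terms of per-thread log buffers. The following is a minimal, self-contained C++ sketch of that queue discipline, written for this annotation only; it is not HotSpot code, and names such as PtrQueue, handle_zero_index and g1_pre_barrier/g1_post_barrier merely mirror identifiers in the diff.

// Illustrative model of the generated stubs (satb_log_enqueue_* and
// dirty_card_log_enqueue): a per-thread buffer filled from the top down,
// with a runtime call when the index reaches zero.
#include <cstddef>
#include <cstdint>
#include <vector>

struct PtrQueue {
  std::vector<void*> buf;
  size_t index;                       // byte index of next free slot; 0 == full
  explicit PtrQueue(size_t cap) : buf(cap), index(cap * sizeof(void*)) {}
};

// Stand-in for SATBMarkQueueSet/DirtyCardQueueSet::handle_zero_index_for_thread:
// hand the full buffer to the collector and reset the index.
static void handle_zero_index(PtrQueue& q) {
  // ... process q.buf ...
  q.index = q.buf.size() * sizeof(void*);
}

static void enqueue(PtrQueue& q, void* val) {
  if (q.index == 0) handle_zero_index(q);   // the "refill" label in the stubs
  q.index -= sizeof(void*);                 // sub(L0, oopSize, L0)
  q.buf[q.index / sizeof(void*)] = val;     // st_ptr(val, L1, L0): [_buf + index] := val
}

// Pre-barrier: while marking is active, log the value about to be overwritten.
static void g1_pre_barrier(bool marking_active, void** field, PtrQueue& satb_q) {
  if (!marking_active) return;              // PtrQueue::byte_offset_of_active() test
  void* pre_val = *field;
  if (pre_val == nullptr) return;           // null pre-values are filtered out
  enqueue(satb_q, pre_val);
}

// Post-barrier: for a region-crossing, non-null store, dirty the card
// (0 == dirty, as in stb(G0, ...)) and log the card address for refinement.
static void g1_post_barrier(void* store_addr, void* new_val, uint8_t* byte_map_base,
                            int card_shift, int log_hr_grain_bytes, PtrQueue& dirty_q) {
  uintptr_t a = (uintptr_t)store_addr, v = (uintptr_t)new_val;
  if (((a ^ v) >> log_hr_grain_bytes) == 0) return;  // same heap region: filtered
  if (new_val == nullptr) return;                    // storing NULL: filtered
  uint8_t* card = byte_map_base + (a >> card_shift);
  if (*card == 0) return;                            // already dirty: nothing to do
  *card = 0;
  enqueue(dirty_q, card);
}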
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1439,7 +1439,11 @@
   // pp 214
 
   void save(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
-  void save(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void save(    Register s1, int simm13a, Register d ) {
+    // make sure frame is at least large enough for the register save area
+    assert(-simm13a >= 16 * wordSize, "frame too small");
+    emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
+  }
 
   void restore( Register s1 = G0,  Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
   void restore( Register s1,       int simm13a,      Register d      ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
@@ -1594,6 +1598,11 @@
   inline void wrasi(  Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
   inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
 
+  // For a given register condition, return the appropriate condition code
+  // Condition (the one you would use to get the same effect after "tst" on
+  // the target register.)
+  Assembler::Condition reg_cond_to_cc_cond(RCondition in);
+
 
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
@@ -1630,6 +1639,8 @@
 
   // restore global registers in case C code disturbed them
   static void restore_registers(MacroAssembler* a, Register r);
+
+
 };
 
 
@@ -1722,6 +1733,12 @@
   void br_null   ( Register s1, bool a, Predict p, Label& L );
   void br_notnull( Register s1, bool a, Predict p, Label& L );
 
+  // These versions will do the most efficient thing on v8 and v9.  Perhaps
+  // this is what the routine above was meant to do, but it didn't (and
+  // didn't cover both target address kinds.)
+  void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
+  void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L);
+
   inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
 
@@ -2056,9 +2073,23 @@
 #endif // ASSERT
 
  public:
-  // Stores
-  void store_check(Register tmp, Register obj);                // store check for obj - register is destroyed afterwards
-  void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards
+
+  // Write to card table for - register is destroyed afterwards.
+  void card_table_write(jbyte* byte_map_base, Register tmp, Register obj);
+
+  void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
+
+#ifndef SERIALGC
+  // Array store and offset
+  void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs);
+
+  void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
+
+  // May do filtering, depending on the boolean arguments.
+  void g1_card_table_write(jbyte* byte_map_base,
+                           Register tmp, Register obj, Register new_val,
+                           bool region_filter, bool null_filter);
+#endif // SERIALGC
 
   // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
   void push_fTOS();
--- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -404,4 +404,55 @@
 }
 
 
+///////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
+  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                    pre_val_reg, _continuation);
+  __ delayed()->nop();
+
+  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
+  __ delayed()->mov(pre_val_reg, G4);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->nop();
+
+}
+
+jbyte* G1PostBarrierStub::_byte_map_base = NULL;
+
+jbyte* G1PostBarrierStub::byte_map_base_slow() {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+         "Must be if we're using this.");
+  return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register addr_reg = addr()->as_pointer_register();
+  Register new_val_reg = new_val()->as_register();
+  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                    new_val_reg, _continuation);
+  __ delayed()->nop();
+
+  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id));
+  __ delayed()->mov(addr_reg, G4);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->nop();
+}
+
+#endif // SERIALGC
+///////////////////////////////////////////////////////////////////////////////////
+
 #undef __
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -2093,7 +2093,11 @@
   // the known type isn't loaded since the code sanity checks
   // in debug mode and the type isn't required when we know the exact type
   // also check that the type is an array type.
-  if (op->expected_type() == NULL) {
+  // We also, for now, always call the stub if the barrier set requires a
+  // write_ref_pre barrier (which the stub does, but none of the optimized
+  // cases currently does).
+  if (op->expected_type() == NULL ||
+      Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) {
     __ mov(src,     O0);
     __ mov(src_pos, O1);
     __ mov(dst,     O2);
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -365,6 +365,10 @@
     __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info);
   }
 
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
+  }
   __ move(value.result(), array_addr, null_check_info);
   if (obj_store) {
     // Is this precise?
@@ -663,6 +667,10 @@
 
   __ add(obj.result(), offset.result(), addr);
 
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    pre_barrier(obj.result(), false, NULL);
+  }
+
   if (type == objectType)
     __ cas_obj(addr, cmp.result(), val.result(), t1, t2);
   else if (type == intType)
@@ -677,7 +685,11 @@
   LIR_Opr result = rlock_result(x);
   __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result);
   if (type == objectType) {  // Write-barrier needed for Object fields.
+#ifdef PRECISE_CARDMARK
+    post_barrier(addr, val.result());
+#else
     post_barrier(obj.result(), val.result());
+#endif // PRECISE_CARDMARK
   }
 }
 
@@ -1154,6 +1166,10 @@
         addr = new LIR_Address(base_op, index_op, type);
       }
 
+      if (is_obj) {
+        pre_barrier(LIR_OprFact::address(addr), false, NULL);
+        // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr));
+      }
       __ move(data, addr);
       if (is_obj) {
         // This address is precise
--- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -832,6 +832,163 @@
       }
       break;
 
+#ifndef SERIALGC
+    case g1_pre_barrier_slow_id:
+      { // G4: previous value of memory
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ save_frame(0);
+          __ set((int)id, O1);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
+          __ should_not_reach_here();
+          break;
+        }
+
+        __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
+
+        Register pre_val = G4;
+        Register tmp  = G1_scratch;
+        Register tmp2 = G3_scratch;
+
+        Label refill, restart;
+        bool with_frame = false; // I don't know if we can do with-frame.
+        int satb_q_index_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   PtrQueue::byte_offset_of_index());
+        int satb_q_buf_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   PtrQueue::byte_offset_of_buf());
+        __ bind(restart);
+        __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);
+
+        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false,
+                          Assembler::pn, tmp, refill);
+
+        // If the branch is taken, no harm in executing this in the delay slot.
+        __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
+        __ sub(tmp, oopSize, tmp);
+
+        __ st_ptr(pre_val, tmp2, tmp);  // [_buf + index] := <address_of_card>
+        // Use return-from-leaf
+        __ retl();
+        __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);
+
+        __ bind(refill);
+        __ save_frame(0);
+
+        __ mov(pre_val, L0);
+        __ mov(tmp,     L1);
+        __ mov(tmp2,    L2);
+
+        __ call_VM_leaf(L7_thread_cache,
+                        CAST_FROM_FN_PTR(address,
+                                         SATBMarkQueueSet::handle_zero_index_for_thread),
+                                         G2_thread);
+
+        __ mov(L0, pre_val);
+        __ mov(L1, tmp);
+        __ mov(L2, tmp2);
+
+        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+        __ delayed()->restore();
+      }
+      break;
+
+    case g1_post_barrier_slow_id:
+      {
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ save_frame(0);
+          __ set((int)id, O1);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
+          __ should_not_reach_here();
+          break;
+        }
+
+        __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
+
+        Register addr = G4;
+        Register cardtable = G5;
+        Register tmp  = G1_scratch;
+        Register tmp2 = G3_scratch;
+        jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
+
+        Label not_already_dirty, restart, refill;
+
+#ifdef _LP64
+        __ srlx(addr, CardTableModRefBS::card_shift, addr);
+#else
+        __ srl(addr, CardTableModRefBS::card_shift, addr);
+#endif
+
+        Address rs(cardtable, (address)byte_map_base);
+        __ load_address(rs); // cardtable := <card table base>
+        __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
+
+        __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
+                          tmp, not_already_dirty);
+        // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch
+        // case, harmless if not.
+        __ delayed()->add(addr, cardtable, tmp2);
+
+        // We didn't take the branch, so we're already dirty: return.
+        // Use return-from-leaf
+        __ retl();
+        __ delayed()->nop();
+
+        // Not dirty.
+        __ bind(not_already_dirty);
+        // First, dirty it.
+        __ stb(G0, tmp2, 0);  // [cardPtr] := 0  (i.e., dirty).
+
+        Register tmp3 = cardtable;
+        Register tmp4 = tmp;
+
+        // these registers are now dead
+        addr = cardtable = tmp = noreg;
+
+        int dirty_card_q_index_byte_offset =
+          in_bytes(JavaThread::dirty_card_queue_offset() +
+                   PtrQueue::byte_offset_of_index());
+        int dirty_card_q_buf_byte_offset =
+          in_bytes(JavaThread::dirty_card_queue_offset() +
+                   PtrQueue::byte_offset_of_buf());
+        __ bind(restart);
+        __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);
+
+        __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
+                          tmp3, refill);
+        // If the branch is taken, no harm in executing this in the delay slot.
+        __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
+        __ sub(tmp3, oopSize, tmp3);
+
+        __ st_ptr(tmp2, tmp4, tmp3);  // [_buf + index] := <address_of_card>
+        // Use return-from-leaf
+        __ retl();
+        __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);
+
+        __ bind(refill);
+        __ save_frame(0);
+
+        __ mov(tmp2, L0);
+        __ mov(tmp3, L1);
+        __ mov(tmp4, L2);
+
+        __ call_VM_leaf(L7_thread_cache,
+                        CAST_FROM_FN_PTR(address,
+                                         DirtyCardQueueSet::handle_zero_index_for_thread),
+                                         G2_thread);
+
+        __ mov(L0, tmp2);
+        __ mov(L1, tmp3);
+        __ mov(L2, tmp4);
+
+        __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+        __ delayed()->restore();
+      }
+      break;
+#endif // !SERIALGC
+
     default:
       { __ set_info("unimplemented entry", dont_gc_arguments);
         __ save_frame(0);
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1110,30 +1110,31 @@
   //  The input registers are overwritten.
   //
   void gen_write_ref_array_pre_barrier(Register addr, Register count) {
-#if 0 // G1 only
     BarrierSet* bs = Universe::heap()->barrier_set();
     if (bs->has_write_ref_pre_barrier()) {
       assert(bs->has_write_ref_array_pre_opt(),
              "Else unsupported barrier set.");
 
-      assert(addr->is_global() && count->is_global(),
-             "If not, then we have to fix this code to handle more "
-             "general cases.");
-      // Get some new fresh output registers.
       __ save_frame(0);
       // Save the necessary global regs... will be used after.
-      __ mov(addr, L0);
-      __ mov(count, L1);
-
-      __ mov(addr, O0);
+      if (addr->is_global()) {
+        __ mov(addr, L0);
+      }
+      if (count->is_global()) {
+        __ mov(count, L1);
+      }
+      __ mov(addr->after_save(), O0);
       // Get the count into O1
       __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
-      __ delayed()->mov(count, O1);
-      __ mov(L0, addr);
-      __ mov(L1, count);
+      __ delayed()->mov(count->after_save(), O1);
+      if (addr->is_global()) {
+        __ mov(L0, addr);
+      }
+      if (count->is_global()) {
+        __ mov(L1, count);
+      }
       __ restore();
     }
-#endif // 0
   }
   //
   //  Generate post-write barrier for array.
@@ -1150,22 +1151,17 @@
     BarrierSet* bs = Universe::heap()->barrier_set();
 
     switch (bs->kind()) {
-#if 0 // G1 - only
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
         {
-          assert(addr->is_global() && count->is_global(),
-                 "If not, then we have to fix this code to handle more "
-                 "general cases.");
           // Get some new fresh output registers.
           __ save_frame(0);
-          __ mov(addr, O0);
+          __ mov(addr->after_save(), O0);
           __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
-          __ delayed()->mov(count, O1);
+          __ delayed()->mov(count->after_save(), O1);
           __ restore();
         }
         break;
-#endif // 0 G1 - only
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
         {
@@ -2412,8 +2408,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    gen_write_ref_array_pre_barrier(G1, G5);
-
+    gen_write_ref_array_pre_barrier(O1, O2);
 
 #ifdef ASSERT
     // We sometimes save a frame (see partial_subtype_check below).
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -28,6 +28,79 @@
 #ifndef CC_INTERP
 #define __ _masm->
 
+// Misc helpers
+
+// Do an oop store like *(base + index + offset) = val
+// index can be noreg,
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Register base,
+                         Register index,
+                         int offset,
+                         Register val,
+                         Register tmp,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(tmp != val && tmp != base && tmp != index, "register collision");
+  assert(index == noreg || offset == 0, "only one offset");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true);
+        if (index == noreg ) {
+          assert(Assembler::is_simm13(offset), "fix this code");
+          __ store_heap_oop(val, base, offset);
+        } else {
+          __ store_heap_oop(val, base, index);
+        }
+
+        // No need for post barrier if storing NULL
+        if (val != G0) {
+          if (precise) {
+            if (index == noreg) {
+              __ add(base, offset, base);
+            } else {
+              __ add(base, index, base);
+            }
+          }
+          __ g1_write_barrier_post(base, val, tmp);
+        }
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (index == noreg ) {
+          assert(Assembler::is_simm13(offset), "fix this code");
+          __ store_heap_oop(val, base, offset);
+        } else {
+          __ store_heap_oop(val, base, index);
+        }
+        // No need for post barrier if storing NULL
+        if (val != G0) {
+          if (precise) {
+            if (index == noreg) {
+              __ add(base, offset, base);
+            } else {
+              __ add(base, index, base);
+            }
+          }
+          __ card_write_barrier_post(base, val, tmp);
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      ShouldNotReachHere();
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+
 
 //----------------------------------------------------------------------------------------------------
 // Platform-dependent initialization
@@ -758,6 +831,8 @@
   // O4:        array element klass
   // O5:        value klass
 
+  // Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
   // Generate a fast subtype check.  Branch to store_ok if no
   // failure.  Throw if failure.
   __ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok );
@@ -767,18 +842,14 @@
 
   // Store is OK.
   __ bind(store_ok);
-  __ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-  // Quote from rememberedSet.hpp: For objArrays, the precise card
-  // corresponding to the pointer store is dirtied so we don't need to
-  // scavenge the entire array.
-  Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-  __ add(element, O1);              // address the element precisely
-  __ store_check(G3_scratch, O1);
+  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true);
+
   __ ba(false,done);
   __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value)
 
   __ bind(is_null);
-  __ store_heap_oop(Otos_i, element);
+  do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true);
+
   __ profile_null_seen(G3_scratch);
   __ inc(Lesp, 3* Interpreter::stackElementSize());     // adj sp (pops array, index and value)
   __ bind(done);
@@ -2449,8 +2520,9 @@
     // atos
     __ pop_ptr();
     __ verify_oop(Otos_i);
-    __ store_heap_oop(Otos_i, Rclass, Roffset);
-    __ store_check(G1_scratch, Rclass, Roffset);
+
+    do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+
     __ ba(false, checkVolatile);
     __ delayed()->tst(Lscratch);
 
@@ -2491,8 +2563,9 @@
     __ pop_ptr();
     pop_and_check_object(Rclass);
     __ verify_oop(Otos_i);
-    __ store_heap_oop(Otos_i, Rclass, Roffset);
-    __ store_check(G1_scratch, Rclass, Roffset);
+
+    do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+
     patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch);
     __ ba(false, checkVolatile);
     __ delayed()->tst(Lscratch);
@@ -2646,8 +2719,7 @@
       __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
       break;
     case Bytecodes::_fast_aputfield:
-      __ store_heap_oop(Otos_i, Rclass, Roffset);
-      __ store_check(G1_scratch, Rclass, Roffset);
+      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
       break;
     default:
       ShouldNotReachHere();
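
Annotation: the do_oop_store helper above condenses into the following C++ restatement (illustrative only, not HotSpot code). The G1 path logs the old value before the store, the store itself is unchanged, and the card mark afterwards is taken on the element address when 'precise' is set (aastore) or on the object base otherwise (putfield); storing NULL skips the post-barrier entirely.

// Sketch of the dispatch do_oop_store() performs; the barrier-kind names mirror
// BarrierSet::Name, the pre/post functors stand in for the MacroAssembler barriers.
#include <cstddef>

enum class BarrierKind {
  G1SATBCT, G1SATBCTLogging, CardTableModRef, CardTableExtension, ModRef, Other
};

template <typename PreBarrier, typename PostBarrier>
void do_oop_store_model(void* base, ptrdiff_t offset, void* val,
                        BarrierKind kind, bool precise,
                        PreBarrier pre, PostBarrier post) {
  void** slot = (void**)((char*)base + offset);
  switch (kind) {
    case BarrierKind::G1SATBCT:
    case BarrierKind::G1SATBCTLogging:
      pre(slot);                                      // g1_write_barrier_pre: log *slot
      *slot = val;                                    // store_heap_oop
      if (val != nullptr)                             // no post barrier when storing NULL
        post(precise ? (void*)slot : base, val);      // g1_write_barrier_post
      break;
    case BarrierKind::CardTableModRef:
    case BarrierKind::CardTableExtension:
      *slot = val;                                    // store_heap_oop
      if (val != nullptr)
        post(precise ? (void*)slot : base, val);      // card_write_barrier_post
      break;
    default:
      ;                                               // ModRef/Other: ShouldNotReachHere()
  }
}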
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1575,6 +1575,35 @@
   emit_operand(src, dst);
 }
 
+void Assembler::movdqu(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_byte(0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::movdqu(Address dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(dst, src);
+  emit_byte(0x0F);
+  emit_byte(0x7F);
+  emit_operand(src, dst);
+}
+
 // Uses zero extension on 64bit
 
 void Assembler::movl(Register dst, int32_t imm32) {
@@ -5935,26 +5964,30 @@
                                    Label& slow_case) {
   assert(obj == rax, "obj must be in rax, for cmpxchg");
   assert_different_registers(obj, var_size_in_bytes, t1);
-  Register end = t1;
-  Label retry;
-  bind(retry);
-  ExternalAddress heap_top((address) Universe::heap()->top_addr());
-  movptr(obj, heap_top);
-  if (var_size_in_bytes == noreg) {
-    lea(end, Address(obj, con_size_in_bytes));
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    jmp(slow_case);
   } else {
-    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
-  }
-  // if end < obj then we wrapped around => object too long => slow case
-  cmpptr(end, obj);
-  jcc(Assembler::below, slow_case);
-  cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
-  jcc(Assembler::above, slow_case);
-  // Compare obj with the top addr, and if still equal, store the new top addr in
-  // end at the address of the top addr pointer. Sets ZF if was equal, and clears
-  // it otherwise. Use lock prefix for atomicity on MPs.
-  locked_cmpxchgptr(end, heap_top);
-  jcc(Assembler::notEqual, retry);
+    Register end = t1;
+    Label retry;
+    bind(retry);
+    ExternalAddress heap_top((address) Universe::heap()->top_addr());
+    movptr(obj, heap_top);
+    if (var_size_in_bytes == noreg) {
+      lea(end, Address(obj, con_size_in_bytes));
+    } else {
+      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
+    }
+    // if end < obj then we wrapped around => object too long => slow case
+    cmpptr(end, obj);
+    jcc(Assembler::below, slow_case);
+    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
+    jcc(Assembler::above, slow_case);
+    // Compare obj with the top addr, and if still equal, store the new top addr in
+    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
+    // it otherwise. Use lock prefix for atomicity on MPs.
+    locked_cmpxchgptr(end, heap_top);
+    jcc(Assembler::notEqual, retry);
+  }
 }
 
 void MacroAssembler::enter() {
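
Annotation: the same guard is added to the SPARC eden_allocate earlier in this changeset. When CMSIncrementalMode is set or the heap cannot hand out memory by bumping a shared top pointer, the fast path is skipped and control falls to slow_case; otherwise the pre-existing lock-free loop is kept. A minimal C++ sketch of that loop, for orientation only (HotSpot emits it as assembly using locked_cmpxchgptr here and casx_under_lock on SPARC):

#include <atomic>
#include <cstddef>

// 'top' and 'end' stand in for Universe::heap()->top_addr()/end_addr();
// returning nullptr models the branch to slow_case.
static void* eden_allocate_model(std::atomic<char*>& top, char* end, size_t size,
                                 bool inline_contig_alloc_supported) {
  if (!inline_contig_alloc_supported) {
    return nullptr;                         // jmp(slow_case) / br(always, ..., slow_case)
  }
  char* obj = top.load();
  for (;;) {                                // the "retry" label
    char* new_top = obj + size;
    if (new_top < obj || new_top > end) {
      return nullptr;                       // wrapped around or not enough space: slow case
    }
    // Install new_top iff *top_addr still equals obj; on failure obj is
    // reloaded with the current top and the loop retries.
    if (top.compare_exchange_weak(obj, new_top)) {
      return obj;
    }
  }
}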
@@ -6491,6 +6524,179 @@
   }
 }
 
+//////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+#ifndef _LP64
+                                          Register thread,
+#endif
+                                          Register tmp,
+                                          Register tmp2,
+                                          bool tosca_live) {
+  LP64_ONLY(Register thread = r15_thread;)
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+
+  Label done;
+  Label runtime;
+
+  // if (!marking_in_progress) goto done;
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    cmpb(in_progress, 0);
+  }
+  jcc(Assembler::equal, done);
+
+  // if (x.f == NULL) goto done;
+  cmpptr(Address(obj, 0), NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // Can we store original value in the thread's buffer?
+
+  LP64_ONLY(movslq(tmp, index);)
+  movptr(tmp2, Address(obj, 0));
+#ifdef _LP64
+  cmpq(tmp, 0);
+#else
+  cmpl(index, 0);
+#endif
+  jcc(Assembler::equal, runtime);
+#ifdef _LP64
+  subq(tmp, wordSize);
+  movl(index, tmp);
+  addq(tmp, buffer);
+#else
+  subl(index, wordSize);
+  movl(tmp, buffer);
+  addl(tmp, index);
+#endif
+  movptr(Address(tmp, 0), tmp2);
+  jmp(done);
+  bind(runtime);
+  // save the live input values
+  if(tosca_live) push(rax);
+  push(obj);
+#ifdef _LP64
+  movq(c_rarg0, Address(obj, 0));
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
+  pop(thread);
+#endif
+  pop(obj);
+  if(tosca_live) pop(rax);
+  bind(done);
+
+}
+
+void MacroAssembler::g1_write_barrier_post(Register store_addr,
+                                           Register new_val,
+#ifndef _LP64
+                                           Register thread,
+#endif
+                                           Register tmp,
+                                           Register tmp2) {
+
+  LP64_ONLY(Register thread = r15_thread;)
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  movptr(tmp, store_addr);
+  xorptr(tmp, new_val);
+  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
+  jcc(Assembler::equal, done);
+
+  // crosses regions, storing NULL?
+
+  cmpptr(new_val, (int32_t) NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  ExternalAddress cardtable((address) ct->byte_map_base);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+#ifdef _LP64
+  const Register card_addr = tmp;
+
+  movq(card_addr, store_addr);
+  shrq(card_addr, CardTableModRefBS::card_shift);
+
+  lea(tmp2, cardtable);
+
+  // get the address of the card
+  addq(card_addr, tmp2);
+#else
+  const Register card_index = tmp;
+
+  movl(card_index, store_addr);
+  shrl(card_index, CardTableModRefBS::card_shift);
+
+  Address index(noreg, card_index, Address::times_1);
+  const Register card_addr = tmp;
+  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
+#endif
+  cmpb(Address(card_addr, 0), 0);
+  jcc(Assembler::equal, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  movb(Address(card_addr, 0), 0);
+
+  cmpl(queue_index, 0);
+  jcc(Assembler::equal, runtime);
+  subl(queue_index, wordSize);
+  movptr(tmp2, buffer);
+#ifdef _LP64
+  movslq(rscratch1, queue_index);
+  addq(tmp2, rscratch1);
+  movq(Address(tmp2, 0), card_addr);
+#else
+  addl(tmp2, queue_index);
+  movl(Address(tmp2, 0), card_index);
+#endif
+  jmp(done);
+
+  bind(runtime);
+  // save the live input values
+  push(store_addr);
+  push(new_val);
+#ifdef _LP64
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  pop(thread);
+#endif
+  pop(new_val);
+  pop(store_addr);
+
+  bind(done);
+
+}
+
+#endif // SERIALGC
+//////////////////////////////////////////////////////////////////////////////////
+
+
 void MacroAssembler::store_check(Register obj) {
   // Does a store check for the oop in register obj. The content of
   // register obj is destroyed afterwards.
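The g1_write_barrier_pre / g1_write_barrier_post emitters added above expand, at each compiled oop store, into G1's SATB and dirty-card logging. As orientation only, here is a plain C++ sketch of the pre-barrier path, with a hypothetical SATBQueue struct standing in for the per-thread PtrQueue and an assumed fallback named after SharedRuntime::g1_wb_pre; treat it as a schematic, not the VM's implementation:

```cpp
#include <cstddef>

// Hypothetical per-thread SATB queue: a buffer filled downward plus a byte index.
struct SATBQueue {
    bool   active;   // is concurrent marking in progress?
    void** buf;      // buffer base (byte_offset_of_buf in the VM)
    size_t index;    // byte offset of the next free slot; 0 means the buffer is full
};

// Placeholder for the runtime enqueue path (SharedRuntime::g1_wb_pre in the VM).
static void g1_wb_pre_slow(void* /*old_val*/) { /* hand the value to the runtime */ }

// Pre-barrier executed before the store *field = new_val.
void satb_pre_barrier(SATBQueue* q, void** field) {
    if (!q->active) return;              // cmpb(in_progress, 0); je done
    void* old_val = *field;
    if (old_val == nullptr) return;      // nothing worth remembering
    if (q->index == 0) {                 // buffer full: take the runtime path
        g1_wb_pre_slow(old_val);
        return;
    }
    q->index -= sizeof(void*);           // fill downward, as the emitted code does
    *reinterpret_cast<void**>(reinterpret_cast<char*>(q->buf) + q->index) = old_val;
}
```

The buffer is filled downward, which is why the emitted code decrements the index by wordSize before storing.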
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -227,9 +227,11 @@
 #endif // ASSERT
 
   // accessors
-  bool uses(Register reg) const {
-    return _base == reg || _index == reg;
-  }
+  bool        uses(Register reg) const { return _base == reg || _index == reg; }
+  Register    base()             const { return _base;  }
+  Register    index()            const { return _index; }
+  ScaleFactor scale()            const { return _scale; }
+  int         disp()             const { return _disp;  }
 
   // Convert the raw encoding form into the form expected by the constructor for
   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
@@ -1053,6 +1055,11 @@
   void movdqa(XMMRegister dst, Address src);
   void movdqa(XMMRegister dst, XMMRegister src);
 
+  // Move Unaligned Double Quadword
+  void movdqu(Address     dst, XMMRegister src);
+  void movdqu(XMMRegister dst, Address src);
+  void movdqu(XMMRegister dst, XMMRegister src);
+
   void movl(Register dst, int32_t imm32);
   void movl(Address dst, int32_t imm32);
   void movl(Register dst, Register src);
@@ -1310,7 +1317,8 @@
 // on arguments should also go in here.
 
 class MacroAssembler: public Assembler {
- friend class LIR_Assembler;
+  friend class LIR_Assembler;
+  friend class Runtime1;      // as_Address()
  protected:
 
   Address as_Address(AddressLiteral adr);
@@ -1453,6 +1461,7 @@
   // The pointer will be loaded into the thread register.
   void get_thread(Register thread);
 
+
   // Support for VM calls
   //
   // It is imperative that all calls into the VM are handled via the call_VM macros.
@@ -1527,6 +1536,22 @@
   void store_check(Register obj);                // store check for obj - register is destroyed afterwards
   void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
 
+  void g1_write_barrier_pre(Register obj,
+#ifndef _LP64
+                            Register thread,
+#endif
+                            Register tmp,
+                            Register tmp2,
+                            bool     tosca_live);
+  void g1_write_barrier_post(Register store_addr,
+                             Register new_val,
+#ifndef _LP64
+                             Register thread,
+#endif
+                             Register tmp,
+                             Register tmp2);
+
+
   // split store_check(Register obj) to enhance instruction interleaving
   void store_check_part_1(Register obj);
   void store_check_part_2(Register obj);
--- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -456,5 +456,50 @@
   __ jmp(_continuation);
 }
 
+/////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+
+  // At this point we know that marking is in progress
+
+  __ bind(_entry);
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
+
+  __ cmpptr(pre_val_reg, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, _continuation);
+  ce->store_parameter(pre_val()->as_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ jmp(_continuation);
+
+}
+
+jbyte* G1PostBarrierStub::_byte_map_base = NULL;
+
+jbyte* G1PostBarrierStub::byte_map_base_slow() {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+         "Must be if we're using this.");
+  return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register new_val_reg = new_val()->as_register();
+  __ cmpptr(new_val_reg, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, _continuation);
+  ce->store_parameter(addr()->as_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
+  __ jmp(_continuation);
+}
+
+#endif // SERIALGC
+/////////////////////////////////////////////////////////////////////////////
 
 #undef __
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -302,6 +302,8 @@
   }
 
   if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
     __ move(value.result(), array_addr, null_check_info);
     // Seems to be a precise
     post_barrier(LIR_OprFact::address(array_addr), value.result());
@@ -756,7 +758,10 @@
   __ move(obj.result(), addr);
   __ add(addr, offset.result(), addr);
 
-
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Do the pre-write barrier, if any.
+    pre_barrier(addr, false, NULL);
+  }
 
   LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
   if (type == objectType)
@@ -1286,6 +1291,8 @@
     LIR_Address* addr = new LIR_Address(src, offset, type);
     bool is_obj = (type == T_ARRAY || type == T_OBJECT);
     if (is_obj) {
+      // Do the pre-write barrier, if any.
+      pre_barrier(LIR_OprFact::address(addr), false, NULL);
       __ move(data, addr);
       assert(src->is_register(), "must be register");
       // Seems to be a precise address
--- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1583,6 +1583,166 @@
       }
       break;
 
+#ifndef SERIALGC
+    case g1_pre_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
+        // arg0 : previous value of memory
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ movptr(rax, (int)id);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax);
+          __ should_not_reach_here();
+          break;
+        }
+
+        __ push(rax);
+        __ push(rdx);
+
+        const Register pre_val = rax;
+        const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
+        const Register tmp = rdx;
+
+        NOT_LP64(__ get_thread(thread);)
+
+        Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_active()));
+
+        Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+
+        Label done;
+        Label runtime;
+
+        // Can we store original value in the thread's buffer?
+
+        LP64_ONLY(__ movslq(tmp, queue_index);)
+#ifdef _LP64
+        __ cmpq(tmp, 0);
+#else
+        __ cmpl(queue_index, 0);
+#endif
+        __ jcc(Assembler::equal, runtime);
+#ifdef _LP64
+        __ subq(tmp, wordSize);
+        __ movl(queue_index, tmp);
+        __ addq(tmp, buffer);
+#else
+        __ subl(queue_index, wordSize);
+        __ movl(tmp, buffer);
+        __ addl(tmp, queue_index);
+#endif
+
+        // prev_val (rax)
+        f.load_argument(0, pre_val);
+        __ movptr(Address(tmp, 0), pre_val);
+        __ jmp(done);
+
+        __ bind(runtime);
+        // load the pre-value
+        __ push(rcx);
+        f.load_argument(0, rcx);
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread);
+        __ pop(rcx);
+
+        __ bind(done);
+        __ pop(rdx);
+        __ pop(rax);
+      }
+      break;
+
+    case g1_post_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
+
+
+        // arg0: store_address
+        Address store_addr(rbp, 2*BytesPerWord);
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+        Label done;
+        Label runtime;
+
+        // At this point we know new_value is non-NULL and that it crosses heap regions.
+        // Must check to see if card is already dirty
+
+        const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
+
+        Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        __ push(rax);
+        __ push(rdx);
+
+        NOT_LP64(__ get_thread(thread);)
+        ExternalAddress cardtable((address)ct->byte_map_base);
+        assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+        const Register card_addr = rdx;
+#ifdef _LP64
+        const Register tmp = rscratch1;
+        f.load_argument(0, card_addr);
+        __ shrq(card_addr, CardTableModRefBS::card_shift);
+        __ lea(tmp, cardtable);
+        // get the address of the card
+        __ addq(card_addr, tmp);
+#else
+        const Register card_index = rdx;
+        f.load_argument(0, card_index);
+        __ shrl(card_index, CardTableModRefBS::card_shift);
+
+        Address index(noreg, card_index, Address::times_1);
+        __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index)));
+#endif
+
+        __ cmpb(Address(card_addr, 0), 0);
+        __ jcc(Assembler::equal, done);
+
+        // storing region crossing non-NULL, card is clean.
+        // dirty card and log.
+
+        __ movb(Address(card_addr, 0), 0);
+
+        __ cmpl(queue_index, 0);
+        __ jcc(Assembler::equal, runtime);
+        __ subl(queue_index, wordSize);
+
+        const Register buffer_addr = rbx;
+        __ push(rbx);
+
+        __ movptr(buffer_addr, buffer);
+
+#ifdef _LP64
+        __ movslq(rscratch1, queue_index);
+        __ addptr(buffer_addr, rscratch1);
+#else
+        __ addptr(buffer_addr, queue_index);
+#endif
+        __ movptr(Address(buffer_addr, 0), card_addr);
+
+        __ pop(rbx);
+        __ jmp(done);
+
+        __ bind(runtime);
+        NOT_LP64(__ push(rcx);)
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+        NOT_LP64(__ pop(rcx);)
+
+        __ bind(done);
+        __ pop(rdx);
+        __ pop(rax);
+
+      }
+      break;
+#endif // !SERIALGC
+
     default:
       { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
         __ movptr(rax, (int)id);
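The g1_post_barrier slow stub above and the inline g1_write_barrier_post share one shape: ignore stores that stay inside a single heap region or that store NULL, otherwise dirty the card covering the destination and log that card in the thread's dirty-card queue, calling the runtime only when the queue buffer is full. A standalone sketch under the same caveats as before (hypothetical types, illustrative constants; the real shift values come from HeapRegion::LogOfHRGrainBytes and CardTableModRefBS::card_shift):

```cpp
#include <cstddef>
#include <cstdint>

static const int LOG_REGION_BYTES = 20;  // pretend heap regions are 1 MB
static const int CARD_SHIFT       = 9;   // pretend cards cover 512 bytes

struct DirtyCardQueue {
    void** buf;
    size_t index;                         // byte offset of next free slot; 0 == full
};

static uint8_t* card_table_base;          // one byte per card; 0 means "dirty" here

// Placeholder for the runtime path (SharedRuntime::g1_wb_post in the VM).
static void g1_wb_post_slow(uint8_t* /*card*/) { /* enqueue via the runtime */ }

void g1_post_barrier(DirtyCardQueue* q, void** field, void* new_val) {
    uintptr_t store_addr = reinterpret_cast<uintptr_t>(field);
    uintptr_t val_addr   = reinterpret_cast<uintptr_t>(new_val);
    if (((store_addr ^ val_addr) >> LOG_REGION_BYTES) == 0) return; // same region
    if (new_val == nullptr) return;                                 // storing NULL
    uint8_t* card = card_table_base + (store_addr >> CARD_SHIFT);
    if (*card == 0) return;               // card already dirty: nothing to do
    *card = 0;                            // dirty the card
    if (q->index == 0) { g1_wb_post_slow(card); return; }
    q->index -= sizeof(void*);
    *reinterpret_cast<void**>(reinterpret_cast<char*>(q->buf) + q->index) = card;
}
```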
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -44,8 +44,13 @@
   // Note: No need to save/restore bcp & locals (r13 & r14) pointer
   //       since these are callee saved registers and no blocking/
   //       GC can happen in leaf calls.
+  // Further Note: DO NOT save/restore bcp/locals. If a caller has
+  // already saved them so that it can use esi/edi as temporaries
+  // then a save/restore here will DESTROY the copy the caller
+  // saved! There used to be a save_bcp() that only happened in
+  // the ASSERT path (with no restore_bcp), which caused bizarre failures
+  // when the JVM was built with ASSERTs.
 #ifdef ASSERT
-  save_bcp();
   {
     Label L;
     cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
@@ -58,24 +63,9 @@
   // super call
   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
   // interpreter specific
-#ifdef ASSERT
-  {
-    Label L;
-    cmpptr(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize));
-    jcc(Assembler::equal, L);
-    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
-         " r13 not callee saved?");
-    bind(L);
-  }
-  {
-    Label L;
-    cmpptr(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize));
-    jcc(Assembler::equal, L);
-    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
-         " r14 not callee saved?");
-    bind(L);
-  }
-#endif
+  // Used to ASSERT that r13/r14 were equal to frame's bcp/locals
+  // but since they may not have been saved (and we don't want to
+  // save them here; see the note above), the assert is invalid.
 }
 
 void InterpreterMacroAssembler::call_VM_base(Register oop_result,
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -712,7 +712,6 @@
   //     end     -  element count
   void  gen_write_ref_array_pre_barrier(Register start, Register count) {
     assert_different_registers(start, count);
-#if 0 // G1 only
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
@@ -721,8 +720,8 @@
           __ pusha();                      // push registers
           __ push(count);
           __ push(start);
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
-          __ addl(esp, wordSize * 2);
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
+          __ addptr(rsp, 2*wordSize);
           __ popa();
         }
         break;
@@ -734,7 +733,6 @@
         ShouldNotReachHere();
 
     }
-#endif // 0 - G1 only
   }
 
 
@@ -750,20 +748,18 @@
     BarrierSet* bs = Universe::heap()->barrier_set();
     assert_different_registers(start, count);
     switch (bs->kind()) {
-#if 0 // G1 only
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
         {
           __ pusha();                      // push registers
           __ push(count);
           __ push(start);
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
-          __ addl(esp, wordSize * 2);
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
+          __ addptr(rsp, 2*wordSize);
           __ popa();
 
         }
         break;
-#endif // 0 G1 only
 
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
@@ -795,6 +791,69 @@
     }
   }
 
+
+  // Copy 64 bytes chunks
+  //
+  // Inputs:
+  //   from        - source array address
+  //   to_from     - destination array address - from
+  //   qword_count - 8-bytes element count, negative
+  //
+  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( UseSSE >= 2, "supported cpu only" );
+    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
+    // Copy 64-byte chunks
+    __ jmpb(L_copy_64_bytes);
+    __ align(16);
+  __ BIND(L_copy_64_bytes_loop);
+
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, 0));
+      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movdqu(xmm1, Address(from, 16));
+      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
+      __ movdqu(xmm2, Address(from, 32));
+      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
+      __ movdqu(xmm3, Address(from, 48));
+      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
+
+    } else {
+      __ movq(xmm0, Address(from, 0));
+      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movq(xmm1, Address(from, 8));
+      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
+      __ movq(xmm2, Address(from, 16));
+      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
+      __ movq(xmm3, Address(from, 24));
+      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
+      __ movq(xmm4, Address(from, 32));
+      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
+      __ movq(xmm5, Address(from, 40));
+      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
+      __ movq(xmm6, Address(from, 48));
+      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
+      __ movq(xmm7, Address(from, 56));
+      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
+    }
+
+    __ addl(from, 64);
+  __ BIND(L_copy_64_bytes);
+    __ subl(qword_count, 8);
+    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
+    __ addl(qword_count, 8);
+    __ jccb(Assembler::zero, L_exit);
+    //
+    // length is too short, just copy qwords
+    //
+  __ BIND(L_copy_8_bytes);
+    __ movq(xmm0, Address(from, 0));
+    __ movq(Address(from, to_from, Address::times_1), xmm0);
+    __ addl(from, 8);
+    __ decrement(qword_count);
+    __ jcc(Assembler::greater, L_copy_8_bytes);
+  __ BIND(L_exit);
+  }
+
   // Copy 64 bytes chunks
   //
   // Inputs:
@@ -803,6 +862,7 @@
   //   qword_count - 8-bytes element count, negative
   //
   void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( VM_Version::supports_mmx(), "supported cpu only" );
     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
     // Copy 64-byte chunks
     __ jmpb(L_copy_64_bytes);
@@ -880,7 +940,7 @@
     __ subptr(to, from); // to --> to_from
     __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
     __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
+    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
       // align source address at 4 bytes address boundary
       if (t == T_BYTE) {
         // One byte misalignment happens only for byte arrays
@@ -910,20 +970,26 @@
       __ mov(count, rax);      // restore 'count'
       __ jmpb(L_copy_2_bytes); // all dwords were copied
     } else {
-      // align to 8 bytes, we know we are 4 byte aligned to start
-      __ testptr(from, 4);
-      __ jccb(Assembler::zero, L_copy_64_bytes);
-      __ movl(rax, Address(from, 0));
-      __ movl(Address(from, to_from, Address::times_1, 0), rax);
-      __ addptr(from, 4);
-      __ subl(count, 1<<shift);
+      if (!UseUnalignedLoadStores) {
+        // align to 8 bytes, we know we are 4 byte aligned to start
+        __ testptr(from, 4);
+        __ jccb(Assembler::zero, L_copy_64_bytes);
+        __ movl(rax, Address(from, 0));
+        __ movl(Address(from, to_from, Address::times_1, 0), rax);
+        __ addptr(from, 4);
+        __ subl(count, 1<<shift);
+      }
     __ BIND(L_copy_64_bytes);
       __ mov(rax, count);
       __ shrl(rax, shift+1);  // 8 bytes chunk count
       //
       // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
       //
-      mmx_copy_forward(from, to_from, rax);
+      if (UseXMMForArrayCopy) {
+        xmm_copy_forward(from, to_from, rax);
+      } else {
+        mmx_copy_forward(from, to_from, rax);
+      }
     }
     // copy tailing dword
   __ BIND(L_copy_4_bytes);
@@ -1073,13 +1139,20 @@
       __ align(16);
       // Move 8 bytes
     __ BIND(L_copy_8_bytes_loop);
-      __ movq(mmx0, Address(from, count, sf, 0));
-      __ movq(Address(to, count, sf, 0), mmx0);
+      if (UseXMMForArrayCopy) {
+        __ movq(xmm0, Address(from, count, sf, 0));
+        __ movq(Address(to, count, sf, 0), xmm0);
+      } else {
+        __ movq(mmx0, Address(from, count, sf, 0));
+        __ movq(Address(to, count, sf, 0), mmx0);
+      }
     __ BIND(L_copy_8_bytes);
       __ subl(count, 2<<shift);
       __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
       __ addl(count, 2<<shift);
-      __ emms();
+      if (!UseXMMForArrayCopy) {
+        __ emms();
+      }
     }
   __ BIND(L_copy_4_bytes);
     // copy prefix qword
@@ -1147,7 +1220,11 @@
 
     __ subptr(to, from); // to --> to_from
     if (VM_Version::supports_mmx()) {
-      mmx_copy_forward(from, to_from, count);
+      if (UseXMMForArrayCopy) {
+        xmm_copy_forward(from, to_from, count);
+      } else {
+        mmx_copy_forward(from, to_from, count);
+      }
     } else {
       __ jmpb(L_copy_8_bytes);
       __ align(16);
@@ -1200,8 +1277,13 @@
     __ align(16);
   __ BIND(L_copy_8_bytes_loop);
     if (VM_Version::supports_mmx()) {
-      __ movq(mmx0, Address(from, count, Address::times_8));
-      __ movq(Address(to, count, Address::times_8), mmx0);
+      if (UseXMMForArrayCopy) {
+        __ movq(xmm0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), xmm0);
+      } else {
+        __ movq(mmx0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), mmx0);
+      }
     } else {
       __ fild_d(Address(from, count, Address::times_8));
       __ fistp_d(Address(to, count, Address::times_8));
@@ -1210,7 +1292,7 @@
     __ decrement(count);
     __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
 
-    if (VM_Version::supports_mmx()) {
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
       __ emms();
     }
     inc_copy_counter_np(T_LONG);
@@ -1378,9 +1460,9 @@
     Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
 
     // Copy from low to high addresses, indexed from the end of each array.
+    gen_write_ref_array_pre_barrier(to, count);
     __ lea(end_from, end_from_addr);
     __ lea(end_to,   end_to_addr);
-    gen_write_ref_array_pre_barrier(to, count);
     assert(length == count, "");        // else fix next line:
     __ negptr(count);                   // negate and test the length
     __ jccb(Assembler::notZero, L_load_element);
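The new xmm_copy_forward above moves 64 bytes per iteration with four 16-byte movdqu pairs when UseUnalignedLoadStores is on, and falls back to eight 8-byte movq transfers through xmm registers otherwise. The unaligned variant is easier to read as SSE2 intrinsics; the following is an illustrative sketch of that case only (it batches the loads and stores rather than interleaving them as the stub does), not the stub generator's output:

```cpp
#include <emmintrin.h>   // SSE2 intrinsics
#include <cstddef>
#include <cstdint>

// Copy `bytes` (assumed to be a multiple of 64) from src to dst,
// 64 bytes per iteration, using unaligned 16-byte loads and stores.
void copy_forward_64(const uint8_t* src, uint8_t* dst, size_t bytes) {
    for (size_t i = 0; i < bytes; i += 64) {
        __m128i x0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + i));
        __m128i x1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + i + 16));
        __m128i x2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + i + 32));
        __m128i x3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + i + 48));
        _mm_storeu_si128(reinterpret_cast<__m128i*>(dst + i),      x0);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(dst + i + 16), x1);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(dst + i + 32), x2);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(dst + i + 48), x3);
    }
}
```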
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1153,18 +1153,26 @@
   //     Destroy no registers!
   //
   void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
-#if 0 // G1 - only
-    assert_different_registers(addr, c_rarg1);
-    assert_different_registers(count, c_rarg0);
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
         {
           __ pusha();                      // push registers
-          __ movptr(c_rarg0, addr);
-          __ movptr(c_rarg1, count);
-          __ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre));
+          if (count == c_rarg0) {
+            if (addr == c_rarg1) {
+              // exactly backwards!!
+              __ xchgptr(c_rarg1, c_rarg0);
+            } else {
+              __ movptr(c_rarg1, count);
+              __ movptr(c_rarg0, addr);
+            }
+
+          } else {
+            __ movptr(c_rarg0, addr);
+            __ movptr(c_rarg1, count);
+          }
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
           __ popa();
         }
         break;
@@ -1172,11 +1180,10 @@
       case BarrierSet::CardTableExtension:
       case BarrierSet::ModRef:
         break;
-      default      :
+      default:
         ShouldNotReachHere();
 
     }
-#endif // 0 G1 - only
   }
 
   //
@@ -1193,7 +1200,6 @@
     assert_different_registers(start, end, scratch);
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
-#if 0 // G1 - only
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
 
@@ -1206,11 +1212,10 @@
           __ shrptr(scratch, LogBytesPerWord);
           __ mov(c_rarg0, start);
           __ mov(c_rarg1, scratch);
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
           __ popa();
         }
         break;
-#endif // 0 G1 - only
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
         {
@@ -1239,8 +1244,13 @@
           __ decrement(count);
           __ jcc(Assembler::greaterEqual, L_loop);
         }
-      }
-   }
+        break;
+      default:
+        ShouldNotReachHere();
+
+    }
+  }
+
 
   // Copy big chunks forward
   //
@@ -1259,14 +1269,22 @@
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
-    __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
-    __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+
+    } else {
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
+      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
+      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    }
   __ BIND(L_copy_32_bytes);
     __ addptr(qword_count, 4);
     __ jcc(Assembler::lessEqual, L_loop);
@@ -1292,14 +1310,22 @@
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(from, qword_count, Address::times_8, 24));
-    __ movq(Address(dest, qword_count, Address::times_8, 24), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 16));
-    __ movq(Address(dest, qword_count, Address::times_8, 16), to);
-    __ movq(to, Address(from, qword_count, Address::times_8,  8));
-    __ movq(Address(dest, qword_count, Address::times_8,  8), to);
-    __ movq(to, Address(from, qword_count, Address::times_8,  0));
-    __ movq(Address(dest, qword_count, Address::times_8,  0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+      __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
+      __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+
+    } else {
+      __ movq(to, Address(from, qword_count, Address::times_8, 24));
+      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 16));
+      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
+      __ movq(to, Address(from, qword_count, Address::times_8,  8));
+      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
+      __ movq(to, Address(from, qword_count, Address::times_8,  0));
+      __ movq(Address(dest, qword_count, Address::times_8,  0), to);
+    }
   __ BIND(L_copy_32_bytes);
     __ subptr(qword_count, 4);
     __ jcc(Assembler::greaterEqual, L_loop);
@@ -2282,7 +2308,7 @@
     // and report their number to the caller.
     assert_different_registers(rax, r14_length, count, to, end_to, rcx);
     __ lea(end_to, to_element_addr);
-    gen_write_ref_array_post_barrier(to, end_to, rcx);
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
     __ movptr(rax, r14_length);           // original oops
     __ addptr(rax, count);                // K = (original - remaining) oops
     __ notptr(rax);                       // report (-1^K) to caller
@@ -2291,7 +2317,7 @@
     // Come here on success only.
     __ BIND(L_do_card_marks);
     __ addptr(end_to, -wordSize);         // make an inclusive end pointer
-    gen_write_ref_array_post_barrier(to, end_to, rcx);
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
     __ xorptr(rax, rax);                  // return 0 on success
 
     // Common exit point (success or failure).
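One small but real fix above: gen_write_ref_array_pre_barrier now checks whether count already sits in c_rarg0 and addr in c_rarg1 ("exactly backwards") and exchanges them with xchgptr, instead of blindly moving addr into c_rarg0 and destroying count. The hazard is ordinary aliasing during argument shuffling, as this trivial C++ analogue shows (references standing in for registers):

```cpp
#include <utility>

// arg0/arg1 model c_rarg0/c_rarg1; addr and count may alias either of them.
void shuffle_args(long& arg0, long& arg1, long& addr, long& count) {
    if (&count == &arg0) {
        if (&addr == &arg1) {
            std::swap(arg0, arg1);   // "exactly backwards": a plain move would clobber
        } else {
            arg1 = count;            // move count out of arg0 first...
            arg0 = addr;             // ...then arg0 can be overwritten safely
        }
    } else {
        arg0 = addr;                 // no aliasing hazard in this order
        arg1 = count;
    }
}
```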
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -107,6 +107,78 @@
 //----------------------------------------------------------------------------------------------------
 // Miscelaneous helper routines
 
+// Store an oop (or NULL) at the address described by obj.
+// If val == noreg this means store a NULL
+
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == rax, "parameter is just for looks");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        // We do it regardless of precise because we need the registers
+        if (obj.index() == noreg && obj.disp() == 0) {
+          if (obj.base() != rdx) {
+            __ movl(rdx, obj.base());
+          }
+        } else {
+          __ leal(rdx, obj);
+        }
+        __ get_thread(rcx);
+        __ save_bcp();
+        __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg);
+
+        // Do the actual store
+        // noreg means NULL
+        if (val == noreg) {
+          __ movl(Address(rdx, 0), NULL_WORD);
+          // No post barrier for NULL
+        } else {
+          __ movl(Address(rdx, 0), val);
+          __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi);
+        }
+        __ restore_bcp();
+
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (val == noreg) {
+          __ movl(obj, NULL_WORD);
+        } else {
+          __ movl(obj, val);
+          // flatten object address if needed
+          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
+            __ store_check(obj.base());
+          } else {
+            __ leal(rdx, obj);
+            __ store_check(rdx);
+          }
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      if (val == noreg) {
+        __ movl(obj, NULL_WORD);
+      } else {
+        __ movl(obj, val);
+      }
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+
 Address TemplateTable::at_bcp(int offset) {
   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   return Address(rsi, offset);
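The do_oop_store helper above routes every interpreter oop store through the collector's barrier choice. Stripped of the assembler details, the dispatch looks roughly like this; the hook names are placeholders for the g1_write_barrier_pre/post and store_check code the interpreter actually emits inline, so treat it as a schematic only:

```cpp
enum class BarrierKind { G1SATBLogging, CardTableModRef, ModRef };

// Placeholder hooks; the template interpreter emits the barrier code inline
// rather than calling functions like these.
static void g1_pre_barrier(void** /*field*/)                 {}
static void g1_post_barrier(void** /*field*/, void* /*val*/) {}
static void card_table_store_check(void** /*field*/)         {}

void do_oop_store_sketch(BarrierKind kind, void** field, void* val) {
    switch (kind) {
    case BarrierKind::G1SATBLogging:
        g1_pre_barrier(field);            // log the value being overwritten (SATB)
        *field = val;
        if (val != nullptr) {
            g1_post_barrier(field, val);  // no post barrier when storing NULL
        }
        break;
    case BarrierKind::CardTableModRef:
        *field = val;
        if (val != nullptr) {
            card_table_store_check(field);  // dirty the card for this address
        }
        break;
    case BarrierKind::ModRef:
        *field = val;                     // no barrier required
        break;
    }
}
```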
@@ -876,6 +948,8 @@
   __ movptr(rax, at_tos());     // Value
   __ movl(rcx, at_tos_p1());  // Index
   __ movptr(rdx, at_tos_p2());  // Array
+
+  Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   index_check_without_pop(rdx, rcx);      // kills rbx,
   // do array store check - check for NULL value first
   __ testptr(rax, rax);
@@ -887,7 +961,7 @@
   __ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
   __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
   // Compress array+index*wordSize+12 into a single register.  Frees ECX.
-  __ lea(rdx, Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ lea(rdx, element_address);
 
   // Generate subtype check.  Blows ECX.  Resets EDI to locals.
   // Superklass in EAX.  Subklass in EBX.
@@ -899,15 +973,20 @@
 
   // Come here on success
   __ bind(ok_is_subtype);
-  __ movptr(rax, at_rsp());     // Value
-  __ movptr(Address(rdx, 0), rax);
-  __ store_check(rdx);
-  __ jmpb(done);
+
+  // Get the value to store
+  __ movptr(rax, at_rsp());
+  // and store it with appropriate barrier
+  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
+
+  __ jmp(done);
 
   // Have a NULL in EAX, EDX=array, ECX=index.  Store NULL at ary[idx]
   __ bind(is_null);
   __ profile_null_seen(rbx);
-  __ movptr(Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax);
+
+  // Store NULL (noreg means NULL to do_oop_store)
+  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
 
   // Pop stack arguments
   __ bind(done);
@@ -1515,7 +1594,7 @@
     // compute return address as bci in rax,
     __ lea(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
     __ subptr(rax, Address(rcx, methodOopDesc::const_offset()));
-    // Adjust the bcp in ESI by the displacement in EDX
+    // Adjust the bcp in RSI by the displacement in EDX
     __ addptr(rsi, rdx);
     // Push return address
     __ push_i(rax);
@@ -1526,7 +1605,7 @@
 
   // Normal (non-jsr) branch handling
 
-  // Adjust the bcp in ESI by the displacement in EDX
+  // Adjust the bcp in RSI by the displacement in EDX
   __ addptr(rsi, rdx);
 
   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
@@ -2439,11 +2518,12 @@
   __ pop(atos);
   if (!is_static) pop_and_check_object(obj);
 
-  __ movptr(lo, rax );
-  __ store_check(obj, lo);  // Need to mark card
+  do_oop_store(_masm, lo, rax, _bs->kind(), false);
+
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
   }
+
   __ jmp(Done);
 
   __ bind(notObj);
@@ -2664,7 +2744,10 @@
       break;
     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
-    case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
+    case Bytecodes::_fast_aputfield: {
+      do_oop_store(_masm, lo, rax, _bs->kind(), false);
+      break;
+    }
     default:
       ShouldNotReachHere();
   }
@@ -2672,7 +2755,8 @@
   Label done;
   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
                                                Assembler::StoreStore));
-  __ jmpb(done);
+  // Barriers are so large that short branch doesn't reach!
+  __ jmp(done);
 
   // Same code as above, but don't need rdx to test for volatile.
   __ bind(notVolatile);
@@ -2694,7 +2778,10 @@
       break;
     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
-    case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
+    case Bytecodes::_fast_aputfield: {
+      do_oop_store(_masm, lo, rax, _bs->kind(), false);
+      break;
+    }
     default:
       ShouldNotReachHere();
   }
@@ -3054,8 +3141,6 @@
   Label initialize_object;  // including clearing the fields
   Label allocate_shared;
 
-  ExternalAddress heap_top((address)Universe::heap()->top_addr());
-
   __ get_cpool_and_tags(rcx, rax);
   // get instanceKlass
   __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(constantPoolOopDesc)));
@@ -3112,6 +3197,8 @@
   if (allow_shared_alloc) {
     __ bind(allocate_shared);
 
+    ExternalAddress heap_top((address)Universe::heap()->top_addr());
+
     Label retry;
     __ bind(retry);
     __ movptr(rax, heap_top);
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -115,6 +115,69 @@
 
 
 // Miscelaneous helper routines
+// Store an oop (or NULL) at the address described by obj.
+// If val == noreg this means store a NULL
+
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == rax, "parameter is just for looks");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        if (obj.index() == noreg && obj.disp() == 0) {
+          if (obj.base() != rdx) {
+            __ movq(rdx, obj.base());
+          }
+        } else {
+          __ leaq(rdx, obj);
+        }
+        __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
+        if (val == noreg) {
+          __ store_heap_oop(Address(rdx, 0), NULL_WORD);
+        } else {
+          __ store_heap_oop(Address(rdx, 0), val);
+          __ g1_write_barrier_post(rdx, val, r8, rbx);
+        }
+
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (val == noreg) {
+          __ store_heap_oop(obj, NULL_WORD);
+        } else {
+          __ store_heap_oop(obj, val);
+          // flatten object address if needed
+          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
+            __ store_check(obj.base());
+          } else {
+            __ leaq(rdx, obj);
+            __ store_check(rdx);
+          }
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      if (val == noreg) {
+        __ store_heap_oop(obj, NULL_WORD);
+      } else {
+        __ store_heap_oop(obj, val);
+      }
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
 
 Address TemplateTable::at_bcp(int offset) {
   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
@@ -560,8 +623,8 @@
   // rdx: array
   index_check(rdx, rax); // kills rbx
   __ load_heap_oop(rax, Address(rdx, rax,
-                       UseCompressedOops ? Address::times_4 : Address::times_8,
-                       arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+                                UseCompressedOops ? Address::times_4 : Address::times_8,
+                                arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 }
 
 void TemplateTable::baload() {
@@ -866,6 +929,11 @@
   __ movptr(rax, at_tos());    // value
   __ movl(rcx, at_tos_p1()); // index
   __ movptr(rdx, at_tos_p2()); // array
+
+  Address element_address(rdx, rcx,
+                          UseCompressedOops? Address::times_4 : Address::times_8,
+                          arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
   index_check(rdx, rcx);     // kills rbx
   // do array store check - check for NULL value first
   __ testptr(rax, rax);
@@ -879,9 +947,7 @@
                          sizeof(oopDesc) +
                          objArrayKlass::element_klass_offset_in_bytes()));
   // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
-  __ lea(rdx, Address(rdx, rcx,
-                      UseCompressedOops ? Address::times_4 : Address::times_8,
-                      arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ lea(rdx, element_address);
 
   // Generate subtype check.  Blows rcx, rdi
   // Superklass in rax.  Subklass in rbx.
@@ -893,18 +959,19 @@
 
   // Come here on success
   __ bind(ok_is_subtype);
-  __ movptr(rax, at_tos()); // Value
-  __ store_heap_oop(Address(rdx, 0), rax);
-  __ store_check(rdx);
+
+  // Get the value we will store
+  __ movptr(rax, at_tos());
+  // Now store using the appropriate barrier
+  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
   __ jmp(done);
 
   // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
   __ bind(is_null);
   __ profile_null_seen(rbx);
-  __ store_heap_oop(Address(rdx, rcx,
-                            UseCompressedOops ? Address::times_4 : Address::times_8,
-                            arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
-                    rax);
+
+  // Store a NULL
+  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
 
   // Pop stack arguments
   __ bind(done);
@@ -2396,8 +2463,10 @@
   // atos
   __ pop(atos);
   if (!is_static) pop_and_check_object(obj);
-  __ store_heap_oop(field, rax);
-  __ store_check(obj, field); // Need to mark card
+
+  // Store into the field
+  do_oop_store(_masm, field, rax, _bs->kind(), false);
+
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
   }
@@ -2584,8 +2653,7 @@
   // access field
   switch (bytecode()) {
   case Bytecodes::_fast_aputfield:
-    __ store_heap_oop(field, rax);
-    __ store_check(rcx, field);
+    do_oop_store(_masm, field, rax, _bs->kind(), false);
     break;
   case Bytecodes::_fast_lputfield:
     __ movq(field, rax);
@@ -3044,8 +3112,6 @@
   Label initialize_header;
   Label initialize_object; // including clearing the fields
   Label allocate_shared;
-  ExternalAddress top((address)Universe::heap()->top_addr());
-  ExternalAddress end((address)Universe::heap()->end_addr());
 
   __ get_cpool_and_tags(rsi, rax);
   // get instanceKlass
@@ -3106,6 +3172,9 @@
   if (allow_shared_alloc) {
     __ bind(allocate_shared);
 
+    ExternalAddress top((address)Universe::heap()->top_addr());
+    ExternalAddress end((address)Universe::heap()->end_addr());
+
     const Register RtopAddr = rscratch1;
     const Register RendAddr = rscratch2;
 
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -242,9 +242,11 @@
   _supports_cx8 = supports_cmpxchg8();
   // if the OS doesn't support SSE, we can't use this feature even if the HW does
   if( !os::supports_sse())
-    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A);
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -261,7 +263,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -272,7 +274,8 @@
                (supports_sse2() ? ", sse2" : ""),
                (supports_sse3() ? ", sse3" : ""),
                (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4() ? ", sse4" : ""),
+               (supports_sse4_1() ? ", sse4.1" : ""),
+               (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow()   ? ", 3dnow"  : ""),
                (supports_3dnow2()  ? ", 3dnowext" : ""),
@@ -285,7 +288,7 @@
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -375,6 +378,14 @@
         MaxLoopPad = 11;
       }
 #endif // COMPILER2
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+      }
+      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
     }
   }
 
@@ -413,7 +424,7 @@
 
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -68,9 +68,9 @@
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
     } bits;
   };
 
@@ -177,8 +177,9 @@
      CPU_SSE2 = (1 << 7),
      CPU_SSE3 = (1 << 8), // sse3  comes from cpuid 1 (ECX)
      CPU_SSSE3= (1 << 9),
-     CPU_SSE4 = (1 <<10),
-     CPU_SSE4A= (1 <<11)
+     CPU_SSE4A= (1 <<10),
+     CPU_SSE4_1 = (1 << 11),
+     CPU_SSE4_2 = (1 << 12)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -240,22 +241,14 @@
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
       result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
 
@@ -380,7 +377,8 @@
   static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()     { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -186,8 +186,10 @@
   if (!VM_Version::supports_sse2()) {
     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
   }
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -204,7 +206,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -215,7 +217,8 @@
                (supports_sse2() ? ", sse2" : ""),
                (supports_sse3() ? ", sse3" : ""),
                (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4() ? ", sse4" : ""),
+               (supports_sse4_1() ? ", sse4.1" : ""),
+               (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow()   ? ", 3dnow"  : ""),
                (supports_3dnow2()  ? ", 3dnowext" : ""),
@@ -228,7 +231,7 @@
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -314,6 +317,14 @@
         MaxLoopPad = 11;
       }
 #endif // COMPILER2
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+      }
+      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
     }
   }
 
@@ -355,7 +366,7 @@
 
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -68,9 +68,9 @@
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
     } bits;
   };
 
@@ -177,8 +177,9 @@
      CPU_SSE2 = (1 << 7),
      CPU_SSE3 = (1 << 8),
      CPU_SSSE3= (1 << 9),
-     CPU_SSE4 = (1 <<10),
-     CPU_SSE4A= (1 <<11)
+     CPU_SSE4A= (1 <<10),
+     CPU_SSE4_1 = (1 << 11),
+     CPU_SSE4_2 = (1 << 12)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -240,22 +241,14 @@
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_eax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
       result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
 
@@ -380,7 +377,8 @@
   static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()     { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
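
The widened std_cpuid1_ecx bitfield above maps directly onto CPUID leaf 1: SSE4.1 is reported in ECX bit 19 and SSE4.2 in ECX bit 20, which is where the new sse4_1/sse4_2 flags come from. A minimal standalone probe of the same two bits, not HotSpot code, assuming GCC or Clang on x86 and the <cpuid.h> helper:

    #include <cpuid.h>
    #include <cstdio>

    int main() {
      unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        std::puts("CPUID leaf 1 not available");
        return 1;
      }
      const bool sse4_1 = (ecx >> 19) & 1;  // CPUID.01H:ECX.SSE4_1 [bit 19]
      const bool sse4_2 = (ecx >> 20) & 1;  // CPUID.01H:ECX.SSE4_2 [bit 20]
      std::printf("sse4.1=%d sse4.2=%d\n", (int)sse4_1, (int)sse4_2);
      return 0;
    }
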
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Jul 05 16:43:17 2017 +0200
@@ -4810,6 +4810,16 @@
   interface(CONST_INTER);
 %}
 
+// Long Immediate zero
+operand immL_M1() %{
+  predicate( n->get_long() == -1L );
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Long immediate from 0 to 127.
 // Used for a shorter form of long mul by 10.
 operand immL_127() %{
@@ -8621,6 +8631,18 @@
   ins_pipe( ialu_reg_reg );
 %}
 
+// Xor Register with Immediate -1
+instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
+  match(Set dst (XorI dst imm));  
+
+  size(2);
+  format %{ "NOT    $dst" %}  
+  ins_encode %{
+     __ notl($dst$$Register);
+  %}
+  ins_pipe( ialu_reg );
+%}
+
 // Xor Register with Immediate
 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (XorI dst src));
@@ -8938,6 +8960,18 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
+// Xor Long Register with Immediate -1
+instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
+  match(Set dst (XorL dst imm));  
+  format %{ "NOT    $dst.lo\n\t"
+            "NOT    $dst.hi" %}
+  ins_encode %{
+     __ notl($dst$$Register);
+     __ notl(HIGH_FROM_LOW($dst$$Register));
+  %}
+  ins_pipe( ialu_reg_long );
+%}
+
 // Xor Long Register with Immediate
 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
   match(Set dst (XorL dst src));
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Jul 05 16:43:17 2017 +0200
@@ -9309,6 +9309,17 @@
   ins_pipe(ialu_reg_reg);
 %}
 
+// Xor Register with Immediate -1
+instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
+  match(Set dst (XorI dst imm));  
+
+  format %{ "not    $dst" %}  
+  ins_encode %{
+     __ notl($dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
 // Xor Register with Immediate
 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 %{
@@ -9529,6 +9540,17 @@
   ins_pipe(ialu_reg_reg);
 %}
 
+// Xor Register with Immediate -1
+instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
+  match(Set dst (XorL dst imm));  
+
+  format %{ "notq   $dst" %}  
+  ins_encode %{
+     __ notq($dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
 // Xor Register with Immediate
 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 %{
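
The immI_M1/immL_M1 rules added above rely on the identity x ^ -1 == ~x: a xor with an all-ones immediate is matched into a single one-operand NOT, and because x86 NOT does not modify the flags the rules can also drop the eFlagsReg/rFlagsReg effect that the generic xor-with-immediate rules carry. A tiny check of the identity (plain C++, two's-complement assumed):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (std::int64_t x : {0LL, 1LL, -7LL, 0x12345678LL}) {
        // XorI/XorL with an immediate of -1 is exactly a bitwise NOT.
        assert((x ^ -1LL) == ~x);
      }
      return 0;
    }
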
--- a/hotspot/src/os/linux/launcher/java.c	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/linux/launcher/java.c	Wed Jul 05 16:43:17 2017 +0200
@@ -1110,7 +1110,7 @@
         if (propname) {
             jclass cls;
             jmethodID mid;
-            NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System"));
+            NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System"));
             NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                                    env, cls,
                                    "getProperty",
@@ -1125,7 +1125,7 @@
 static jboolean isEncodingSupported(JNIEnv *env, jstring enc) {
     jclass cls;
     jmethodID mid;
-    NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset"));
+    NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset"));
     NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                            env, cls,
                            "isSupported",
@@ -1161,7 +1161,7 @@
 #else
             if (isEncodingSupported(env, enc) == JNI_TRUE) {
 #endif
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([BLjava/lang/String;)V"));
                 str = (*env)->NewObject(env, cls, mid, ary, enc);
@@ -1172,7 +1172,7 @@
                   the encoding name, in which the StringCoding class will
                   pickup the iso-8859-1 as the fallback converter for us.
                 */
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([B)V"));
                 str = (*env)->NewObject(env, cls, mid, ary);
@@ -1195,7 +1195,7 @@
     jarray ary;
     int i;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
     NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0));
     for (i = 0; i < strc; i++) {
         jstring str = NewPlatformString(env, *strv++);
@@ -1224,6 +1224,7 @@
         c = *t++;
         *s++ = (c == '.') ? '/' : c;
     } while (c != '\0');
+    // use the application class loader for main-class
     cls = (*env)->FindClass(env, buf);
     free(buf);
 
@@ -1250,7 +1251,7 @@
     jobject jar, man, attr;
     jstring str, result = 0;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile"));
     NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "(Ljava/lang/String;)V"));
     NULL_CHECK0(str = NewPlatformString(env, jarname));
@@ -1471,7 +1472,7 @@
     jclass ver;
     jmethodID print;
 
-    NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version"));
+    NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version"));
     NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V"));
 
     (*env)->CallStaticVoidMethod(env, ver, print);
--- a/hotspot/src/os/linux/launcher/java.h	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/linux/launcher/java.h	Wed Jul 05 16:43:17 2017 +0200
@@ -100,5 +100,15 @@
  * Make launcher spit debug output.
  */
 extern jboolean _launcher_debug;
+/*
+ * This allows classes to be found directly from the VM's bootstrap class
+ * loader.  FindClass uses the application class loader internally, which
+ * causes unnecessary searching of the classpath for the required classes.
+ */
+typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env,
+                                                const char *name,
+                                                jboolean throwError));
+
+jclass FindBootStrapClass(JNIEnv *env, const char *classname);
 
 #endif /* _JAVA_H_ */
--- a/hotspot/src/os/linux/launcher/java_md.c	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/linux/launcher/java_md.c	Wed Jul 05 16:43:17 2017 +0200
@@ -1826,3 +1826,23 @@
 {
     return(borrowed_unsetenv(name));
 }
+/*
+ * The implementation for finding classes from the bootstrap
+ * class loader; refer to java.h.
+ */
+static FindClassFromBootLoader_t *findBootClass = NULL;
+
+jclass
+FindBootStrapClass(JNIEnv *env, const char* classname)
+{
+   if (findBootClass == NULL) {
+       findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT,
+          "JVM_FindClassFromBootLoader");
+       if (findBootClass == NULL) {
+           fprintf(stderr, "Error: could not load method JVM_FindClassFromBootLoader");
+           return NULL;
+       }
+   }
+   return findBootClass(env, classname, JNI_FALSE);
+}
+
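
FindBootStrapClass is a lazily cached dlsym lookup: the first call resolves JVM_FindClassFromBootLoader out of the already loaded libjvm and later calls reuse the cached pointer, so launcher start-up classes skip the classpath search that FindClass would do. A hedged standalone sketch of the same idiom, compiled as C++ against the JDK's jni.h (the helper name and the FindClass fallback are illustrative, not part of the launcher):

    #include <dlfcn.h>
    #include <jni.h>

    typedef jclass (JNICALL *find_from_boot_fn)(JNIEnv*, const char*, jboolean);

    static jclass find_boot_class(JNIEnv* env, const char* name) {
      // Resolve once from the already loaded libjvm, then cache the pointer.
      static find_from_boot_fn fn =
          (find_from_boot_fn) dlsym(RTLD_DEFAULT, "JVM_FindClassFromBootLoader");
      // If the symbol is missing (older libjvm), fall back to the ordinary
      // JNI FindClass, which goes through the application class loader.
      return (fn != NULL) ? fn(env, name, JNI_FALSE) : env->FindClass(name);
    }
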
--- a/hotspot/src/os/linux/vm/globals_linux.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/linux/vm/globals_linux.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -38,5 +38,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1261,6 +1261,17 @@
   return (1000 * 1000);
 }
 
+// For now, we say that Linux does not support vtime.  I have no idea
+// whether it can actually be made to (DLD, 9/13/05).
+
+bool os::supports_vtime() { return false; }
+bool os::enable_vtime()   { return false; }
+bool os::vtime_enabled()  { return false; }
+double os::elapsedVTime() {
+  // better than nothing, but not much
+  return elapsedTime();
+}
+
 jlong os::javaTimeMillis() {
   timeval time;
   int status = gettimeofday(&time, NULL);
--- a/hotspot/src/os/solaris/launcher/java.c	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/solaris/launcher/java.c	Wed Jul 05 16:43:17 2017 +0200
@@ -1110,7 +1110,7 @@
         if (propname) {
             jclass cls;
             jmethodID mid;
-            NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System"));
+            NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System"));
             NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                                    env, cls,
                                    "getProperty",
@@ -1125,7 +1125,7 @@
 static jboolean isEncodingSupported(JNIEnv *env, jstring enc) {
     jclass cls;
     jmethodID mid;
-    NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset"));
+    NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset"));
     NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
                            env, cls,
                            "isSupported",
@@ -1161,7 +1161,7 @@
 #else
             if (isEncodingSupported(env, enc) == JNI_TRUE) {
 #endif
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([BLjava/lang/String;)V"));
                 str = (*env)->NewObject(env, cls, mid, ary, enc);
@@ -1172,7 +1172,7 @@
                   the encoding name, in which the StringCoding class will
                   pickup the iso-8859-1 as the fallback converter for us.
                 */
-                NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+                NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
                 NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "([B)V"));
                 str = (*env)->NewObject(env, cls, mid, ary);
@@ -1195,7 +1195,7 @@
     jarray ary;
     int i;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
     NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0));
     for (i = 0; i < strc; i++) {
         jstring str = NewPlatformString(env, *strv++);
@@ -1224,6 +1224,7 @@
         c = *t++;
         *s++ = (c == '.') ? '/' : c;
     } while (c != '\0');
+    // use the application class loader for the main-class
     cls = (*env)->FindClass(env, buf);
     free(buf);
 
@@ -1250,7 +1251,7 @@
     jobject jar, man, attr;
     jstring str, result = 0;
 
-    NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile"));
+    NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile"));
     NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
                                           "(Ljava/lang/String;)V"));
     NULL_CHECK0(str = NewPlatformString(env, jarname));
@@ -1471,7 +1472,7 @@
     jclass ver;
     jmethodID print;
 
-    NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version"));
+    NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version"));
     NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V"));
 
     (*env)->CallStaticVoidMethod(env, ver, print);
--- a/hotspot/src/os/solaris/launcher/java.h	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/solaris/launcher/java.h	Wed Jul 05 16:43:17 2017 +0200
@@ -101,4 +101,15 @@
  */
 extern jboolean _launcher_debug;
 
+/*
+ * This allows classes to be found directly from the VM's bootstrap class
+ * loader.  FindClass uses the application class loader internally, which
+ * causes unnecessary searching of the classpath for the required classes.
+ */
+typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env,
+                                                const char *name,
+                                                jboolean throwError));
+
+jclass FindBootStrapClass(JNIEnv *env, const char *classname);
+
 #endif /* _JAVA_H_ */
--- a/hotspot/src/os/solaris/launcher/java_md.c	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/solaris/launcher/java_md.c	Wed Jul 05 16:43:17 2017 +0200
@@ -1826,3 +1826,24 @@
 {
     return(borrowed_unsetenv(name));
 }
+
+/*
+ * The implementation for finding classes from the bootstrap
+ * class loader; refer to java.h.
+ */
+static FindClassFromBootLoader_t *findBootClass = NULL;
+
+jclass
+FindBootStrapClass(JNIEnv *env, const char* classname)
+{
+   if (findBootClass == NULL) {
+       findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT,
+          "JVM_FindClassFromBootLoader");
+       if (findBootClass == NULL) {
+           fprintf(stderr, "Error: could not load method JVM_FindClassFromBootLoader");
+           return NULL;
+       }
+   }
+   return findBootClass(env, classname, JNI_FALSE);
+}
+
--- a/hotspot/src/os/solaris/vm/globals_solaris.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/solaris/vm/globals_solaris.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -44,5 +44,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, true);
+define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, false);
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -462,16 +462,14 @@
   int online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
   pid_t pid = getpid();
   psetid_t pset = PS_NONE;
-  // Are we running in a processor set?
+  // Are we running in a processor set or is there any processor set around?
   if (pset_bind(PS_QUERY, P_PID, pid, &pset) == 0) {
-    if (pset != PS_NONE) {
-      uint_t pset_cpus;
-      // Query number of cpus in processor set
-      if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) {
-        assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check");
-        _processors_online = pset_cpus;
-        return pset_cpus;
-      }
+    uint_t pset_cpus;
+    // Query the number of cpus available to us.
+    if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) {
+      assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check");
+      _processors_online = pset_cpus;
+      return pset_cpus;
     }
   }
   // Otherwise return number of online cpus
@@ -1691,6 +1689,40 @@
   }
 }
 
+bool os::supports_vtime() { return true; }
+
+bool os::enable_vtime() {
+  int fd = open("/proc/self/ctl", O_WRONLY);
+  if (fd == -1)
+    return false;
+
+  long cmd[] = { PCSET, PR_MSACCT };
+  int res = write(fd, cmd, sizeof(long) * 2);
+  close(fd);
+  if (res != sizeof(long) * 2)
+    return false;
+
+  return true;
+}
+
+bool os::vtime_enabled() {
+  int fd = open("/proc/self/status", O_RDONLY);
+  if (fd == -1)
+    return false;
+
+  pstatus_t status;
+  int res = read(fd, (void*) &status, sizeof(pstatus_t));
+  close(fd);
+  if (res != sizeof(pstatus_t))
+    return false;
+
+  return status.pr_flags & PR_MSACCT;
+}
+
+double os::elapsedVTime() {
+  return (double)gethrvtime() / (double)hrtime_hz;
+}
+
 // Used internally for comparisons only
 // getTimeMillis guaranteed to not move backwards on Solaris
 jlong getTimeMillis() {
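
enable_vtime() switches on Solaris microstate accounting by writing a PCSET/PR_MSACCT control message to /proc/self/ctl, after which gethrvtime() reports the thread's virtual (CPU) time rather than wall-clock time. The wall-versus-virtual distinction is easy to demonstrate with portable stand-ins; this sketch uses std::clock() as an approximation of virtual time and is not the HotSpot API:

    #include <chrono>
    #include <cstdio>
    #include <ctime>
    #include <thread>

    int main() {
      const auto wall_start = std::chrono::steady_clock::now();
      const std::clock_t cpu_start = std::clock();

      std::this_thread::sleep_for(std::chrono::milliseconds(200));  // no CPU work

      const double wall_s = std::chrono::duration<double>(
          std::chrono::steady_clock::now() - wall_start).count();
      const double cpu_s = double(std::clock() - cpu_start) / CLOCKS_PER_SEC;

      // Wall time advances by roughly 0.2 s; process CPU time stays near zero.
      std::printf("wall=%.3fs cpu=%.3fs\n", wall_s, cpu_s);
      return 0;
    }
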
@@ -2688,7 +2720,7 @@
    return bottom;
 }
 
-// Detect the topology change. Typically happens during CPU pluggin-unplugging.
+// Detect the topology change. Typically happens during CPU plugging-unplugging.
 bool os::numa_topology_changed() {
   int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie());
   if (is_stale != -1 && is_stale) {
--- a/hotspot/src/os/windows/vm/globals_windows.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/windows/vm/globals_windows.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -37,5 +37,6 @@
 // platforms, but they may have different default values on other platforms.
 //
 define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePagesIndividualAllocation, true);
 define_pd_global(bool, UseOSErrorReporting, false);  // for now.
 define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -737,6 +737,17 @@
   return result;
 }
 
+// For now, we say that Windows does not support vtime.  I have no idea
+// whether it can actually be made to (DLD, 9/13/05).
+
+bool os::supports_vtime() { return false; }
+bool os::enable_vtime() { return false; }
+bool os::vtime_enabled() { return false; }
+double os::elapsedVTime() {
+  // better than nothing, but not much
+  return elapsedTime();
+}
+
 jlong os::javaTimeMillis() {
   if (UseFakeTimers) {
     return fake_time++;
@@ -2582,9 +2593,104 @@
 }
 
 char* os::reserve_memory_special(size_t bytes) {
-  DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
-  char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_EXECUTE_READWRITE);
-  return res;
+
+  if (UseLargePagesIndividualAllocation) {
+    if (TracePageSizes && Verbose) {
+       tty->print_cr("Reserving large pages individually.");
+    }
+    char * p_buf;
+    // First reserve enough address space in advance, since we want to be
+    // able to break a single contiguous virtual address range into multiple
+    // large-page commits.  WS2003 does not allow reserving large-page space,
+    // so we just use 4K pages for the reserve; this gives us a legal
+    // contiguous address space.  Then we deallocate that reservation and
+    // re-allocate using large pages.
+    const size_t size_of_reserve = bytes + _large_page_size;
+    if (bytes > size_of_reserve) {
+      // Overflowed.
+      warning("Individually allocated large pages failed, "
+        "use -XX:-UseLargePagesIndividualAllocation to turn off");
+      return NULL;
+    }
+    p_buf = (char *) VirtualAlloc(NULL,
+                                 size_of_reserve,  // size of Reserve
+                                 MEM_RESERVE,
+                                 PAGE_EXECUTE_READWRITE);
+    // If reservation failed, return NULL
+    if (p_buf == NULL) return NULL;
+
+    release_memory(p_buf, bytes + _large_page_size);
+    // round up to page boundary.  If the size_of_reserve did not
+    // overflow and the reservation did not fail, this align up
+    // should not overflow.
+    p_buf = (char *) align_size_up((size_t)p_buf, _large_page_size);
+
+    // now go through and allocate one page at a time until all bytes are
+    // allocated
+    size_t  bytes_remaining = align_size_up(bytes, _large_page_size);
+    // An overflow of align_size_up() would have been caught above
+    // in the calculation of size_of_reserve.
+    char * next_alloc_addr = p_buf;
+
+#ifdef ASSERT
+    // Variable for the failure injection
+    long ran_num = os::random();
+    size_t fail_after = ran_num % bytes;
+#endif
+
+    while (bytes_remaining) {
+      size_t bytes_to_rq = MIN2(bytes_remaining, _large_page_size);
+      // Note allocate and commit
+      char * p_new;
+
+#ifdef ASSERT
+      bool inject_error = LargePagesIndividualAllocationInjectError &&
+          (bytes_remaining <= fail_after);
+#else
+      const bool inject_error = false;
+#endif
+
+      if (inject_error) {
+        p_new = NULL;
+      } else {
+        p_new = (char *) VirtualAlloc(next_alloc_addr,
+                                    bytes_to_rq,
+                                    MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
+                                    PAGE_EXECUTE_READWRITE);
+      }
+
+      if (p_new == NULL) {
+        // Free any allocated pages
+        if (next_alloc_addr > p_buf) {
+          // Some memory was committed so release it.
+          size_t bytes_to_release = bytes - bytes_remaining;
+          release_memory(p_buf, bytes_to_release);
+        }
+#ifdef ASSERT
+        if (UseLargePagesIndividualAllocation &&
+            LargePagesIndividualAllocationInjectError) {
+          if (TracePageSizes && Verbose) {
+             tty->print_cr("Reserving large pages individually failed.");
+          }
+        }
+#endif
+        return NULL;
+      }
+      bytes_remaining -= bytes_to_rq;
+      next_alloc_addr += bytes_to_rq;
+    }
+
+    return p_buf;
+
+  } else {
+    // Normal policy: just allocate it all at once.
+    DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
+    char * res = (char *)VirtualAlloc(NULL,
+                                      bytes,
+                                      flag,
+                                      PAGE_EXECUTE_READWRITE);
+    return res;
+  }
 }
 
 bool os::release_memory_special(char* base, size_t bytes) {
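
Two arithmetic details in the individual-allocation path above are worth spelling out: size_of_reserve = bytes + _large_page_size can wrap around, which is what the bytes > size_of_reserve check catches, and align_size_up rounds the scratch reservation up to the next large-page boundary using the usual power-of-two round-up (x + a - 1) & ~(a - 1). A small worked check, with 2 MB large pages assumed purely for illustration:

    #include <cassert>
    #include <cstdint>

    static std::uint64_t align_up(std::uint64_t x, std::uint64_t a) {
      // a must be a power of two, e.g. a 2 MB large page.
      return (x + a - 1) & ~(a - 1);
    }

    int main() {
      const std::uint64_t two_mb = 2ULL * 1024 * 1024;
      assert(align_up(0x7ff612345000ULL, two_mb) == 0x7ff612400000ULL);
      assert(align_up(0x7ff612400000ULL, two_mb) == 0x7ff612400000ULL);  // already aligned
      return 0;
    }
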
@@ -2972,6 +3078,7 @@
 volatile intx os::win32::_os_thread_count    = 0;
 
 bool   os::win32::_is_nt              = false;
+bool   os::win32::_is_windows_2003    = false;
 
 
 void os::win32::initialize_system_info() {
@@ -2994,7 +3101,15 @@
   GetVersionEx(&oi);
   switch(oi.dwPlatformId) {
     case VER_PLATFORM_WIN32_WINDOWS: _is_nt = false; break;
-    case VER_PLATFORM_WIN32_NT:      _is_nt = true;  break;
+    case VER_PLATFORM_WIN32_NT:
+      _is_nt = true;
+      {
+        int os_vers = oi.dwMajorVersion * 1000 + oi.dwMinorVersion;
+        if (os_vers == 5002) {
+          _is_windows_2003 = true;
+        }
+      }
+      break;
     default: fatal("Unknown platform");
   }
 
@@ -3092,9 +3207,13 @@
     NoYieldsInMicrolock = true;
   }
 #endif
+  // This may be overridden later when argument processing is done.
+  FLAG_SET_ERGO(bool, UseLargePagesIndividualAllocation,
+    os::win32::is_windows_2003());
+
   // Initialize main_process and main_thread
   main_process = GetCurrentProcess();  // Remember main_process is a pseudo handle
-  if (!DuplicateHandle(main_process, GetCurrentThread(), main_process,
+ if (!DuplicateHandle(main_process, GetCurrentThread(), main_process,
                        &main_thread, THREAD_ALL_ACCESS, false, 0)) {
     fatal("DuplicateHandle failed\n");
   }
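
The new is_windows_2003() test encodes the GetVersionEx result as dwMajorVersion * 1000 + dwMinorVersion, so 5002 means NT 5.2, the version reported by Windows Server 2003 (Windows XP x64 reports the same number), and that is where the large-page individual-allocation default gets turned on ergonomically. A trivial sketch of the encoding; the version-to-product mapping is the assumption here:

    #include <cassert>

    // Same encoding as the os_vers check above: major * 1000 + minor.
    static int encode_nt_version(int major, int minor) { return major * 1000 + minor; }

    int main() {
      assert(encode_nt_version(5, 2) == 5002);  // NT 5.2: Windows Server 2003 (matched)
      assert(encode_nt_version(5, 1) == 5001);  // NT 5.1: Windows XP (not matched)
      assert(encode_nt_version(6, 0) == 6000);  // NT 6.0: Vista / Server 2008 (not matched)
      return 0;
    }
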
--- a/hotspot/src/os/windows/vm/os_windows.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/os/windows/vm/os_windows.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -34,6 +34,7 @@
   static julong _physical_memory;
   static size_t _default_stack_size;
   static bool   _is_nt;
+  static bool   _is_windows_2003;
 
  public:
   // Windows-specific interface:
@@ -60,6 +61,9 @@
   // Tells whether the platform is NT or Windown95
   static bool is_nt() { return _is_nt; }
 
+  // Tells whether the platform is Windows 2003
+  static bool is_windows_2003() { return _is_windows_2003; }
+
   // Returns the byte size of a virtual memory page
   static int vm_page_size() { return _vm_page_size; }
 
--- a/hotspot/src/share/vm/adlc/formssel.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/adlc/formssel.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -3768,6 +3768,10 @@
 int MatchRule::is_ideal_copy() const {
   if( _rChild ) {
     const char  *opType = _rChild->_opType;
+#if 1
+    if( strcmp(opType,"CastIP")==0 )
+      return 1;
+#else
     if( strcmp(opType,"CastII")==0 )
       return 1;
     // Do not treat *CastPP this way, because it
@@ -3787,6 +3791,7 @@
     //  return 1;
     //if( strcmp(opType,"CastP2X")==0 )
     //  return 1;
+#endif
   }
   if( is_chain_rule(_AD.globalNames()) &&
       _lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 )
--- a/hotspot/src/share/vm/asm/assembler.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/asm/assembler.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -249,8 +249,6 @@
 bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Exception handler checks the nmethod's implicit null checks table
   // only when this method returns false.
-#ifndef SPARC
-  // Sparc does not have based addressing
   if (UseCompressedOops) {
     // The first page after heap_base is unmapped and
     // the 'offset' is equal to [heap_base + offset] for
@@ -261,7 +259,6 @@
       offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
     }
   }
-#endif // SPARC
   return offset < 0 || os::vm_page_size() <= offset;
 }
 
--- a/hotspot/src/share/vm/c1/c1_CodeStubs.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_CodeStubs.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -482,3 +482,81 @@
   virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); }
 #endif // PRODUCT
 };
+
+//////////////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+// Code stubs for Garbage-First barriers.
+class G1PreBarrierStub: public CodeStub {
+ private:
+  LIR_Opr _addr;
+  LIR_Opr _pre_val;
+  LIR_PatchCode _patch_code;
+  CodeEmitInfo* _info;
+
+ public:
+  // pre_val (a temporary register) must be a register;
+  // addr (the address of the field to be read) must be a LIR_Address
+  G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) :
+    _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info)
+  {
+    assert(_pre_val->is_register(), "should be temporary register");
+    assert(_addr->is_address(), "should be the address of the field");
+  }
+
+  LIR_Opr addr() const { return _addr; }
+  LIR_Opr pre_val() const { return _pre_val; }
+  LIR_PatchCode patch_code() const { return _patch_code; }
+  CodeEmitInfo* info() const { return _info; }
+
+  virtual void emit_code(LIR_Assembler* e);
+  virtual void visit(LIR_OpVisitState* visitor) {
+    // don't pass in the code emit info since it's processed in the fast
+    // path
+    if (_info != NULL)
+      visitor->do_slow_case(_info);
+    else
+      visitor->do_slow_case();
+    visitor->do_input(_addr);
+    visitor->do_temp(_pre_val);
+  }
+#ifndef PRODUCT
+  virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); }
+#endif // PRODUCT
+};
+
+class G1PostBarrierStub: public CodeStub {
+ private:
+  LIR_Opr _addr;
+  LIR_Opr _new_val;
+
+  static jbyte* _byte_map_base;
+  static jbyte* byte_map_base_slow();
+  static jbyte* byte_map_base() {
+    if (_byte_map_base == NULL) {
+      _byte_map_base = byte_map_base_slow();
+    }
+    return _byte_map_base;
+  }
+
+ public:
+  // addr (the address of the object head) and new_val must be registers.
+  G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { }
+
+  LIR_Opr addr() const { return _addr; }
+  LIR_Opr new_val() const { return _new_val; }
+
+  virtual void emit_code(LIR_Assembler* e);
+  virtual void visit(LIR_OpVisitState* visitor) {
+    // don't pass in the code emit info since it's processed in the fast path
+    visitor->do_slow_case();
+    visitor->do_input(_addr);
+    visitor->do_input(_new_val);
+  }
+#ifndef PRODUCT
+  virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); }
+#endif // PRODUCT
+};
+
+#endif // SERIALGC
+//////////////////////////////////////////////////////////////////////////////////////////
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -74,6 +74,7 @@
 LIR_Assembler::LIR_Assembler(Compilation* c):
    _compilation(c)
  , _masm(c->masm())
+ , _bs(Universe::heap()->barrier_set())
  , _frame_map(c->frame_map())
  , _current_block(NULL)
  , _pending_non_safepoint(NULL)
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -24,11 +24,13 @@
 
 class Compilation;
 class ScopeValue;
+class BarrierSet;
 
 class LIR_Assembler: public CompilationResourceObj {
  private:
   C1_MacroAssembler* _masm;
   CodeStubList*      _slow_case_stubs;
+  BarrierSet*        _bs;
 
   Compilation*       _compilation;
   FrameMap*          _frame_map;
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -285,16 +285,7 @@
 
 
 void LIRGenerator::init() {
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-
-#ifdef _LP64
-  _card_table_base = new LIR_Const((jlong)ct->byte_map_base);
-#else
-  _card_table_base = new LIR_Const((jint)ct->byte_map_base);
-#endif
+  _bs = Universe::heap()->barrier_set();
 }
 
 
@@ -1239,8 +1230,37 @@
 
 // Various barriers
 
+void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
+  // Do the pre-write barrier, if any.
+  switch (_bs->kind()) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info);
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      // No pre barriers
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      // No pre barriers
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+
 void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
-  switch (Universe::heap()->barrier_set()->kind()) {
+  switch (_bs->kind()) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      G1SATBCardTableModRef_post_barrier(addr,  new_val);
+      break;
+#endif // SERIALGC
     case BarrierSet::CardTableModRef:
     case BarrierSet::CardTableExtension:
       CardTableModRef_post_barrier(addr,  new_val);
@@ -1254,11 +1274,120 @@
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
+  if (G1DisablePreBarrier) return;
+
+  // First we test whether marking is in progress.
+  BasicType flag_type;
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    flag_type = T_INT;
+  } else {
+    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
+              "Assumption");
+    flag_type = T_BYTE;
+  }
+  LIR_Opr thrd = getThreadPointer();
+  LIR_Address* mark_active_flag_addr =
+    new LIR_Address(thrd,
+                    in_bytes(JavaThread::satb_mark_queue_offset() +
+                             PtrQueue::byte_offset_of_active()),
+                    flag_type);
+  // Read the marking-in-progress flag.
+  LIR_Opr flag_val = new_register(T_INT);
+  __ load(mark_active_flag_addr, flag_val);
+
+  LabelObj* start_store = new LabelObj();
+
+  LIR_PatchCode pre_val_patch_code =
+    patch ? lir_patch_normal : lir_patch_none;
+
+  LIR_Opr pre_val = new_register(T_OBJECT);
+
+  __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
+  if (!addr_opr->is_address()) {
+    assert(addr_opr->is_register(), "must be");
+    addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT));
+  }
+  CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code,
+                                        info);
+  __ branch(lir_cond_notEqual, T_INT, slow);
+  __ branch_destination(slow->continuation());
+}
+
+void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
+  if (G1DisablePostBarrier) return;
+
+  // If the "new_val" is a constant NULL, no barrier is necessary.
+  if (new_val->is_constant() &&
+      new_val->as_constant_ptr()->as_jobject() == NULL) return;
+
+  if (!new_val->is_register()) {
+    LIR_Opr new_val_reg = new_pointer_register();
+    if (new_val->is_constant()) {
+      __ move(new_val, new_val_reg);
+    } else {
+      __ leal(new_val, new_val_reg);
+    }
+    new_val = new_val_reg;
+  }
+  assert(new_val->is_register(), "must be a register at this point");
+
+  if (addr->is_address()) {
+    LIR_Address* address = addr->as_address_ptr();
+    LIR_Opr ptr = new_pointer_register();
+    if (!address->index()->is_valid() && address->disp() == 0) {
+      __ move(address->base(), ptr);
+    } else {
+      assert(address->disp() != max_jint, "lea doesn't support patched addresses!");
+      __ leal(addr, ptr);
+    }
+    addr = ptr;
+  }
+  assert(addr->is_register(), "must be a register at this point");
+
+  LIR_Opr xor_res = new_pointer_register();
+  LIR_Opr xor_shift_res = new_pointer_register();
+
+  if (TwoOperandLIRForm ) {
+    __ move(addr, xor_res);
+    __ logical_xor(xor_res, new_val, xor_res);
+    __ move(xor_res, xor_shift_res);
+    __ unsigned_shift_right(xor_shift_res,
+                            LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
+                            xor_shift_res,
+                            LIR_OprDesc::illegalOpr());
+  } else {
+    __ logical_xor(addr, new_val, xor_res);
+    __ unsigned_shift_right(xor_res,
+                            LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
+                            xor_shift_res,
+                            LIR_OprDesc::illegalOpr());
+  }
+
+  if (!new_val->is_register()) {
+    LIR_Opr new_val_reg = new_pointer_register();
+    __ leal(new_val, new_val_reg);
+    new_val = new_val_reg;
+  }
+  assert(new_val->is_register(), "must be a register at this point");
+
+  __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
+
+  CodeStub* slow = new G1PostBarrierStub(addr, new_val);
+  __ branch(lir_cond_notEqual, T_INT, slow);
+  __ branch_destination(slow->continuation());
+}
+
+#endif // SERIALGC
+////////////////////////////////////////////////////////////////////////
+
 void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
 
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
-  LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base);
+  assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
+  LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base);
   if (addr->is_address()) {
     LIR_Address* address = addr->as_address_ptr();
     LIR_Opr ptr = new_register(T_OBJECT);
@@ -1388,6 +1517,13 @@
     __ membar_release();
   }
 
+  if (is_oop) {
+    // Do the pre-write barrier, if any.
+    pre_barrier(LIR_OprFact::address(address),
+                needs_patching,
+                (info ? new CodeEmitInfo(info) : NULL));
+  }
+
   if (is_volatile) {
     assert(!needs_patching && x->is_loaded(),
            "how do we know it's volatile if it's not loaded");
@@ -1398,7 +1534,12 @@
   }
 
   if (is_oop) {
+#ifdef PRECISE_CARDMARK
+    // Precise cardmarks don't work
+    post_barrier(LIR_OprFact::address(address), value.result());
+#else
     post_barrier(object.result(), value.result());
+#endif // PRECISE_CARDMARK
   }
 
   if (is_volatile && os::is_MP()) {
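
The G1 post barrier generated above filters out same-region stores with a xor-and-shift: ((addr ^ new_val) >> LogOfHRGrainBytes) is zero exactly when the two pointers agree in all bits above the region-size bits, i.e. when they lie in the same heap region, so only cross-region stores branch to the G1PostBarrierStub slow path. A small worked check of that predicate, with 1 MB regions assumed purely for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned log_region_bytes = 20;  // 1 MB regions (illustrative value)
      auto cross_region = [&](std::uint64_t addr, std::uint64_t new_val) {
        // Non-zero iff addr and new_val differ in some bit >= log_region_bytes,
        // i.e. iff they fall into different regions.
        return ((addr ^ new_val) >> log_region_bytes) != 0;
      };

      assert(!cross_region(0x10050000ULL, 0x100F0000ULL));  // same 1 MB region
      assert( cross_region(0x10050000ULL, 0x10230000ULL));  // regions 0x100 and 0x102
      return 0;
    }
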
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -145,6 +145,7 @@
 
 // only the classes below belong in the same file
 class LIRGenerator: public InstructionVisitor, public BlockClosure {
+
  private:
   Compilation*  _compilation;
   ciMethod*     _method;    // method that we are compiling
@@ -154,6 +155,7 @@
   Values        _instruction_for_operand;
   BitMap2D      _vreg_flags; // flags which can be set on a per-vreg basis
   LIR_List*     _lir;
+  BarrierSet*   _bs;
 
   LIRGenerator* gen() {
     return this;
@@ -174,8 +176,6 @@
   LIR_OprList                     _reg_for_constants;
   Values                          _unpinned_constants;
 
-  LIR_Const*                      _card_table_base;
-
   friend class PhiResolver;
 
   // unified bailout support
@@ -196,8 +196,6 @@
   LIR_Opr load_constant(Constant* x);
   LIR_Opr load_constant(LIR_Const* constant);
 
-  LIR_Const* card_table_base() const { return _card_table_base; }
-
   void  set_result(Value x, LIR_Opr opr)           {
     assert(opr->is_valid(), "must set to valid value");
     assert(x->operand()->is_illegal(), "operand should never change");
@@ -253,12 +251,17 @@
 
   // generic interface
 
+  void pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
   void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
 
   // specific implementations
+  // pre barriers
+
+  void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
 
   // post barriers
 
+  void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
   void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
 
 
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -168,6 +168,8 @@
   switch (id) {
     // These stubs don't need to have an oopmap
     case dtrace_object_alloc_id:
+    case g1_pre_barrier_slow_id:
+    case g1_post_barrier_slow_id:
     case slow_subtype_check_id:
     case fpu2long_stub_id:
     case unwind_exception_id:
--- a/hotspot/src/share/vm/c1/c1_Runtime1.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -56,6 +56,8 @@
   stub(access_field_patching)        \
   stub(load_klass_patching)          \
   stub(jvmti_exception_throw)        \
+  stub(g1_pre_barrier_slow)          \
+  stub(g1_post_barrier_slow)         \
   stub(fpu2long_stub)                \
   stub(counter_overflow)             \
   last_entry(number_of_ids)
--- a/hotspot/src/share/vm/c1/c1_globals.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_globals.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -213,9 +213,6 @@
   develop(bool, UseFastLocking, true,                                       \
           "Use fast inlined locking code")                                  \
                                                                             \
-  product(bool, FastTLABRefill, true,                                       \
-          "Use fast TLAB refill code")                                      \
-                                                                            \
   develop(bool, UseSlowPath, false,                                         \
           "For debugging: test slow cases by always using them")            \
                                                                             \
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/ci/ciMethodBlocks.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -49,7 +49,7 @@
 // first half.  Returns the range beginning at bci.
 ciBlock *ciMethodBlocks::split_block_at(int bci) {
   ciBlock *former_block = block_containing(bci);
-  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
+  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
   _blocks->append(new_block);
   assert(former_block != NULL, "must not be NULL");
   new_block->set_limit_bci(bci);
@@ -83,7 +83,7 @@
   if (cb == NULL ) {
     // This is our first time visiting this bytecode.  Create
     // a fresh block and assign it this starting point.
-    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
+    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
     _blocks->append(nb);
      _bci_to_block[bci] = nb;
     return nb;
@@ -98,6 +98,11 @@
   }
 }
 
+ciBlock *ciMethodBlocks::make_dummy_block() {
+  ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
+  return dum;
+}
+
 void ciMethodBlocks::do_analysis() {
   ciBytecodeStream s(_method);
   ciBlock *cur_block = block_containing(0);
@@ -253,7 +258,7 @@
   Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
 
   // create initial block covering the entire method
-  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
+  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
   _blocks->append(b);
   _bci_to_block[0] = b;
 
@@ -334,7 +339,7 @@
 #endif
 
 
-ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
+ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
 #ifndef PRODUCT
                          _method(method),
 #endif
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/ci/ciMethodBlocks.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -48,6 +48,8 @@
   int num_blocks()  { return _num_blocks;}
   void clear_processed();
 
+  ciBlock *make_dummy_block(); // a block not associated with a bci
+
 #ifndef PRODUCT
   void dump();
 #endif
@@ -81,7 +83,7 @@
     fall_through_bci = -1
   };
 
-  ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
+  ciBlock(ciMethod *method, int index, int start_bci);
   int start_bci() const         { return _start_bci; }
   int limit_bci() const         { return _limit_bci; }
   int control_bci() const       { return _control_bci; }
@@ -94,7 +96,6 @@
   int ex_limit_bci() const      { return _ex_limit_bci; }
   bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }
 
-
   // flag handling
   bool  processed() const           { return (_flags & Processed) != 0; }
   bool  is_handler() const          { return (_flags & Handler) != 0; }
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -338,8 +338,10 @@
   }
   _trap_bci = -1;
   _trap_index = 0;
+  _def_locals.clear();
 }
 
+
 // ------------------------------------------------------------------
 // ciTypeFlow::get_start_state
 //
@@ -735,7 +737,7 @@
 void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) {
   bool will_link;
   ciKlass* klass = str->get_klass(will_link);
-  if (!will_link) {
+  if (!will_link || str->is_unresolved_klass()) {
     trap(str, klass, str->get_klass_index());
   } else {
     push_object(klass);
@@ -1268,7 +1270,9 @@
     }
   case Bytecodes::_iinc:
     {
-      check_int(local(str->get_index()));
+      int lnum = str->get_index();
+      check_int(local(lnum));
+      store_to_local(lnum);
       break;
     }
   case Bytecodes::_iload:   load_local_int(str->get_index()); break;
@@ -1506,6 +1510,46 @@
 }
 #endif
 
+
+// ------------------------------------------------------------------
+// ciTypeFlow::SuccIter::next
+//
+void ciTypeFlow::SuccIter::next() {
+  int succ_ct = _pred->successors()->length();
+  int next = _index + 1;
+  if (next < succ_ct) {
+    _index = next;
+    _succ = _pred->successors()->at(next);
+    return;
+  }
+  for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) {
+    // Do not compile any code for unloaded exception types.
+    // Following compiler passes are responsible for doing this also.
+    ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i);
+    if (exception_klass->is_loaded()) {
+      _index = next;
+      _succ = _pred->exceptions()->at(i);
+      return;
+    }
+    next++;
+  }
+  _index = -1;
+  _succ = NULL;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::SuccIter::set_succ
+//
+void ciTypeFlow::SuccIter::set_succ(Block* succ) {
+  int succ_ct = _pred->successors()->length();
+  if (_index < succ_ct) {
+    _pred->successors()->at_put(_index, succ);
+  } else {
+    int idx = _index - succ_ct;
+    _pred->exceptions()->at_put(idx, succ);
+  }
+}
+
 // ciTypeFlow::Block
 //
 // A basic block.
@@ -1526,10 +1570,11 @@
   _jsrs = new_jsrs;
   _next = NULL;
   _on_work_list = false;
-  _pre_order = -1; assert(!has_pre_order(), "");
-  _private_copy = false;
+  _backedge_copy = false;
+  _exception_entry = false;
   _trap_bci = -1;
   _trap_index = 0;
+  df_init();
 
   if (CITraceTypeFlow) {
     tty->print_cr(">> Created new block");
@@ -1541,55 +1586,13 @@
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::clone_loop_head
-//
-ciTypeFlow::Block*
-ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer,
-                                   int branch_bci,
-                                   ciTypeFlow::Block* target,
-                                   ciTypeFlow::JsrSet* jsrs) {
-  // Loop optimizations are not performed on Tier1 compiles. Do nothing.
-  if (analyzer->env()->comp_level() < CompLevel_full_optimization) {
-    return target;
-  }
-
-  // The current block ends with a branch.
-  //
-  // If the target block appears to be the test-clause of a for loop, and
-  // it is not too large, and it has not yet been cloned, clone it.
-  // The pre-existing copy becomes the private clone used only by
-  // the initial iteration of the loop.  (We know we are simulating
-  // the initial iteration right now, since we have never calculated
-  // successors before for this block.)
-
-  if (branch_bci <= start()
-      && (target->limit() - target->start()) <= CICloneLoopTestLimit
-      && target->private_copy_count() == 0) {
-    // Setting the private_copy bit ensures that the target block cannot be
-    // reached by any other paths, such as fall-in from the loop body.
-    // The private copy will be accessible only on successor lists
-    // created up to this point.
-    target->set_private_copy(true);
-    if (CITraceTypeFlow) {
-      tty->print(">> Cloning a test-clause block ");
-      print_value_on(tty);
-      tty->cr();
-    }
-    // If the target is the current block, then later on a new copy of the
-    // target block will be created when its bytecodes are reached by
-    // an alternate path. (This is the case for loops with the loop
-    // head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.)
-    //
-    // Otherwise, duplicate the target block now and use it immediately.
-    // (The case for loops with the loop head at the bci-wise top of the
-    // loop, as with 1.4.2 javac.)
-    //
-    // In either case, the new copy of the block will remain public.
-    if (target != this) {
-      target = analyzer->block_at(branch_bci, jsrs);
-    }
-  }
-  return target;
+// ciTypeFlow::Block::df_init
+void ciTypeFlow::Block::df_init() {
+  _pre_order = -1; assert(!has_pre_order(), "");
+  _post_order = -1; assert(!has_post_order(), "");
+  _loop = NULL;
+  _irreducible_entry = false;
+  _rpo_next = NULL;
 }
 
 // ------------------------------------------------------------------
@@ -1644,7 +1647,6 @@
       case Bytecodes::_ifnull:       case Bytecodes::_ifnonnull:
         // Our successors are the branch target and the next bci.
         branch_bci = str->get_dest();
-        clone_loop_head(analyzer, branch_bci, this, jsrs);
         _successors =
           new (arena) GrowableArray<Block*>(arena, 2, 0, NULL);
         assert(_successors->length() == IF_NOT_TAKEN, "");
@@ -1658,14 +1660,7 @@
         _successors =
           new (arena) GrowableArray<Block*>(arena, 1, 0, NULL);
         assert(_successors->length() == GOTO_TARGET, "");
-        target = analyzer->block_at(branch_bci, jsrs);
-        // If the target block has not been visited yet, and looks like
-        // a two-way branch, attempt to clone it if it is a loop head.
-        if (target->_successors != NULL
-            && target->_successors->length() == (IF_TAKEN + 1)) {
-          target = clone_loop_head(analyzer, branch_bci, target, jsrs);
-        }
-        _successors->append(target);
+        _successors->append(analyzer->block_at(branch_bci, jsrs));
         break;
 
       case Bytecodes::_jsr:
@@ -1801,65 +1796,60 @@
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::is_simpler_than
-//
-// A relation used to order our work list.  We work on a block earlier
-// if it has a smaller jsr stack or it occurs earlier in the program
-// text.
-//
-// Note: maybe we should redo this functionality to make blocks
-// which correspond to exceptions lower priority.
-bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) {
-  if (other == NULL) {
-    return true;
-  } else {
-    int size1 = _jsrs->size();
-    int size2 = other->_jsrs->size();
-    if (size1 < size2) {
-      return true;
-    } else if (size2 < size1) {
-      return false;
-    } else {
-#if 0
-      if (size1 > 0) {
-        int r1 = _jsrs->record_at(0)->return_address();
-        int r2 = _jsrs->record_at(0)->return_address();
-        if (r1 < r2) {
-          return true;
-        } else if (r2 < r1) {
-          return false;
-        } else {
-          int e1 = _jsrs->record_at(0)->return_address();
-          int e2 = _jsrs->record_at(0)->return_address();
-          if (e1 < e2) {
-            return true;
-          } else if (e2 < e1) {
-            return false;
-          }
-        }
-      }
-#endif
-      return (start() <= other->start());
-    }
-  }
+// ciTypeFlow::Block::set_backedge_copy
+// Use this only to make a pre-existing public block into a backedge copy.
+void ciTypeFlow::Block::set_backedge_copy(bool z) {
+  assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public");
+  _backedge_copy = z;
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::set_private_copy
-// Use this only to make a pre-existing public block into a private copy.
-void ciTypeFlow::Block::set_private_copy(bool z) {
-  assert(z || (z == is_private_copy()), "cannot make a private copy public");
-  _private_copy = z;
+// ciTypeFlow::Block::is_clonable_exit
+//
+// At most 2 normal successors, one of which continues looping,
+// and all exceptional successors must exit.
+bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) {
+  int normal_cnt  = 0;
+  int in_loop_cnt = 0;
+  for (SuccIter iter(this); !iter.done(); iter.next()) {
+    Block* succ = iter.succ();
+    if (iter.is_normal_ctrl()) {
+      if (++normal_cnt > 2) return false;
+      if (lp->contains(succ->loop())) {
+        if (++in_loop_cnt > 1) return false;
+      }
+    } else {
+      if (lp->contains(succ->loop())) return false;
+    }
+  }
+  return in_loop_cnt == 1;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Block::looping_succ
+//
+ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) {
+  assert(successors()->length() <= 2, "at most 2 normal successors");
+  for (SuccIter iter(this); !iter.done(); iter.next()) {
+    Block* succ = iter.succ();
+    if (lp->contains(succ->loop())) {
+      return succ;
+    }
+  }
+  return NULL;
 }
 
 #ifndef PRODUCT
 // ------------------------------------------------------------------
 // ciTypeFlow::Block::print_value_on
 void ciTypeFlow::Block::print_value_on(outputStream* st) const {
-  if (has_pre_order())  st->print("#%-2d ", pre_order());
+  if (has_pre_order()) st->print("#%-2d ", pre_order());
+  if (has_rpo())       st->print("rpo#%-2d ", rpo());
   st->print("[%d - %d)", start(), limit());
+  if (is_loop_head()) st->print(" lphd");
+  if (is_irreducible_entry()) st->print(" irred");
   if (_jsrs->size() > 0) { st->print("/");  _jsrs->print_on(st); }
-  if (is_private_copy())  st->print("/private_copy");
+  if (is_backedge_copy())  st->print("/backedge_copy");
 }
 
 // ------------------------------------------------------------------
@@ -1871,6 +1861,16 @@
   st->print_cr("  ====================================================  ");
   st->print ("  ");
   print_value_on(st);
+  st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr();
+  if (loop() && loop()->parent() != NULL) {
+    st->print(" loops:");
+    Loop* lp = loop();
+    do {
+      st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order());
+      if (lp->is_irreducible()) st->print("(ir)");
+      lp = lp->parent();
+    } while (lp->parent() != NULL);
+  }
   st->cr();
   _state->print_on(st);
   if (_successors == NULL) {
@@ -1907,6 +1907,21 @@
 }
 #endif
 
+#ifndef PRODUCT
+// ------------------------------------------------------------------
+// ciTypeFlow::LocalSet::print_on
+void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const {
+  st->print("{");
+  for (int i = 0; i < max; i++) {
+    if (test(i)) st->print(" %d", i);
+  }
+  if (limit > max) {
+    st->print(" %d..%d ", max, limit);
+  }
+  st->print(" }");
+}
+#endif
+
 // ciTypeFlow
 //
 // This is a pass over the bytecodes which computes the following:
@@ -1922,12 +1937,11 @@
   _max_locals = method->max_locals();
   _max_stack = method->max_stack();
   _code_size = method->code_size();
+  _has_irreducible_entry = false;
   _osr_bci = osr_bci;
   _failure_reason = NULL;
   assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument");
-
   _work_list = NULL;
-  _next_pre_order = 0;
 
   _ciblock_count = _methodBlocks->num_blocks();
   _idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray<Block*>*, _ciblock_count);
@@ -1949,12 +1963,6 @@
   _work_list = next_block->next();
   next_block->set_next(NULL);
   next_block->set_on_work_list(false);
-  if (!next_block->has_pre_order()) {
-    // Assign "pre_order" as each new block is taken from the work list.
-    // This number may be used by following phases to order block visits.
-    assert(!have_block_count(), "must not have mapped blocks yet")
-    next_block->set_pre_order(_next_pre_order++);
-  }
   return next_block;
 }
 
@@ -1962,30 +1970,37 @@
 // ciTypeFlow::add_to_work_list
 //
 // Add a basic block to our work list.
+// List is sorted by decreasing postorder sort (same as increasing RPO)
 void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) {
   assert(!block->is_on_work_list(), "must not already be on work list");
 
   if (CITraceTypeFlow) {
-    tty->print(">> Adding block%s ", block->has_pre_order() ? " (again)" : "");
+    tty->print(">> Adding block ");
     block->print_value_on(tty);
     tty->print_cr(" to the work list : ");
   }
 
   block->set_on_work_list(true);
-  if (block->is_simpler_than(_work_list)) {
+
+  // decreasing post order sort
+
+  Block* prev = NULL;
+  Block* current = _work_list;
+  int po = block->post_order();
+  while (current != NULL) {
+    if (!current->has_post_order() || po > current->post_order())
+      break;
+    prev = current;
+    current = current->next();
+  }
+  if (prev == NULL) {
     block->set_next(_work_list);
     _work_list = block;
   } else {
-    Block *temp = _work_list;
-    while (!block->is_simpler_than(temp->next())) {
-      if (CITraceTypeFlow) {
-        tty->print(".");
-      }
-      temp = temp->next();
-    }
-    block->set_next(temp->next());
-    temp->set_next(block);
+    block->set_next(current);
+    prev->set_next(block);
   }
+
   if (CITraceTypeFlow) {
     tty->cr();
   }
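
add_to_work_list now keeps the work list sorted by decreasing post order, so blocks are pulled off in reverse post order, the standard visit order for a forward dataflow pass, which tends to cut down on re-flows compared with the old is_simpler_than ordering. A minimal sketch of the same sorted insertion using plain STL containers rather than the ciTypeFlow types:

    #include <cassert>
    #include <list>

    // Insert 'po' so the list stays sorted by decreasing post order;
    // popping from the front then yields reverse post order (RPO).
    static void add_sorted(std::list<int>& work, int po) {
      auto it = work.begin();
      while (it != work.end() && *it >= po) ++it;
      work.insert(it, po);
    }

    int main() {
      std::list<int> work;
      for (int po : {3, 7, 1, 5}) add_sorted(work, po);
      assert((work == std::list<int>{7, 5, 3, 1}));
      return 0;
    }
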
@@ -2008,7 +2023,7 @@
   assert(ciblk->start_bci() == bci, "bad ciBlock boundaries");
   Block* block = get_block_for(ciblk->index(), jsrs, option);
 
-  assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result");
+  assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result");
 
   if (CITraceTypeFlow) {
     if (block != NULL) {
@@ -2072,8 +2087,9 @@
     }
 
     if (block->meet_exception(exception_klass, state)) {
-      // Block was modified.  Add it to the work list.
-      if (!block->is_on_work_list()) {
+      // Block was modified and has PO.  Add it to the work list.
+      if (block->has_post_order() &&
+          !block->is_on_work_list()) {
         add_to_work_list(block);
       }
     }
@@ -2091,8 +2107,9 @@
   for (int i = 0; i < len; i++) {
     Block* block = successors->at(i);
     if (block->meet(state)) {
-      // Block was modified.  Add it to the work list.
-      if (!block->is_on_work_list()) {
+      // Block was modified and has PO.  Add it to the work list.
+      if (block->has_post_order() &&
+          !block->is_on_work_list()) {
         add_to_work_list(block);
       }
     }
@@ -2133,6 +2150,111 @@
   return true;
 }
 
+// ------------------------------------------------------------------
+// ciTypeFlow::clone_loop_heads
+//
+// Clone the loop heads
+bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
+  bool rslt = false;
+  for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) {
+    lp = iter.current();
+    Block* head = lp->head();
+    if (lp == loop_tree_root() ||
+        lp->is_irreducible() ||
+        !head->is_clonable_exit(lp))
+      continue;
+
+    // check not already cloned
+    if (head->backedge_copy_count() != 0)
+      continue;
+
+    // check _no_ shared head below us
+    Loop* ch;
+    for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling());
+    if (ch != NULL)
+      continue;
+
+    // Clone head
+    Block* new_head = head->looping_succ(lp);
+    Block* clone = clone_loop_head(lp, temp_vector, temp_set);
+    // Update lp's info
+    clone->set_loop(lp);
+    lp->set_head(new_head);
+    lp->set_tail(clone);
+    // And move original head into outer loop
+    head->set_loop(lp->parent());
+
+    rslt = true;
+  }
+  return rslt;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::clone_loop_head
+//
+// Clone lp's head and replace tail's successors with clone.
+//
+//  |
+//  v
+// head <-> body
+//  |
+//  v
+// exit
+//
+// new_head
+//
+//  |
+//  v
+// head ----------\
+//  |             |
+//  |             v
+//  |  clone <-> body
+//  |    |
+//  | /--/
+//  | |
+//  v v
+// exit
+//
+ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
+  Block* head = lp->head();
+  Block* tail = lp->tail();
+  if (CITraceTypeFlow) {
+    tty->print(">> Requesting clone of loop head "); head->print_value_on(tty);
+    tty->print("  for predecessor ");                tail->print_value_on(tty);
+    tty->cr();
+  }
+  Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy);
+  assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges");
+
+  assert(!clone->has_pre_order(), "just created");
+  clone->set_next_pre_order();
+
+  // Insert clone after (orig) tail in reverse post order
+  clone->set_rpo_next(tail->rpo_next());
+  tail->set_rpo_next(clone);
+
+  // tail->head becomes tail->clone
+  for (SuccIter iter(tail); !iter.done(); iter.next()) {
+    if (iter.succ() == head) {
+      iter.set_succ(clone);
+      break;
+    }
+  }
+  flow_block(tail, temp_vector, temp_set);
+  if (head == tail) {
+    // For self-loops, clone->head becomes clone->clone
+    flow_block(clone, temp_vector, temp_set);
+    for (SuccIter iter(clone); !iter.done(); iter.next()) {
+      if (iter.succ() == head) {
+        iter.set_succ(clone);
+        break;
+      }
+    }
+  }
+  flow_block(clone, temp_vector, temp_set);
+
+  return clone;
+}
 
 // ------------------------------------------------------------------
 // ciTypeFlow::flow_block
@@ -2159,11 +2281,14 @@
 
   // Grab the state from the current block.
   block->copy_state_into(state);
+  state->def_locals()->clear();
 
   GrowableArray<Block*>*           exceptions = block->exceptions();
   GrowableArray<ciInstanceKlass*>* exc_klasses = block->exc_klasses();
   bool has_exceptions = exceptions->length() > 0;
 
+  bool exceptions_used = false;
+
   ciBytecodeStream str(method());
   str.reset_to_bci(start);
   Bytecodes::Code code;
@@ -2172,6 +2297,7 @@
     // Check for exceptional control flow from this point.
     if (has_exceptions && can_trap(str)) {
       flow_exceptions(exceptions, exc_klasses, state);
+      exceptions_used = true;
     }
     // Apply the effects of the current bytecode to our state.
     bool res = state->apply_one_bytecode(&str);
@@ -2189,9 +2315,14 @@
         block->print_on(tty);
       }
 
+      // Save set of locals defined in this block
+      block->def_locals()->add(state->def_locals());
+
       // Record (no) successors.
       block->successors(&str, state, jsrs);
 
+      assert(!has_exceptions || exceptions_used, "Not removing exceptions");
+
       // Discontinue interpretation of this Block.
       return;
     }
@@ -2202,6 +2333,7 @@
     // Check for exceptional control flow from this point.
     if (has_exceptions && can_trap(str)) {
       flow_exceptions(exceptions, exc_klasses, state);
+      exceptions_used = true;
     }
 
     // Fix the JsrSet to reflect effect of the bytecode.
@@ -2218,11 +2350,306 @@
     successors = block->successors(&str, NULL, NULL);
   }
 
+  // Save set of locals defined in this block
+  block->def_locals()->add(state->def_locals());
+
+  // Remove untaken exception paths
+  if (!exceptions_used)
+    exceptions->clear();
+
   // Pass our state to successors.
   flow_successors(successors, state);
 }
 
 // ------------------------------------------------------------------
+// ciTypeFlow::PostorderLoops::next
+//
+// Advance to the next loop in the tree using a postorder, left-to-right traversal.
+void ciTypeFlow::PostorderLoops::next() {
+  assert(!done(), "must not be done.");
+  if (_current->sibling() != NULL) {
+    _current = _current->sibling();
+    while (_current->child() != NULL) {
+      _current = _current->child();
+    }
+  } else {
+    _current = _current->parent();
+  }
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::PreorderLoops::next
+//
+// Advance to the next loop in the tree using a preorder, left-to-right traversal.
+void ciTypeFlow::PreorderLoops::next() {
+  assert(!done(), "must not be done.");
+  if (_current->child() != NULL) {
+    _current = _current->child();
+  } else if (_current->sibling() != NULL) {
+    _current = _current->sibling();
+  } else {
+    while (_current != _root && _current->sibling() == NULL) {
+      _current = _current->parent();
+    }
+    if (_current == _root) {
+      _current = NULL;
+      assert(done(), "must be done.");
+    } else {
+      assert(_current->sibling() != NULL, "must be more to do");
+      _current = _current->sibling();
+    }
+  }
+}
+
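
The iterators above walk the loop tree through its child/sibling/parent pointers. A standalone sketch of the same postorder walk over a first-child / next-sibling tree, assuming the root has neither parent nor sibling (as loop_tree_root() does); Node and the other names are illustrative only.

#include <cstdio>

struct Node {
  const char* name;
  Node* parent;
  Node* sibling;  // next sibling, null-terminated
  Node* child;    // first child
};

Node* leftmost_leaf(Node* n) {
  while (n->child != nullptr) n = n->child;
  return n;
}

// Assumes root->parent == nullptr and root->sibling == nullptr.
void postorder(Node* root) {
  for (Node* cur = leftmost_leaf(root); cur != nullptr; ) {
    std::printf("%s ", cur->name);                        // visit
    cur = (cur->sibling != nullptr) ? leftmost_leaf(cur->sibling)
                                    : cur->parent;        // next subtree or back up
  }
  std::printf("\n");
}

int main() {
  Node root{"root", nullptr, nullptr, nullptr};
  Node a{"A", &root, nullptr, nullptr}, b{"B", &root, nullptr, nullptr};
  Node a1{"A1", &a, nullptr, nullptr},  a2{"A2", &a, nullptr, nullptr};
  root.child = &a; a.sibling = &b; a.child = &a1; a1.sibling = &a2;
  postorder(&root);   // prints: A1 A2 A B root
  return 0;
}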
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::sorted_merge
+//
+// Merge the branch lp into this branch, sorting on the loop head
+// pre_orders. Returns the leaf of the merged branch.
+// Child and sibling pointers will be setup later.
+// Sort is (looking from leaf towards the root)
+//  descending on primary key: loop head's pre_order, and
+//  ascending  on secondary key: loop tail's pre_order.
+ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) {
+  Loop* leaf = this;
+  Loop* prev = NULL;
+  Loop* current = leaf;
+  while (lp != NULL) {
+    int lp_pre_order = lp->head()->pre_order();
+    // Find insertion point for "lp"
+    while (current != NULL) {
+      if (current == lp)
+        return leaf; // Already in list
+      if (current->head()->pre_order() < lp_pre_order)
+        break;
+      if (current->head()->pre_order() == lp_pre_order &&
+          current->tail()->pre_order() > lp->tail()->pre_order()) {
+        break;
+      }
+      prev = current;
+      current = current->parent();
+    }
+    Loop* next_lp = lp->parent(); // Save future list of items to insert
+    // Insert lp before current
+    lp->set_parent(current);
+    if (prev != NULL) {
+      prev->set_parent(lp);
+    } else {
+      leaf = lp;
+    }
+    prev = lp;     // Inserted item is new prev[ious]
+    lp = next_lp;  // Next item to insert
+  }
+  return leaf;
+}
+
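
A small illustration (not HotSpot code) of the ordering that sorted_merge maintains along a leaf-to-root loop chain: heads in descending pre-order, ties broken by ascending tail pre-order toward the root. LoopKey and closer_to_leaf are hypothetical names used only for this sketch.

#include <cassert>

struct LoopKey { int head_pre_order; int tail_pre_order; };

// Returns true if 'a' must sit closer to the leaf of the chain than 'b'.
bool closer_to_leaf(const LoopKey& a, const LoopKey& b) {
  if (a.head_pre_order != b.head_pre_order)
    return a.head_pre_order > b.head_pre_order;  // descending primary key
  return a.tail_pre_order < b.tail_pre_order;    // ascending secondary key
}

int main() {
  assert( closer_to_leaf({7, 9},  {7, 12}));  // same head: smaller tail stays nearer the leaf
  assert( closer_to_leaf({7, 12}, {3, 20}));  // larger head pre-order stays nearer the leaf
  assert(!closer_to_leaf({3, 20}, {7, 9}));
  return 0;
}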
+// ------------------------------------------------------------------
+// ciTypeFlow::build_loop_tree
+//
+// Incrementally build loop tree.
+void ciTypeFlow::build_loop_tree(Block* blk) {
+  assert(!blk->is_post_visited(), "precondition");
+  Loop* innermost = NULL; // merge of loop tree branches over all successors
+
+  for (SuccIter iter(blk); !iter.done(); iter.next()) {
+    Loop*  lp   = NULL;
+    Block* succ = iter.succ();
+    if (!succ->is_post_visited()) {
+      // Found backedge since predecessor post visited, but successor is not
+      assert(succ->pre_order() <= blk->pre_order(), "should be backedge");
+
+      // Create a Loop node to mark this loop.
+      lp = new (arena()) Loop(succ, blk);
+      if (succ->loop() == NULL)
+        succ->set_loop(lp);
+      // succ->loop will be updated to innermost loop on a later call, when blk==succ
+
+    } else {  // Nested loop
+      lp = succ->loop();
+
+      // If succ is loop head, find outer loop.
+      while (lp != NULL && lp->head() == succ) {
+        lp = lp->parent();
+      }
+      if (lp == NULL) {
+        // Infinite loop; its parent is the root
+        lp = loop_tree_root();
+      }
+    }
+
+    // Check for irreducible loop.
+    // Successor has already been visited. If the successor's loop head
+    // has already been post-visited, then this is another entry into the loop.
+    while (lp->head()->is_post_visited() && lp != loop_tree_root()) {
+      _has_irreducible_entry = true;
+      lp->set_irreducible(succ);
+      if (!succ->is_on_work_list()) {
+        // Assume irreducible entries need more data flow
+        add_to_work_list(succ);
+      }
+      lp = lp->parent();
+      assert(lp != NULL, "nested loop must have parent by now");
+    }
+
+    // Merge loop tree branch for all successors.
+    innermost = innermost == NULL ? lp : innermost->sorted_merge(lp);
+
+  } // end loop
+
+  if (innermost == NULL) {
+    assert(blk->successors()->length() == 0, "CFG exit");
+    blk->set_loop(loop_tree_root());
+  } else if (innermost->head() == blk) {
+    // If loop header, complete the tree pointers
+    if (blk->loop() != innermost) {
+#if ASSERT
+      assert(blk->loop()->head() == innermost->head(), "same head");
+      Loop* dl;
+      for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent());
+      assert(dl == blk->loop(), "blk->loop() already in innermost list");
+#endif
+      blk->set_loop(innermost);
+    }
+    innermost->def_locals()->add(blk->def_locals());
+    Loop* l = innermost;
+    Loop* p = l->parent();
+    while (p && l->head() == blk) {
+      l->set_sibling(p->child());  // Put self on parent's child list
+      p->set_child(l);             // Make self the first child of parent
+      p->def_locals()->add(l->def_locals());
+      l = p;                       // Walk up the parent chain
+      p = l->parent();
+    }
+  } else {
+    blk->set_loop(innermost);
+    innermost->def_locals()->add(blk->def_locals());
+  }
+}
+
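
A short sketch (illustrative names, not HotSpot code) of the child-list threading done above once a loop header is completed: the finished loop is pushed onto the front of its parent's first-child / next-sibling list.

struct LoopT {
  LoopT* parent;
  LoopT* sibling;  // next sibling in the parent's child list
  LoopT* child;    // first child
};

// Push 'c' onto the front of 'p's child list.
void attach_child(LoopT* p, LoopT* c) {
  c->parent  = p;
  c->sibling = p->child;  // old first child becomes our next sibling
  p->child   = c;         // we become the first child
}

int main() {
  LoopT outer{nullptr, nullptr, nullptr};
  LoopT a{&outer, nullptr, nullptr}, b{&outer, nullptr, nullptr};
  attach_child(&outer, &a);
  attach_child(&outer, &b);  // child list is now b -> a
  return (outer.child == &b && b.sibling == &a) ? 0 : 1;
}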
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::contains
+//
+// Returns true if lp is this loop or a loop nested within it.
+bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const {
+  assert(lp != NULL, "");
+  if (this == lp || head() == lp->head()) return true;
+  int depth1 = depth();
+  int depth2 = lp->depth();
+  if (depth1 > depth2)
+    return false;
+  while (depth1 < depth2) {
+    depth2--;
+    lp = lp->parent();
+  }
+  return this == lp;
+}
+
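
A standalone sketch of the same containment test: lift the deeper loop up to the other's depth and compare identities. The head()-equality shortcut used above is omitted; names are illustrative.

struct LoopN { LoopN* parent; };

int depth_of(const LoopN* n) {
  int d = 0;
  for (const LoopN* p = n->parent; p != nullptr; p = p->parent) d++;
  return d;
}

// Does 'outer' contain 'inner' (or equal it)?
bool contains(const LoopN* outer, const LoopN* inner) {
  int d_outer = depth_of(outer);
  int d_inner = depth_of(inner);
  if (d_outer > d_inner) return false;   // a deeper loop cannot contain a shallower one
  while (d_inner > d_outer) {            // lift 'inner' up the tree
    inner = inner->parent;
    --d_inner;
  }
  return outer == inner;
}

int main() {
  LoopN root{nullptr}, mid{&root}, leaf{&mid}, other{&root};
  return (contains(&root, &leaf) && contains(&mid, &leaf) &&
          !contains(&other, &leaf)) ? 0 : 1;
}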
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::depth
+//
+// Loop depth
+int ciTypeFlow::Loop::depth() const {
+  int dp = 0;
+  for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent())
+    dp++;
+  return dp;
+}
+
+#ifndef PRODUCT
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::print
+void ciTypeFlow::Loop::print(outputStream* st, int indent) const {
+  for (int i = 0; i < indent; i++) st->print(" ");
+  st->print("%d<-%d %s",
+            is_root() ? 0 : this->head()->pre_order(),
+            is_root() ? 0 : this->tail()->pre_order(),
+            is_irreducible()?" irr":"");
+  st->print(" defs: ");
+  def_locals()->print_on(st, _head->outer()->method()->max_locals());
+  st->cr();
+  for (Loop* ch = child(); ch != NULL; ch = ch->sibling())
+    ch->print(st, indent+2);
+}
+#endif
+
+// ------------------------------------------------------------------
+// ciTypeFlow::df_flow_types
+//
+// Perform the depth first type flow analysis. Helper for flow_types.
+void ciTypeFlow::df_flow_types(Block* start,
+                               bool do_flow,
+                               StateVector* temp_vector,
+                               JsrSet* temp_set) {
+  int dft_len = 100;
+  GrowableArray<Block*> stk(arena(), dft_len, 0, NULL);
+
+  ciBlock* dummy = _methodBlocks->make_dummy_block();
+  JsrSet* root_set = new JsrSet(NULL, 0);
+  Block* root_head = new (arena()) Block(this, dummy, root_set);
+  Block* root_tail = new (arena()) Block(this, dummy, root_set);
+  root_head->set_pre_order(0);
+  root_head->set_post_order(0);
+  root_tail->set_pre_order(max_jint);
+  root_tail->set_post_order(max_jint);
+  set_loop_tree_root(new (arena()) Loop(root_head, root_tail));
+
+  stk.push(start);
+
+  _next_pre_order = 0;  // initialize pre_order counter
+  _rpo_list = NULL;
+  int next_po = 0;      // initialize post_order counter
+
+  // Compute RPO and the control flow graph
+  int size;
+  while ((size = stk.length()) > 0) {
+    Block* blk = stk.top(); // Leave node on stack
+    if (!blk->is_visited()) {
+      // forward arc in graph
+      assert (!blk->has_pre_order(), "");
+      blk->set_next_pre_order();
+
+      if (_next_pre_order >= MaxNodeLimit / 2) {
+        // Too many basic blocks.  Bail out.
+        // This can happen when try/finally constructs are nested to depth N,
+        // and there is O(2**N) cloning of jsr bodies.  See bug 4697245!
+        // "MaxNodeLimit / 2" is used because probably the parser will
+        // generate at least twice that many nodes and bail out.
+        record_failure("too many basic blocks");
+        return;
+      }
+      if (do_flow) {
+        flow_block(blk, temp_vector, temp_set);
+        if (failing()) return; // Watch for bailouts.
+      }
+    } else if (!blk->is_post_visited()) {
+      // cross or back arc
+      for (SuccIter iter(blk); !iter.done(); iter.next()) {
+        Block* succ = iter.succ();
+        if (!succ->is_visited()) {
+          stk.push(succ);
+        }
+      }
+      if (stk.length() == size) {
+        // There were no additional children, post visit node now
+        stk.pop(); // Remove node from stack
+
+        build_loop_tree(blk);
+        blk->set_post_order(next_po++);   // Assign post order
+        prepend_to_rpo_list(blk);
+        assert(blk->is_post_visited(), "");
+
+        if (blk->is_loop_head() && !blk->is_on_work_list()) {
+          // Assume loop heads need more data flow
+          add_to_work_list(blk);
+        }
+      }
+    } else {
+      stk.pop(); // Remove post-visited node from stack
+    }
+  }
+}
+
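
A generic sketch of the DFS shape used in df_flow_types, with illustrative names and none of the flow, bailout, or loop-tree work: pre-order is assigned on first visit, unvisited successors are pushed on a later pass, and a node is post-visited (and prepended to the RPO list) only when that pass pushes nothing new.

#include <vector>

struct Nd {
  std::vector<Nd*> succs;
  int pre_order  = -1;   // -1 means not yet visited
  int post_order = -1;   // -1 means not yet post-visited
  Nd* rpo_next   = nullptr;
};

void dfs_rpo(Nd* start, Nd*& rpo_list) {
  std::vector<Nd*> stk;
  int next_pre = 0, next_post = 0;
  stk.push_back(start);
  while (!stk.empty()) {
    Nd* n = stk.back();                    // leave node on the stack
    if (n->pre_order < 0) {
      n->pre_order = next_pre++;           // forward arc: first visit
    } else if (n->post_order < 0) {
      size_t size = stk.size();
      for (Nd* s : n->succs)
        if (s->pre_order < 0) stk.push_back(s);
      if (stk.size() == size) {            // no new children: post-visit now
        stk.pop_back();
        n->post_order = next_post++;
        n->rpo_next = rpo_list;            // prepend => reverse post-order
        rpo_list = n;
      }
    } else {
      stk.pop_back();                      // duplicate entry, already post-visited
    }
  }
}

int main() {
  Nd a, b, c;                 // a -> b -> c, a -> c
  a.succs = {&b, &c};
  b.succs = {&c};
  Nd* rpo = nullptr;
  dfs_rpo(&a, rpo);           // rpo list is a, b, c for this acyclic graph
  return 0;
}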
+// ------------------------------------------------------------------
 // ciTypeFlow::flow_types
 //
 // Perform the type flow analysis, creating and cloning Blocks as
@@ -2233,91 +2660,93 @@
   JsrSet* temp_set = new JsrSet(NULL, 16);
 
   // Create the method entry block.
-  Block* block = block_at(start_bci(), temp_set);
-  block->set_pre_order(_next_pre_order++);
-  assert(block->is_start(), "start block must have order #0");
+  Block* start = block_at(start_bci(), temp_set);
 
   // Load the initial state into it.
   const StateVector* start_state = get_start_state();
   if (failing())  return;
-  block->meet(start_state);
-  add_to_work_list(block);
+  start->meet(start_state);
+
+  // Depth first visit
+  df_flow_types(start, true /*do flow*/, temp_vector, temp_set);
 
-  // Trickle away.
-  while (!work_list_empty()) {
-    Block* block = work_list_next();
-    flow_block(block, temp_vector, temp_set);
+  if (failing())  return;
+  assert(_rpo_list == start, "must be start");
 
+  // Any loops found?
+  if (loop_tree_root()->child() != NULL &&
+      env()->comp_level() >= CompLevel_full_optimization) {
+      // Loop optimizations are not performed on Tier1 compiles.
+
+    bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set);
 
-    // NodeCountCutoff is the number of nodes at which the parser
-    // will bail out.  Probably if we already have lots of BBs,
-    // the parser will generate at least twice that many nodes and bail out.
-    // Therefore, this is a conservatively large limit at which to
-    // bail out in the pre-parse typeflow pass.
-    int block_limit = MaxNodeLimit / 2;
+    // If some loop heads were cloned, recompute postorder and loop tree
+    if (changed) {
+      loop_tree_root()->set_child(NULL);
+      for (Block* blk = _rpo_list; blk != NULL;) {
+        Block* next = blk->rpo_next();
+        blk->df_init();
+        blk = next;
+      }
+      df_flow_types(start, false /*no flow*/, temp_vector, temp_set);
+    }
+  }
 
-    if (_next_pre_order >= block_limit) {
-      // Too many basic blocks.  Bail out.
-      //
-      // This can happen when try/finally constructs are nested to depth N,
-      // and there is O(2**N) cloning of jsr bodies.  See bug 4697245!
-      record_failure("too many basic blocks");
-      return;
-    }
+  if (CITraceTypeFlow) {
+    tty->print_cr("\nLoop tree");
+    loop_tree_root()->print();
+  }
+
+  // Continue flow analysis until fixed point reached
+
+  debug_only(int max_block = _next_pre_order;)
 
-    // Watch for bailouts.
-    if (failing())  return;
+  while (!work_list_empty()) {
+    Block* blk = work_list_next();
+    assert (blk->has_post_order(), "post order assigned above");
+
+    flow_block(blk, temp_vector, temp_set);
+
+    assert (max_block == _next_pre_order, "no new blocks");
+    assert (!failing(), "no more bailouts");
   }
 }
 
 // ------------------------------------------------------------------
 // ciTypeFlow::map_blocks
 //
-// Create the block map, which indexes blocks in pre_order.
+// Create the block map, which indexes blocks in reverse post-order.
 void ciTypeFlow::map_blocks() {
   assert(_block_map == NULL, "single initialization");
-  int pre_order_limit = _next_pre_order;
-  _block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit);
-  assert(pre_order_limit == block_count(), "");
-  int po;
-  for (po = 0; po < pre_order_limit; po++) {
-    debug_only(_block_map[po] = NULL);
+  int block_ct = _next_pre_order;
+  _block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct);
+  assert(block_ct == block_count(), "");
+
+  Block* blk = _rpo_list;
+  for (int m = 0; m < block_ct; m++) {
+    int rpo = blk->rpo();
+    assert(rpo == m, "should be sequential");
+    _block_map[rpo] = blk;
+    blk = blk->rpo_next();
   }
-  ciMethodBlocks *mblks = _methodBlocks;
-  ciBlock* current = NULL;
-  int limit_bci = code_size();
-  for (int bci = 0; bci < limit_bci; bci++) {
-    ciBlock* ciblk = mblks->block_containing(bci);
-    if (ciblk != NULL && ciblk != current) {
-      current = ciblk;
-      int curidx = ciblk->index();
-      int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length();
-      for (int i = 0; i < block_count; i++) {
-        Block* block = _idx_to_blocklist[curidx]->at(i);
-        if (!block->has_pre_order())  continue;
-        int po = block->pre_order();
-        assert(_block_map[po] == NULL, "unique ref to block");
-        assert(0 <= po && po < pre_order_limit, "");
-        _block_map[po] = block;
-      }
-    }
-  }
-  for (po = 0; po < pre_order_limit; po++) {
-    assert(_block_map[po] != NULL, "must not drop any blocks");
-    Block* block = _block_map[po];
+  assert(blk == NULL, "should be done");
+
+  for (int j = 0; j < block_ct; j++) {
+    assert(_block_map[j] != NULL, "must not drop any blocks");
+    Block* block = _block_map[j];
     // Remove dead blocks from successor lists:
     for (int e = 0; e <= 1; e++) {
       GrowableArray<Block*>* l = e? block->exceptions(): block->successors();
-      for (int i = 0; i < l->length(); i++) {
-        Block* s = l->at(i);
-        if (!s->has_pre_order()) {
+      for (int k = 0; k < l->length(); k++) {
+        Block* s = l->at(k);
+        if (!s->has_post_order()) {
           if (CITraceTypeFlow) {
             tty->print("Removing dead %s successor of #%d: ", (e? "exceptional":  "normal"), block->pre_order());
             s->print_value_on(tty);
             tty->cr();
           }
           l->remove(s);
-          --i;
+          --k;
         }
       }
     }
@@ -2329,7 +2758,7 @@
 //
 // Find a block with this ciBlock which has a compatible JsrSet.
 // If no such block exists, create it, unless the option is no_create.
-// If the option is create_private_copy, always create a fresh private copy.
+// If the option is create_backedge_copy, always create a fresh backedge copy.
 ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) {
   Arena* a = arena();
   GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
@@ -2342,11 +2771,11 @@
     _idx_to_blocklist[ciBlockIndex] = blocks;
   }
 
-  if (option != create_private_copy) {
+  if (option != create_backedge_copy) {
     int len = blocks->length();
     for (int i = 0; i < len; i++) {
       Block* block = blocks->at(i);
-      if (!block->is_private_copy() && block->is_compatible_with(jsrs)) {
+      if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
         return block;
       }
     }
@@ -2357,15 +2786,15 @@
 
   // We did not find a compatible block.  Create one.
   Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs);
-  if (option == create_private_copy)  new_block->set_private_copy(true);
+  if (option == create_backedge_copy)  new_block->set_backedge_copy(true);
   blocks->append(new_block);
   return new_block;
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::private_copy_count
+// ciTypeFlow::backedge_copy_count
 //
-int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
+int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
   GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
 
   if (blocks == NULL) {
@@ -2376,7 +2805,7 @@
   int len = blocks->length();
   for (int i = 0; i < len; i++) {
     Block* block = blocks->at(i);
-    if (block->is_private_copy() && block->is_compatible_with(jsrs)) {
+    if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
       count++;
     }
   }
@@ -2405,10 +2834,12 @@
   if (failing()) {
     return;
   }
+
+  map_blocks();
+
   if (CIPrintTypeFlow || CITraceTypeFlow) {
-    print_on(tty);
+    rpo_print_on(tty);
   }
-  map_blocks();
 }
 
 // ------------------------------------------------------------------
@@ -2466,4 +2897,19 @@
   st->print_cr("********************************************************");
   st->cr();
 }
+
+void ciTypeFlow::rpo_print_on(outputStream* st) const {
+  st->print_cr("********************************************************");
+  st->print   ("TypeFlow for ");
+  method()->name()->print_symbol_on(st);
+  int limit_bci = code_size();
+  st->print_cr("  %d bytes", limit_bci);
+  for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) {
+    blk->print_on(st);
+    st->print_cr("--------------------------------------------------------");
+    st->cr();
+  }
+  st->print_cr("********************************************************");
+  st->cr();
+}
 #endif
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -34,11 +34,13 @@
   int _max_locals;
   int _max_stack;
   int _code_size;
+  bool      _has_irreducible_entry;
 
   const char* _failure_reason;
 
 public:
   class StateVector;
+  class Loop;
   class Block;
 
   // Build a type flow analyzer
@@ -55,6 +57,7 @@
   int       max_stack() const  { return _max_stack; }
   int       max_cells() const  { return _max_locals + _max_stack; }
   int       code_size() const  { return _code_size; }
+  bool      has_irreducible_entry() const { return _has_irreducible_entry; }
 
   // Represents information about an "active" jsr call.  This
   // class represents a call to the routine at some entry address
@@ -125,6 +128,19 @@
     void print_on(outputStream* st) const PRODUCT_RETURN;
   };
 
+  class LocalSet VALUE_OBJ_CLASS_SPEC {
+  private:
+    enum Constants { max = 63 };
+    uint64_t _bits;
+  public:
+    LocalSet() : _bits(0) {}
+    void add(uint32_t i)        { if (i < (uint32_t)max) _bits |=  (1LL << i); }
+    void add(LocalSet* ls)      { _bits |= ls->_bits; }
+    bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; }
+    void clear()                { _bits = 0; }
+    void print_on(outputStream* st, int limit) const  PRODUCT_RETURN;
+  };
+
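
A standalone sketch of the same idea as LocalSet above: one 64-bit word tracks which of the first 63 local slots may be defined, and any slot at or beyond the limit conservatively reads back as set. SmallLocalSet is an illustrative name, not the HotSpot class.

#include <cassert>
#include <cstdint>

class SmallLocalSet {
  static const uint32_t kMax = 63;
  uint64_t _bits;
public:
  SmallLocalSet() : _bits(0) {}
  void add(uint32_t i)        { if (i < kMax) _bits |= (uint64_t(1) << i); }
  void add_all(const SmallLocalSet& o) { _bits |= o._bits; }
  bool test(uint32_t i) const { return i < kMax ? ((_bits >> i) & 1) != 0 : true; }
  void clear()                { _bits = 0; }
};

int main() {
  SmallLocalSet s;
  s.add(2); s.add(70);           // 70 is out of range and is dropped...
  assert(s.test(2));
  assert(!s.test(3));
  assert(s.test(70));            // ...but still reads back as "set" (conservative)
  return 0;
}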
   // Used as a combined index for locals and temps
   enum Cell {
     Cell_0, Cell_max = INT_MAX
@@ -142,6 +158,8 @@
     int         _trap_bci;
     int         _trap_index;
 
+    LocalSet    _def_locals;  // For entire block
+
     static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer);
 
   public:
@@ -181,6 +199,9 @@
     int         monitor_count() const  { return _monitor_count; }
     void    set_monitor_count(int mc)  { _monitor_count = mc; }
 
+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
     static Cell start_cell()           { return (Cell)0; }
     static Cell next_cell(Cell c)      { return (Cell)(((int)c) + 1); }
     Cell        limit_cell() const {
@@ -250,6 +271,10 @@
       return type->basic_type() == T_DOUBLE;
     }
 
+    void store_to_local(int lnum) {
+      _def_locals.add((uint) lnum);
+    }
+
     void      push_translate(ciType* type);
 
     void      push_int() {
@@ -358,6 +383,7 @@
              "must be reference type or return address");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_double(int index) {
@@ -376,6 +402,8 @@
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }
 
     void load_local_float(int index) {
@@ -388,6 +416,7 @@
       assert(is_float(type), "must be float type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_int(int index) {
@@ -400,6 +429,7 @@
       assert(is_int(type), "must be int type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_long(int index) {
@@ -418,6 +448,8 @@
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }
 
     // Stop interpretation of this path with a trap.
@@ -450,13 +482,31 @@
   };
 
   // Parameter for "find_block" calls:
-  // Describes the difference between a public and private copy.
+  // Describes the difference between a public and backedge copy.
   enum CreateOption {
     create_public_copy,
-    create_private_copy,
+    create_backedge_copy,
     no_create
   };
 
+  // Successor iterator
+  class SuccIter : public StackObj {
+  private:
+    Block* _pred;
+    int    _index;
+    Block* _succ;
+  public:
+    SuccIter()                        : _pred(NULL), _index(-1), _succ(NULL) {}
+    SuccIter(Block* pred)             : _pred(pred), _index(-1), _succ(NULL) { next(); }
+    int    index()     { return _index; }
+    Block* pred()      { return _pred; }           // Return predecessor
+    bool   done()      { return _index < 0; }      // Finished?
+    Block* succ()      { return _succ; }           // Return current successor
+    void   next();                                 // Advance
+    void   set_succ(Block* succ);                  // Update current successor
+    bool   is_normal_ctrl() { return index() < _pred->successors()->length(); }
+  };
+
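
A standalone sketch of the SuccIter idea (illustrative names; set_succ and the -1 sentinel are omitted): a single index walks the normal successor list first and then continues into the exceptional list, so is_normal_ctrl() is just a comparison against the normal count.

#include <vector>

struct BlockSketch {
  std::vector<BlockSketch*> successors;   // normal control flow
  std::vector<BlockSketch*> exceptions;   // exceptional control flow
};

class SuccIterSketch {
  BlockSketch* _pred;
  size_t _index;
public:
  explicit SuccIterSketch(BlockSketch* pred) : _pred(pred), _index(0) {}
  bool done() const {
    return _index >= _pred->successors.size() + _pred->exceptions.size();
  }
  void next() { ++_index; }
  bool is_normal_ctrl() const { return _index < _pred->successors.size(); }
  BlockSketch* succ() const {
    return is_normal_ctrl()
        ? _pred->successors[_index]
        : _pred->exceptions[_index - _pred->successors.size()];
  }
};

int main() {
  BlockSketch a, b, c, d;
  a.successors = {&b, &c};
  a.exceptions = {&d};
  int normal = 0, exceptional = 0;
  for (SuccIterSketch it(&a); !it.done(); it.next())
    (it.is_normal_ctrl() ? normal : exceptional)++;
  return (normal == 2 && exceptional == 1) ? 0 : 1;
}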
   // A basic block
   class Block : public ResourceObj {
   private:
@@ -470,15 +520,24 @@
     int                              _trap_bci;
     int                              _trap_index;
 
-    // A reasonable approximation to pre-order, provided.to the client.
+    // pre_order, assigned at first visit. Used as block ID and "visited" tag
     int                              _pre_order;
 
-    // Has this block been cloned for some special purpose?
-    bool                             _private_copy;
+    // A post-order, used to compute the reverse post order (RPO) provided to the client
+    int                              _post_order;  // used to compute rpo
+
+    // Has this block been cloned for a loop backedge?
+    bool                             _backedge_copy;
 
     // A pointer used for our internal work list
-    Block*                 _next;
-    bool                   _on_work_list;
+    Block*                           _next;
+    bool                             _on_work_list;      // on the work list
+    Block*                           _rpo_next;          // Reverse post order list
+
+    // Loop info
+    Loop*                            _loop;              // nearest loop
+    bool                             _irreducible_entry; // entry to irreducible loop
+    bool                             _exception_entry;   // entry to exception handler
 
     ciBlock*     ciblock() const     { return _ciblock; }
     StateVector* state() const     { return _state; }
@@ -504,10 +563,11 @@
     int start() const         { return _ciblock->start_bci(); }
     int limit() const         { return _ciblock->limit_bci(); }
     int control() const       { return _ciblock->control_bci(); }
+    JsrSet* jsrs() const      { return _jsrs; }
 
-    bool    is_private_copy() const       { return _private_copy; }
-    void   set_private_copy(bool z);
-    int        private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); }
+    bool    is_backedge_copy() const       { return _backedge_copy; }
+    void   set_backedge_copy(bool z);
+    int        backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); }
 
     // access to entry state
     int     stack_size() const         { return _state->stack_size(); }
@@ -515,6 +575,20 @@
     ciType* local_type_at(int i) const { return _state->local_type_at(i); }
     ciType* stack_type_at(int i) const { return _state->stack_type_at(i); }
 
+    // Data flow on locals
+    bool is_invariant_local(uint v) const {
+      assert(is_loop_head(), "only loop heads");
+      // Find outermost loop with same loop head
+      Loop* lp = loop();
+      while (lp->parent() != NULL) {
+        if (lp->parent()->head() != lp->head()) break;
+        lp = lp->parent();
+      }
+      return !lp->def_locals()->test(v);
+    }
+    LocalSet* def_locals() { return _state->def_locals(); }
+    const LocalSet* def_locals() const { return _state->def_locals(); }
+
     // Get the successors for this Block.
     GrowableArray<Block*>* successors(ciBytecodeStream* str,
                                       StateVector* state,
@@ -524,13 +598,6 @@
       return _successors;
     }
 
-    // Helper function for "successors" when making private copies of
-    // loop heads for C2.
-    Block * clone_loop_head(ciTypeFlow* analyzer,
-                            int branch_bci,
-                            Block* target,
-                            JsrSet* jsrs);
-
     // Get the exceptional successors for this Block.
     GrowableArray<Block*>* exceptions() {
       if (_exceptions == NULL) {
@@ -584,17 +651,126 @@
     bool   is_on_work_list() const  { return _on_work_list; }
 
     bool   has_pre_order() const  { return _pre_order >= 0; }
-    void   set_pre_order(int po)  { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; }
+    void   set_pre_order(int po)  { assert(!has_pre_order(), ""); _pre_order = po; }
     int    pre_order() const      { assert(has_pre_order(), ""); return _pre_order; }
+    void   set_next_pre_order()   { set_pre_order(outer()->inc_next_pre_order()); }
     bool   is_start() const       { return _pre_order == outer()->start_block_num(); }
 
-    // A ranking used in determining order within the work list.
-    bool   is_simpler_than(Block* other);
+    // Reverse post order
+    void   df_init();
+    bool   has_post_order() const { return _post_order >= 0; }
+    void   set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; }
+    void   reset_post_order(int o){ _post_order = o; }
+    int    post_order() const     { assert(has_post_order(), ""); return _post_order; }
+
+    bool   has_rpo() const        { return has_post_order() && outer()->have_block_count(); }
+    int    rpo() const            { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; }
+    void   set_rpo_next(Block* b) { _rpo_next = b; }
+    Block* rpo_next()             { return _rpo_next; }
+
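
A tiny check (illustration only) of the mapping used by rpo() above: post-orders are assigned 0..block_count-1 from the leaves up, so reversing them yields the RPO index.

#include <cassert>

int rpo_of(int post_order, int block_count) {
  return block_count - post_order - 1;
}

int main() {
  assert(rpo_of(3, 4) == 0);   // last block post-visited comes first in RPO
  assert(rpo_of(0, 4) == 3);   // first block post-visited comes last in RPO
  return 0;
}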
+    // Loops
+    Loop*  loop() const                  { return _loop; }
+    void   set_loop(Loop* lp)            { _loop = lp; }
+    bool   is_loop_head() const          { return _loop && _loop->head() == this; }
+    void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
+    bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    bool   is_visited() const            { return has_pre_order(); }
+    bool   is_post_visited() const       { return has_post_order(); }
+    bool   is_clonable_exit(Loop* lp);
+    Block* looping_succ(Loop* lp);       // Successor inside of loop
+    bool   is_single_entry_loop_head() const {
+      if (!is_loop_head()) return false;
+      for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent())
+        if (lp->is_irreducible()) return false;
+      return true;
+    }
 
     void   print_value_on(outputStream* st) const PRODUCT_RETURN;
     void   print_on(outputStream* st) const       PRODUCT_RETURN;
   };
 
+  // Loop
+  class Loop : public ResourceObj {
+  private:
+    Loop* _parent;
+    Loop* _sibling;  // List of siblings, null terminated
+    Loop* _child;    // Head of child list threaded thru sibling pointer
+    Block* _head;    // Head of loop
+    Block* _tail;    // Tail of loop
+    bool   _irreducible;
+    LocalSet _def_locals;
+
+  public:
+    Loop(Block* head, Block* tail) :
+      _head(head),   _tail(tail),
+      _parent(NULL), _sibling(NULL), _child(NULL),
+      _irreducible(false), _def_locals() {}
+
+    Loop* parent()  const { return _parent; }
+    Loop* sibling() const { return _sibling; }
+    Loop* child()   const { return _child; }
+    Block* head()   const { return _head; }
+    Block* tail()   const { return _tail; }
+    void set_parent(Loop* p)  { _parent = p; }
+    void set_sibling(Loop* s) { _sibling = s; }
+    void set_child(Loop* c)   { _child = c; }
+    void set_head(Block* hd)  { _head = hd; }
+    void set_tail(Block* tl)  { _tail = tl; }
+
+    int depth() const;              // nesting depth
+
+    // Returns true if lp is a nested loop or us.
+    bool contains(Loop* lp) const;
+    bool contains(Block* blk) const { return contains(blk->loop()); }
+
+    // Data flow on locals
+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
+    // Merge the branch lp into this branch, sorting on the loop head
+    // pre_orders. Returns the new branch.
+    Loop* sorted_merge(Loop* lp);
+
+    // Mark non-single entry to loop
+    void set_irreducible(Block* entry) {
+      _irreducible = true;
+      entry->set_irreducible_entry(true);
+    }
+    bool is_irreducible() const { return _irreducible; }
+
+    bool is_root() const { return _tail->pre_order() == max_jint; }
+
+    void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
+  };
+
+  // Postorder iteration over the loop tree.
+  class PostorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PostorderLoops(Loop* root) : _root(root), _current(root) {
+      while (_current->child() != NULL) {
+        _current = _current->child();
+      }
+    }
+    bool done() { return _current == NULL; }   // Finished iterating?
+    void next();                                // Advance to next loop
+    Loop* current() { return _current; }        // Return current loop.
+  };
+
+  // Preorder iteration over the loop tree.
+  class PreorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PreorderLoops(Loop* root) : _root(root), _current(root) {}
+    bool done() { return _current == NULL; }   // Finished iterating?
+    void next();                                // Advance to next loop
+    Loop* current() { return _current; }        // Return current loop.
+  };
+
   // Standard indexes of successors, for various bytecodes.
   enum {
     FALL_THROUGH   = 0,  // normal control
@@ -619,6 +795,12 @@
   // Tells if a given instruction is able to generate an exception edge.
   bool can_trap(ciBytecodeStream& str);
 
+  // Clone the loop heads. Returns true if any cloning occurred.
+  bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
+  // Clone lp's head and replace tail's successors with clone.
+  Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
 public:
   // Return the block beginning at bci which has a JsrSet compatible
   // with jsrs.
@@ -627,8 +809,8 @@
   // block factory
   Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy);
 
-  // How many of the blocks have the private_copy bit set?
-  int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
+  // How many of the blocks have the backedge_copy bit set?
+  int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
 
   // Return an existing block containing bci which has a JsrSet compatible
   // with jsrs, or NULL if there is none.
@@ -651,11 +833,18 @@
                                       return _block_map[po]; }
   Block* start_block() const        { return pre_order_at(start_block_num()); }
   int start_block_num() const       { return 0; }
+  Block* rpo_at(int rpo) const      { assert(0 <= rpo && rpo < block_count(), "out of bounds");
+                                      return _block_map[rpo]; }
+  int next_pre_order()              { return _next_pre_order; }
+  int inc_next_pre_order()          { return _next_pre_order++; }
 
 private:
   // A work list used during flow analysis.
   Block* _work_list;
 
+  // List of blocks in reverse post order
+  Block* _rpo_list;
+
   // Next Block::_pre_order.  After mapping, doubles as block_count.
   int _next_pre_order;
 
@@ -668,6 +857,15 @@
   // Add a basic block to our work list.
   void add_to_work_list(Block* block);
 
+  // Prepend a basic block to rpo list.
+  void prepend_to_rpo_list(Block* blk) {
+    blk->set_rpo_next(_rpo_list);
+    _rpo_list = blk;
+  }
+
+  // Root of the loop tree
+  Loop* _loop_tree_root;
+
   // State used for make_jsr_record
   int _jsr_count;
   GrowableArray<JsrRecord*>* _jsr_records;
@@ -677,6 +875,9 @@
   // does not already exist.
   JsrRecord* make_jsr_record(int entry_address, int return_address);
 
+  void  set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; }
+  Loop* loop_tree_root()              { return _loop_tree_root; }
+
 private:
   // Get the initial state for start_bci:
   const StateVector* get_start_state();
@@ -703,6 +904,15 @@
   // necessary.
   void flow_types();
 
+  // Perform the depth first type flow analysis. Helper for flow_types.
+  void df_flow_types(Block* start,
+                     bool do_flow,
+                     StateVector* temp_vector,
+                     JsrSet* temp_set);
+
+  // Incrementally build loop tree.
+  void build_loop_tree(Block* blk);
+
   // Create the block map, which indexes blocks in pre_order.
   void map_blocks();
 
@@ -711,4 +921,6 @@
   void do_flow();
 
   void print_on(outputStream* st) const PRODUCT_RETURN;
+
+  void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
 };
--- a/hotspot/src/share/vm/code/nmethod.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1350,11 +1350,7 @@
       return false;
     }
   }
-  if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-    // Cannot do this test if verification of the UseParallelOldGC
-    // code using the PSMarkSweep code is being done.
-    assert(unloading_occurred, "Inconsistency in unloading");
-  }
+  assert(unloading_occurred, "Inconsistency in unloading");
   make_unloaded(is_alive, obj);
   return true;
 }
--- a/hotspot/src/share/vm/compiler/methodLiveness.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/compiler/methodLiveness.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -76,8 +76,9 @@
   BitCounter() : _count(0) {}
 
   // Callback when bit in map is set
-  virtual void do_bit(size_t offset) {
+  virtual bool do_bit(size_t offset) {
     _count++;
+    return true;
   }
 
   int count() {
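
The closures in this change now return a bool from do_bit(). The sketch below assumes, based on the "return true" bodies above, that the return value means "keep iterating" and that returning false lets a bitmap walk stop early; it is a generic illustration, not the HotSpot BitMap API.

#include <cstddef>
#include <vector>

struct BitClosure {
  virtual bool do_bit(size_t offset) = 0;   // return false to stop the walk
  virtual ~BitClosure() {}
};

// Returns true if the whole map was traversed without an early stop.
bool iterate(const std::vector<bool>& bits, BitClosure* blk) {
  for (size_t i = 0; i < bits.size(); ++i) {
    if (bits[i] && !blk->do_bit(i)) return false;
  }
  return true;
}

struct Counter : BitClosure {
  int count = 0;
  bool do_bit(size_t) override { ++count; return true; }  // never stops early
};

int main() {
  std::vector<bool> bits = {true, false, true, true};
  Counter c;
  bool completed = iterate(bits, &c);
  return (completed && c.count == 3) ? 0 : 1;
}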
@@ -467,7 +468,7 @@
     bci = 0;
   }
 
-  MethodLivenessResult answer(NULL,0);
+  MethodLivenessResult answer((uintptr_t*)NULL,0);
 
   if (_block_count > 0) {
     if (TimeLivenessAnalysis) _time_total.start();
--- a/hotspot/src/share/vm/compiler/methodLiveness.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/compiler/methodLiveness.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -29,7 +29,7 @@
   bool _is_valid;
 
  public:
-  MethodLivenessResult(uintptr_t* map, idx_t size_in_bits)
+  MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits)
     : BitMap(map, size_in_bits)
     , _is_valid(false)
   {}
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -790,7 +790,7 @@
 }
 
 
-HeapWord* CompactibleFreeListSpace::block_start(const void* p) const {
+HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const {
   NOT_PRODUCT(verify_objects_initialized());
   return _bt.block_start(p);
 }
@@ -2286,9 +2286,9 @@
 }
 
 void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
-  guarantee(size % 2 == 0, "Odd slots should be empty");
-  for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL;
-    fc = fc->next()) {
+  FreeChunk* fc =  _indexedFreeList[size].head();
+  guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
+  for (; fc != NULL; fc = fc->next()) {
     guarantee(fc->size() == size, "Size inconsistency");
     guarantee(fc->isFree(), "!free?");
     guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
@@ -2790,10 +2790,11 @@
   assert(n_threads > 0, "Unexpected n_threads argument");
   const size_t task_size = rescan_task_size();
   size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size;
-  assert((used_region().start() + (n_tasks - 1)*task_size <
-          used_region().end()) &&
-         (used_region().start() + n_tasks*task_size >=
-          used_region().end()), "n_task calculation incorrect");
+  assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect");
+  assert(n_tasks == 0 ||
+         ((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) &&
+          (used_region().start() + n_tasks*task_size >= used_region().end())),
+         "n_tasks calculation incorrect");
   SequentialSubTasksDone* pst = conc_par_seq_tasks();
   assert(!pst->valid(), "Clobbering existing data?");
   pst->set_par_threads(n_threads);
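
A small numeric check (illustration only) of the invariant the tightened assert above expresses: with n_tasks computed by ceiling division, n_tasks is zero exactly when the region is empty, the last task starts inside the used region, and the tasks together cover it.

#include <cassert>
#include <cstddef>

int main() {
  const size_t task_size = 100;
  for (size_t words = 0; words <= 1000; ++words) {
    size_t n_tasks = (words + task_size - 1) / task_size;   // ceiling division
    assert((n_tasks == 0) == (words == 0));
    assert(n_tasks == 0 ||
           ((n_tasks - 1) * task_size < words &&            // last task starts inside
            n_tasks * task_size >= words));                 // tasks cover the region
  }
  return 0;
}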
@@ -2833,7 +2834,7 @@
   assert(n_tasks == 0 ||
          ((span.start() + (n_tasks - 1)*task_size < span.end()) &&
           (span.start() + n_tasks*task_size >= span.end())),
-         "n_task calculation incorrect");
+         "n_tasks calculation incorrect");
   SequentialSubTasksDone* pst = conc_par_seq_tasks();
   assert(!pst->valid(), "Clobbering existing data?");
   pst->set_par_threads(n_threads);
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -502,7 +502,7 @@
 
   void blk_iterate(BlkClosure* cl);
   void blk_iterate_careful(BlkClosureCareful* cl);
-  HeapWord* block_start(const void* p) const;
+  HeapWord* block_start_const(const void* p) const;
   HeapWord* block_start_careful(const void* p) const;
   size_t block_size(const HeapWord* p) const;
   size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const;
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -2761,13 +2761,14 @@
  public:
   VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {}
 
-  void do_bit(size_t offset) {
+  bool do_bit(size_t offset) {
     HeapWord* addr = _marks->offsetToHeapWord(offset);
     if (!_marks->isMarked(addr)) {
       oop(addr)->print();
       gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
       _failed = true;
     }
+    return true;
   }
 
   bool failed() { return _failed; }
@@ -3650,6 +3651,7 @@
   CompactibleFreeListSpace*  _cms_space;
   CompactibleFreeListSpace* _perm_space;
   HeapWord*     _global_finger;
+  HeapWord*     _restart_addr;
 
   //  Exposed here for yielding support
   Mutex* const _bit_map_lock;
@@ -3680,7 +3682,7 @@
     _term.set_task(this);
     assert(_cms_space->bottom() < _perm_space->bottom(),
            "Finger incorrectly initialized below");
-    _global_finger = _cms_space->bottom();
+    _restart_addr = _global_finger = _cms_space->bottom();
   }
 
 
@@ -3698,6 +3700,10 @@
   bool result() { return _result; }
 
   void reset(HeapWord* ra) {
+    assert(_global_finger >= _cms_space->end(),  "Postcondition of ::work(i)");
+    assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)");
+    assert(ra             <  _perm_space->end(), "ra too large");
+    _restart_addr = _global_finger = ra;
     _term.reset_for_reuse();
   }
 
@@ -3842,16 +3848,24 @@
   int n_tasks = pst->n_tasks();
   // We allow that there may be no tasks to do here because
   // we are restarting after a stack overflow.
-  assert(pst->valid() || n_tasks == 0, "Uninitializd use?");
+  assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
   int nth_task = 0;
 
-  HeapWord* start = sp->bottom();
+  HeapWord* aligned_start = sp->bottom();
+  if (sp->used_region().contains(_restart_addr)) {
+    // Align down to a card boundary for the start of 0th task
+    // for this space.
+    aligned_start =
+      (HeapWord*)align_size_down((uintptr_t)_restart_addr,
+                                 CardTableModRefBS::card_size);
+  }
+
   size_t chunk_size = sp->marking_task_size();
   while (!pst->is_task_claimed(/* reference */ nth_task)) {
     // Having claimed the nth task in this space,
     // compute the chunk that it corresponds to:
-    MemRegion span = MemRegion(start + nth_task*chunk_size,
-                               start + (nth_task+1)*chunk_size);
+    MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
+                               aligned_start + (nth_task+1)*chunk_size);
     // Try and bump the global finger via a CAS;
     // note that we need to do the global finger bump
     // _before_ taking the intersection below, because
@@ -3866,26 +3880,40 @@
     // beyond the "top" address of the space.
     span = span.intersection(sp->used_region());
     if (!span.is_empty()) {  // Non-null task
-      // We want to skip the first object because
-      // the protocol is to scan any object in its entirety
-      // that _starts_ in this span; a fortiori, any
-      // object starting in an earlier span is scanned
-      // as part of an earlier claimed task.
-      // Below we use the "careful" version of block_start
-      // so we do not try to navigate uninitialized objects.
-      HeapWord* prev_obj = sp->block_start_careful(span.start());
-      // Below we use a variant of block_size that uses the
-      // Printezis bits to avoid waiting for allocated
-      // objects to become initialized/parsable.
-      while (prev_obj < span.start()) {
-        size_t sz = sp->block_size_no_stall(prev_obj, _collector);
-        if (sz > 0) {
-          prev_obj += sz;
+      HeapWord* prev_obj;
+      assert(!span.contains(_restart_addr) || nth_task == 0,
+             "Inconsistency");
+      if (nth_task == 0) {
+        // For the 0th task, we'll not need to compute a block_start.
+        if (span.contains(_restart_addr)) {
+          // In the case of a restart because of stack overflow,
+          // we might additionally skip a chunk prefix.
+          prev_obj = _restart_addr;
         } else {
-          // In this case we may end up doing a bit of redundant
-          // scanning, but that appears unavoidable, short of
-          // locking the free list locks; see bug 6324141.
-          break;
+          prev_obj = span.start();
+        }
+      } else {
+        // We want to skip the first object because
+        // the protocol is to scan any object in its entirety
+        // that _starts_ in this span; a fortiori, any
+        // object starting in an earlier span is scanned
+        // as part of an earlier claimed task.
+        // Below we use the "careful" version of block_start
+        // so we do not try to navigate uninitialized objects.
+        prev_obj = sp->block_start_careful(span.start());
+        // Below we use a variant of block_size that uses the
+        // Printezis bits to avoid waiting for allocated
+        // objects to become initialized/parsable.
+        while (prev_obj < span.start()) {
+          size_t sz = sp->block_size_no_stall(prev_obj, _collector);
+          if (sz > 0) {
+            prev_obj += sz;
+          } else {
+            // In this case we may end up doing a bit of redundant
+            // scanning, but that appears unavoidable, short of
+            // locking the free list locks; see bug 6324141.
+            break;
+          }
         }
       }
       if (prev_obj < span.end()) {
@@ -3938,12 +3966,14 @@
   void handle_stack_overflow(HeapWord* lost);
 };
 
-// Grey object rescan during work stealing phase --
-// the salient assumption here is that stolen oops must
-// always be initialized, so we do not need to check for
-// uninitialized objects before scanning here.
+// Grey object scanning during work stealing phase --
+// the salient assumption here is that any references
+// that are in these stolen objects being scanned must
+// already have been initialized (else they would not have
+// been published), so we do not need to check for
+// uninitialized objects before pushing here.
 void Par_ConcMarkingClosure::do_oop(oop obj) {
-  assert(obj->is_oop_or_null(), "expected an oop or NULL");
+  assert(obj->is_oop_or_null(true), "expected an oop or NULL");
   HeapWord* addr = (HeapWord*)obj;
   // Check if oop points into the CMS generation
   // and is not marked
@@ -4001,7 +4031,7 @@
 // in CMSCollector's _restart_address.
 void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
   // We need to do this under a mutex to prevent other
-  // workers from interfering with the expansion below.
+  // workers from interfering with the work done below.
   MutexLockerEx ml(_overflow_stack->par_lock(),
                    Mutex::_no_safepoint_check_flag);
   // Remember the least grey address discarded
@@ -4640,8 +4670,11 @@
       startTimer();
       sample_eden();
       // Get and clear dirty region from card table
-      dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean(
-                                    MemRegion(nextAddr, endAddr));
+      dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset(
+                                    MemRegion(nextAddr, endAddr),
+                                    true,
+                                    CardTableModRefBS::precleaned_card_val());
+
       assert(dirtyRegion.start() >= nextAddr,
              "returned region inconsistent?");
     }
@@ -5409,8 +5442,8 @@
                               &mrias_cl);
   {
     TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty);
-    // Iterate over the dirty cards, marking them precleaned, and
-    // setting the corresponding bits in the mod union table.
+    // Iterate over the dirty cards, setting the corresponding bits in the
+    // mod union table.
     {
       ModUnionClosure modUnionClosure(&_modUnionTable);
       _ct->ct_bs()->dirty_card_iterate(
@@ -6182,7 +6215,7 @@
 // bit vector itself. That is done by a separate call CMSBitMap::allocate()
 // further below.
 CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
-  _bm(NULL,0),
+  _bm(),
   _shifter(shifter),
   _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
 {
@@ -6207,7 +6240,7 @@
   }
   assert(_virtual_space.committed_size() == brs.size(),
          "didn't reserve backing store for all of CMS bit map?");
-  _bm.set_map((uintptr_t*)_virtual_space.low());
+  _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
          _bmWordSize, "inconsistency in bit map sizing");
   _bm.set_size(_bmWordSize >> _shifter);
@@ -6554,7 +6587,7 @@
   if (obj != NULL) {
     // Ignore mark word because this could be an already marked oop
     // that may be chained at the end of the overflow list.
-    assert(obj->is_oop(), "expected an oop");
+    assert(obj->is_oop(true), "expected an oop");
     HeapWord* addr = (HeapWord*)obj;
     if (_span.contains(addr) &&
         !_bit_map->isMarked(addr)) {
@@ -6845,10 +6878,10 @@
 
 // Should revisit to see if this should be restructured for
 // greater efficiency.
-void MarkFromRootsClosure::do_bit(size_t offset) {
+bool MarkFromRootsClosure::do_bit(size_t offset) {
   if (_skipBits > 0) {
     _skipBits--;
-    return;
+    return true;
   }
   // convert offset into a HeapWord*
   HeapWord* addr = _bitMap->startWord() + offset;
@@ -6886,10 +6919,11 @@
           } // ...else the setting of klass will dirty the card anyway.
         }
       DEBUG_ONLY(})
-      return;
+      return true;
     }
   }
   scanOopsInOop(addr);
+  return true;
 }
 
 // We take a break if we've been at this for a while,
@@ -7023,10 +7057,10 @@
 
 // Should revisit to see if this should be restructured for
 // greater efficiency.
-void Par_MarkFromRootsClosure::do_bit(size_t offset) {
+bool Par_MarkFromRootsClosure::do_bit(size_t offset) {
   if (_skip_bits > 0) {
     _skip_bits--;
-    return;
+    return true;
   }
   // convert offset into a HeapWord*
   HeapWord* addr = _bit_map->startWord() + offset;
@@ -7041,10 +7075,11 @@
     if (p->klass_or_null() == NULL || !p->is_parsable()) {
       // in the case of Clean-on-Enter optimization, redirty card
       // and avoid clearing card by increasing  the threshold.
-      return;
+      return true;
     }
   }
   scan_oops_in_oop(addr);
+  return true;
 }
 
 void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) {
@@ -7167,7 +7202,7 @@
 
 // Should revisit to see if this should be restructured for
 // greater efficiency.
-void MarkFromRootsVerifyClosure::do_bit(size_t offset) {
+bool MarkFromRootsVerifyClosure::do_bit(size_t offset) {
   // convert offset into a HeapWord*
   HeapWord* addr = _verification_bm->startWord() + offset;
   assert(_verification_bm->endWord() && addr < _verification_bm->endWord(),
@@ -7195,6 +7230,7 @@
     new_oop->oop_iterate(&_pam_verify_closure);
   }
   assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition");
+  return true;
 }
 
 PushAndMarkVerifyClosure::PushAndMarkVerifyClosure(
@@ -7289,6 +7325,8 @@
   _should_remember_klasses(collector->should_unload_classes())
 { }
 
+// Assumes thread-safe access by callers, who are
+// responsible for mutual exclusion.
 void CMSCollector::lower_restart_addr(HeapWord* low) {
   assert(_span.contains(low), "Out of bounds addr");
   if (_restart_addr == NULL) {
@@ -7314,7 +7352,7 @@
 // in CMSCollector's _restart_address.
 void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
   // We need to do this under a mutex to prevent other
-  // workers from interfering with the expansion below.
+  // workers from interfering with the work done below.
   MutexLockerEx ml(_overflow_stack->par_lock(),
                    Mutex::_no_safepoint_check_flag);
   // Remember the least grey address discarded
@@ -7438,8 +7476,12 @@
 // Grey object rescan during pre-cleaning and second checkpoint phases --
 // the non-parallel version (the parallel version appears further below.)
 void PushAndMarkClosure::do_oop(oop obj) {
-  // If _concurrent_precleaning, ignore mark word verification
-  assert(obj->is_oop_or_null(_concurrent_precleaning),
+  // Ignore mark word verification. If during concurrent precleaning,
+  // the object monitor may be locked. If during the checkpoint
+  // phases, the object may already have been reached by a different
+  // path and may be at the end of the global overflow list (so
+  // the mark word may be NULL).
+  assert(obj->is_oop_or_null(true /* ignore mark word */),
          "expected an oop or NULL");
   HeapWord* addr = (HeapWord*)obj;
   // Check if oop points into the CMS generation
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1327,7 +1327,7 @@
                        CMSMarkStack*  markStack,
                        CMSMarkStack*  revisitStack,
                        bool should_yield, bool verifying = false);
-  void do_bit(size_t offset);
+  bool do_bit(size_t offset);
   void reset(HeapWord* addr);
   inline void do_yield_check();
 
@@ -1363,7 +1363,7 @@
                        CMSMarkStack*  overflow_stack,
                        CMSMarkStack*  revisit_stack,
                        bool should_yield);
-  void do_bit(size_t offset);
+  bool do_bit(size_t offset);
   inline void do_yield_check();
 
  private:
@@ -1411,7 +1411,7 @@
                              CMSBitMap* verification_bm,
                              CMSBitMap* cms_bm,
                              CMSMarkStack*  mark_stack);
-  void do_bit(size_t offset);
+  bool do_bit(size_t offset);
   void reset(HeapWord* addr);
 };
 
@@ -1420,8 +1420,9 @@
 // "empty" (i.e. the bit vector doesn't have any 1-bits).
 class FalseBitMapClosure: public BitMapClosure {
  public:
-  void do_bit(size_t offset) {
+  bool do_bit(size_t offset) {
     guarantee(false, "Should not have a 1 bit");
+    return true;
   }
 };
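The do_bit() changes in the hunks above (void to bool, with every exit returning true) follow the new BitMapClosure convention: the boolean result tells the bitmap iterator whether to keep going, so a closure can abort the walk early. A minimal stand-alone sketch of that convention; SimpleBitMap and FirstNClosure are illustrative stand-ins, not HotSpot's BitMap classes:

    #include <cstddef>
    #include <vector>

    // Closure applied to each set bit; returning false stops the iteration.
    class SimpleBitMapClosure {
    public:
      virtual bool do_bit(size_t offset) = 0;
      virtual ~SimpleBitMapClosure() {}
    };

    class SimpleBitMap {
      std::vector<bool> _bits;
    public:
      explicit SimpleBitMap(size_t n) : _bits(n, false) {}
      void set_bit(size_t i) { _bits[i] = true; }
      // Returns true iff the closure never asked to stop.
      bool iterate(SimpleBitMapClosure* cl) {
        for (size_t i = 0; i < _bits.size(); ++i) {
          if (_bits[i] && !cl->do_bit(i)) return false;
        }
        return true;
      }
    };

    // Stops the iteration once `limit` set bits have been visited
    // (assumes limit >= 1).
    class FirstNClosure : public SimpleBitMapClosure {
      size_t _visited;
      size_t _limit;
    public:
      explicit FirstNClosure(size_t limit) : _visited(0), _limit(limit) {}
      bool do_bit(size_t /*offset*/) { return ++_visited < _limit; }
    };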
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A BufferingOopClosure tries to separate out the cost of finding roots
+// from the cost of applying closures to them.  It maintains an array of
+// ref-containing locations.  Until the array is full, applying the closure
+// to an oop* merely records that location in the array.  Since the cost of
+// this closure application is small, an elapsed timer can approximately
+// attribute all of this cost to the cost of finding the roots.  When the
+// array fills up, the wrapped closure is applied to all elements, keeping
+// track of the elapsed time of this process, and leaving the array empty.
+// The caller must be sure to call "done" to process any unprocessed
+// buffered entries.
+
+class Generation;
+class HeapRegion;
+
+class BufferingOopClosure: public OopClosure {
+protected:
+  enum PrivateConstants {
+    BufferLength = 1024
+  };
+
+  oop          *_buffer[BufferLength];
+  oop         **_buffer_top;
+  oop         **_buffer_curr;
+
+  OopClosure  *_oc;
+  double       _closure_app_seconds;
+
+  void process_buffer () {
+
+    double start = os::elapsedTime();
+    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
+      _oc->do_oop(*curr);
+    }
+    _buffer_curr = _buffer;
+    _closure_app_seconds += (os::elapsedTime() - start);
+  }
+
+public:
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop *p) {
+    if (_buffer_curr == _buffer_top) {
+      process_buffer();
+    }
+
+    *_buffer_curr = p;
+    ++_buffer_curr;
+  }
+  void done () {
+    if (_buffer_curr > _buffer) {
+      process_buffer();
+    }
+  }
+  double closure_app_seconds () {
+    return _closure_app_seconds;
+  }
+  BufferingOopClosure (OopClosure *oc) :
+    _oc(oc),
+    _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
+    _closure_app_seconds(0.0) { }
+};
+
+class BufferingOopsInGenClosure: public OopsInGenClosure {
+  BufferingOopClosure _boc;
+  OopsInGenClosure* _oc;
+public:
+  BufferingOopsInGenClosure(OopsInGenClosure *oc) :
+    _boc(oc), _oc(oc) {}
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  virtual void do_oop(oop* p) {
+    assert(generation()->is_in_reserved(p), "Must be in!");
+    _boc.do_oop(p);
+  }
+
+  void done() {
+    _boc.done();
+  }
+
+  double closure_app_seconds () {
+    return _boc.closure_app_seconds();
+  }
+
+  void set_generation(Generation* gen) {
+    OopsInGenClosure::set_generation(gen);
+    _oc->set_generation(gen);
+  }
+
+  void reset_generation() {
+    // Make sure we finish the current work with the current generation.
+    _boc.done();
+    OopsInGenClosure::reset_generation();
+    _oc->reset_generation();
+  }
+
+};
+
+
+class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure {
+private:
+  enum PrivateConstants {
+    BufferLength = 1024
+  };
+
+  oop                      *_buffer[BufferLength];
+  oop                     **_buffer_top;
+  oop                     **_buffer_curr;
+
+  HeapRegion               *_hr_buffer[BufferLength];
+  HeapRegion              **_hr_curr;
+
+  OopsInHeapRegionClosure  *_oc;
+  double                    _closure_app_seconds;
+
+  void process_buffer () {
+
+    assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer),
+           "the two lengths should be the same");
+
+    double start = os::elapsedTime();
+    HeapRegion **hr_curr = _hr_buffer;
+    HeapRegion *hr_prev = NULL;
+    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
+      HeapRegion *region = *hr_curr;
+      if (region != hr_prev) {
+        _oc->set_region(region);
+        hr_prev = region;
+      }
+      _oc->do_oop(*curr);
+      ++hr_curr;
+    }
+    _buffer_curr = _buffer;
+    _hr_curr = _hr_buffer;
+    _closure_app_seconds += (os::elapsedTime() - start);
+  }
+
+public:
+  virtual void do_oop(narrowOop *p) {
+    guarantee(false, "NYI");
+  }
+
+  virtual void do_oop(oop *p) {
+    if (_buffer_curr == _buffer_top) {
+      assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
+      process_buffer();
+    }
+
+    *_buffer_curr = p;
+    ++_buffer_curr;
+    *_hr_curr = _from;
+    ++_hr_curr;
+  }
+  void done () {
+    if (_buffer_curr > _buffer) {
+      assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
+      process_buffer();
+    }
+  }
+  double closure_app_seconds () {
+    return _closure_app_seconds;
+  }
+  BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) :
+    _oc(oc),
+    _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
+    _hr_curr(_hr_buffer),
+    _closure_app_seconds(0.0) { }
+};
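The file above batches ref-containing locations and applies the wrapped closure to a whole buffer at a time, so the timer around process_buffer() attributes the closure cost separately from the root-finding cost. A stripped-down model of the same pattern; Visitor and BatchingVisitor are illustrative names, and the timing is omitted:

    #include <cstddef>

    struct Visitor {                       // stand-in for OopClosure
      virtual void visit(void** location) = 0;
      virtual ~Visitor() {}
    };

    // Recording a location is cheap; the wrapped visitor only runs when the
    // buffer fills, or when done() flushes the tail.
    class BatchingVisitor : public Visitor {
      static const size_t BufferLength = 1024;
      void**   _buffer[BufferLength];
      void***  _curr;
      void***  _top;
      Visitor* _wrapped;

      void process_buffer() {
        for (void*** p = _buffer; p < _curr; ++p) {
          _wrapped->visit(*p);
        }
        _curr = _buffer;                   // leave the buffer empty
      }

    public:
      explicit BatchingVisitor(Visitor* wrapped)
        : _curr(_buffer), _top(_buffer + BufferLength), _wrapped(wrapped) {}

      virtual void visit(void** location) {
        if (_curr == _top) process_buffer();
        *_curr++ = location;               // just record the location
      }

      // Callers must flush the unprocessed tail, like BufferingOopClosure::done().
      void done() { if (_curr > _buffer) process_buffer(); }
    };

Forgetting the final done() call would silently drop up to BufferLength - 1 buffered locations, which is why the header comment above stresses it.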
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,409 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_collectionSetChooser.cpp.incl"
+
+CSetChooserCache::CSetChooserCache() {
+  for (int i = 0; i < CacheLength; ++i)
+    _cache[i] = NULL;
+  clear();
+}
+
+void CSetChooserCache::clear() {
+  _occupancy = 0;
+  _first = 0;
+  for (int i = 0; i < CacheLength; ++i) {
+    HeapRegion *hr = _cache[i];
+    if (hr != NULL)
+      hr->set_sort_index(-1);
+    _cache[i] = NULL;
+  }
+}
+
+#ifndef PRODUCT
+bool CSetChooserCache::verify() {
+  int index = _first;
+  HeapRegion *prev = NULL;
+  for (int i = 0; i < _occupancy; ++i) {
+    guarantee(_cache[index] != NULL, "cache entry should not be empty");
+    HeapRegion *hr = _cache[index];
+    guarantee(!hr->is_young(), "should not be young!");
+    if (prev != NULL) {
+      guarantee(prev->gc_efficiency() >= hr->gc_efficiency(),
+                "cache should be correctly ordered");
+    }
+    guarantee(hr->sort_index() == get_sort_index(index),
+              "sort index should be correct");
+    index = trim_index(index + 1);
+    prev = hr;
+  }
+
+  for (int i = 0; i < (CacheLength - _occupancy); ++i) {
+    guarantee(_cache[index] == NULL, "cache entry should be empty");
+    index = trim_index(index + 1);
+  }
+
+  guarantee(index == _first, "we should have reached where we started from");
+  return true;
+}
+#endif // PRODUCT
+
+void CSetChooserCache::insert(HeapRegion *hr) {
+  assert(!is_full(), "cache should not be full");
+  hr->calc_gc_efficiency();
+
+  int empty_index;
+  if (_occupancy == 0) {
+    empty_index = _first;
+  } else {
+    empty_index = trim_index(_first + _occupancy);
+    assert(_cache[empty_index] == NULL, "last slot should be empty");
+    int last_index = trim_index(empty_index - 1);
+    HeapRegion *last = _cache[last_index];
+    assert(last != NULL,"as the cache is not empty, last should not be empty");
+    while (empty_index != _first &&
+           last->gc_efficiency() < hr->gc_efficiency()) {
+      _cache[empty_index] = last;
+      last->set_sort_index(get_sort_index(empty_index));
+      empty_index = last_index;
+      last_index = trim_index(last_index - 1);
+      last = _cache[last_index];
+    }
+  }
+  _cache[empty_index] = hr;
+  hr->set_sort_index(get_sort_index(empty_index));
+
+  ++_occupancy;
+  assert(verify(), "cache should be consistent");
+}
+
+HeapRegion *CSetChooserCache::remove_first() {
+  if (_occupancy > 0) {
+    assert(_cache[_first] != NULL, "cache should have at least one region");
+    HeapRegion *ret = _cache[_first];
+    _cache[_first] = NULL;
+    ret->set_sort_index(-1);
+    --_occupancy;
+    _first = trim_index(_first + 1);
+    assert(verify(), "cache should be consistent");
+    return ret;
+  } else {
+    return NULL;
+  }
+}
+
+// This is a bit expensive... but we expect that it should not be called
+// too often.
+void CSetChooserCache::remove(HeapRegion *hr) {
+  assert(_occupancy > 0, "cache should not be empty");
+  assert(hr->sort_index() < -1, "should already be in the cache");
+  int index = get_index(hr->sort_index());
+  assert(_cache[index] == hr, "index should be correct");
+  int next_index = trim_index(index + 1);
+  int last_index = trim_index(_first + _occupancy - 1);
+  while (index != last_index) {
+    assert(_cache[next_index] != NULL, "should not be null");
+    _cache[index] = _cache[next_index];
+    _cache[index]->set_sort_index(get_sort_index(index));
+
+    index = next_index;
+    next_index = trim_index(next_index+1);
+  }
+  assert(index == last_index, "should have reached the last one");
+  _cache[index] = NULL;
+  hr->set_sort_index(-1);
+  --_occupancy;
+  assert(verify(), "cache should be consistent");
+}
+
+static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
+  if (hr1 == NULL) {
+    if (hr2 == NULL) return 0;
+    else return 1;
+  } else if (hr2 == NULL) {
+    return -1;
+  }
+  if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1;
+  else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1;
+  else return 0;
+}
+
+static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
+  return orderRegions(*hr1p, *hr2p);
+}
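orderRegions() above sorts regions by decreasing GC efficiency and pushes NULL entries to the back (a NULL first argument compares as greater). A standalone check of that contract; Region and the literal efficiencies are made up for illustration:

    #include <cassert>
    #include <cstddef>

    struct Region { double gc_efficiency; };   // stand-in for HeapRegion

    // Same contract as orderRegions(): NULLs sort last, otherwise higher
    // gc_efficiency sorts first.
    static int order_regions(const Region* a, const Region* b) {
      if (a == NULL) return (b == NULL) ? 0 : 1;
      if (b == NULL) return -1;
      if (b->gc_efficiency < a->gc_efficiency) return -1;
      if (a->gc_efficiency < b->gc_efficiency) return 1;
      return 0;
    }

    int main() {
      Region efficient = { 4.0 }, costly = { 1.0 };
      assert(order_regions(&efficient, &costly) == -1);  // efficient first
      assert(order_regions(NULL, &costly) == 1);          // NULL last
      return 0;
    }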
+
+CollectionSetChooser::CollectionSetChooser() :
+  // The line below is the worst bit of C++ hackery I've ever written
+  // (Detlefs, 11/23).  You should think of it as equivalent to
+  // "_regions(100, true)": initialize the growable array and inform it
+  // that it should allocate its elem array(s) on the C heap.  The first
+  // argument, however, is actually a comma expression (new-expr, 100).
+  // The purpose of the new-expr is to inform the growable array that it
+  // is *already* allocated on the C heap: it uses the placement syntax to
+  // keep it from actually doing any allocation.
+  _markedRegions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
+                                             (void*)&_markedRegions,
+                                             ResourceObj::C_HEAP),
+                  100),
+                 true),
+  _curMarkedIndex(0),
+  _numMarkedRegions(0),
+  _unmarked_age_1_returned_as_new(false),
+  _first_par_unreserved_idx(0)
+{}
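The initializer above hinges on a comma expression whose first operand is a raw call to ResourceObj::operator new with placement-style arguments: it tells the growable array that it is already allocated (its storage is the embedded member itself), so no fresh allocation happens when the constructor runs. A stripped-down illustration of the underlying placement-new idea; Widget and its storage union are illustrative, not HotSpot's GrowableArray or ResourceObj:

    #include <new>       // placement operator new
    #include <cstdio>

    struct Widget {                         // illustrative only
      int capacity;
      explicit Widget(int cap) : capacity(cap) {}
    };

    // Pre-existing storage, analogous to the embedded _markedRegions member.
    static union { void* aligner; char bytes[sizeof(Widget)]; } storage;

    int main() {
      Widget* w = new (&storage) Widget(100);   // construct in place, no allocation
      std::printf("capacity = %d\n", w->capacity);
      w->~Widget();                             // placement new needs manual destruction
      return 0;
    }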
+
+
+
+#ifndef PRODUCT
+bool CollectionSetChooser::verify() {
+  int index = 0;
+  guarantee(_curMarkedIndex <= _numMarkedRegions,
+            "_curMarkedIndex should be within bounds");
+  while (index < _curMarkedIndex) {
+    guarantee(_markedRegions.at(index++) == NULL,
+              "all entries before _curMarkedIndex should be NULL");
+  }
+  HeapRegion *prev = NULL;
+  while (index < _numMarkedRegions) {
+    HeapRegion *curr = _markedRegions.at(index++);
+    if (curr != NULL) {
+      int si = curr->sort_index();
+      guarantee(!curr->is_young(), "should not be young!");
+      guarantee(si > -1 && si == (index-1), "sort index invariant");
+      if (prev != NULL) {
+        guarantee(orderRegions(prev, curr) != 1, "regions should be sorted");
+      }
+      prev = curr;
+    }
+  }
+  return _cache.verify();
+}
+#endif
+
+bool
+CollectionSetChooser::addRegionToCache() {
+  assert(!_cache.is_full(), "cache should not be full");
+
+  HeapRegion *hr = NULL;
+  while (hr == NULL && _curMarkedIndex < _numMarkedRegions) {
+    hr = _markedRegions.at(_curMarkedIndex++);
+  }
+  if (hr == NULL)
+    return false;
+  assert(!hr->is_young(), "should not be young!");
+  assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant");
+  _markedRegions.at_put(hr->sort_index(), NULL);
+  _cache.insert(hr);
+  assert(!_cache.is_empty(), "cache should not be empty");
+  assert(verify(), "cache should be consistent");
+  return true;
+}
+
+void
+CollectionSetChooser::fillCache() {
+  while (!_cache.is_full() && addRegionToCache()) {
+  }
+}
+
+void
+CollectionSetChooser::sortMarkedHeapRegions() {
+  guarantee(_cache.is_empty(), "cache should be empty");
+  // First trim any unused portion of the top in the parallel case.
+  if (_first_par_unreserved_idx > 0) {
+    if (G1PrintParCleanupStats) {
+      gclog_or_tty->print("     Truncating _markedRegions from %d to %d.\n",
+                          _markedRegions.length(), _first_par_unreserved_idx);
+    }
+    assert(_first_par_unreserved_idx <= _markedRegions.length(),
+           "Or we didn't reserve enough length");
+    _markedRegions.trunc_to(_first_par_unreserved_idx);
+  }
+  _markedRegions.sort(orderRegions);
+  assert(_numMarkedRegions <= _markedRegions.length(), "Requirement");
+  assert(_numMarkedRegions == 0
+         || _markedRegions.at(_numMarkedRegions-1) != NULL,
+         "Testing _numMarkedRegions");
+  assert(_numMarkedRegions == _markedRegions.length()
+         || _markedRegions.at(_numMarkedRegions) == NULL,
+         "Testing _numMarkedRegions");
+  if (G1PrintParCleanupStats) {
+    gclog_or_tty->print_cr("     Sorted %d marked regions.", _numMarkedRegions);
+  }
+  for (int i = 0; i < _numMarkedRegions; i++) {
+    assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
+    _markedRegions.at(i)->set_sort_index(i);
+    if (G1PrintRegionLivenessInfo > 0) {
+      if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:");
+      if (i < G1PrintRegionLivenessInfo ||
+          (_numMarkedRegions-i) < G1PrintRegionLivenessInfo) {
+        HeapRegion* hr = _markedRegions.at(i);
+        size_t u = hr->used();
+        gclog_or_tty->print_cr("  Region %d: %d used, %d max live, %5.2f%%.",
+                      i, u, hr->max_live_bytes(),
+                      100.0*(float)hr->max_live_bytes()/(float)u);
+      }
+    }
+  }
+  if (G1PolicyVerbose > 1)
+    printSortedHeapRegions();
+  assert(verify(), "should now be sorted");
+}
+
+void
+printHeapRegion(HeapRegion *hr) {
+  if (hr->isHumongous())
+    gclog_or_tty->print("H: ");
+  if (hr->in_collection_set())
+    gclog_or_tty->print("CS: ");
+  if (hr->popular())
+    gclog_or_tty->print("pop: ");
+  gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) "
+                         "[" PTR_FORMAT ", " PTR_FORMAT"] "
+                         "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.",
+                         hr, hr->is_young() ? "Y " : "  ",
+                         hr->is_marked()? "M1" : "M0",
+                         hr->bottom(), hr->end(),
+                         hr->used()/K, hr->garbage_bytes()/K);
+}
+
+void
+CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
+  assert(!hr->isHumongous(),
+         "Humongous regions shouldn't be added to the collection set");
+  assert(!hr->is_young(), "should not be young!");
+  _markedRegions.append(hr);
+  _numMarkedRegions++;
+  hr->calc_gc_efficiency();
+}
+
+void
+CollectionSetChooser::
+prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
+  _first_par_unreserved_idx = 0;
+  size_t max_waste = ParallelGCThreads * chunkSize;
+  // it should be aligned with respect to chunkSize
+  size_t aligned_n_regions =
+                     (n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
+  assert( aligned_n_regions % chunkSize == 0, "should be aligned" );
+  _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL);
+}
+
+jint
+CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
+  jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
+  assert(_markedRegions.length() > res + n_regions - 1,
+         "Should already have been expanded");
+  return res - n_regions;
+}
+
+void
+CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
+  assert(_markedRegions.at(index) == NULL, "precondition");
+  assert(!hr->is_young(), "should not be young!");
+  _markedRegions.at_put(index, hr);
+  hr->calc_gc_efficiency();
+}
+
+void
+CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) {
+  (void)Atomic::add(inc_by, &_numMarkedRegions);
+}
+
+void
+CollectionSetChooser::clearMarkedHeapRegions(){
+  for (int i = 0; i < _markedRegions.length(); i++) {
+    HeapRegion* r =   _markedRegions.at(i);
+    if (r != NULL) r->set_sort_index(-1);
+  }
+  _markedRegions.clear();
+  _curMarkedIndex = 0;
+  _numMarkedRegions = 0;
+  _cache.clear();
+}
+
+void
+CollectionSetChooser::updateAfterFullCollection() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  clearMarkedHeapRegions();
+}
+
+void
+CollectionSetChooser::printSortedHeapRegions() {
+  gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage",
+                _numMarkedRegions);
+  for (int i = 0; i < _markedRegions.length(); i++) {
+    printHeapRegion(_markedRegions.at(i));
+  }
+  gclog_or_tty->print_cr("Done sorted heap region print");
+}
+
+void CollectionSetChooser::removeRegion(HeapRegion *hr) {
+  int si = hr->sort_index();
+  assert(si == -1 || hr->is_marked(), "Sort index not valid.");
+  if (si > -1) {
+    assert(_markedRegions.at(si) == hr, "Sort index not valid." );
+    _markedRegions.at_put(si, NULL);
+  } else if (si < -1) {
+    assert(_cache.region_in_cache(hr), "should be in the cache");
+    _cache.remove(hr);
+    assert(hr->sort_index() == -1, "sort index invariant");
+  }
+  hr->set_sort_index(-1);
+}
+
+// if time_remaining < 0.0, then this method should try to return
+// a region, whether it fits within the remaining time or not
+HeapRegion*
+CollectionSetChooser::getNextMarkedRegion(double time_remaining,
+                                          double avg_prediction) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  fillCache();
+  if (_cache.is_empty()) {
+    assert(_curMarkedIndex == _numMarkedRegions,
+           "if cache is empty, list should also be empty");
+    return NULL;
+  }
+
+  HeapRegion *hr = _cache.get_first();
+  assert(hr != NULL, "if cache not empty, first entry should be non-null");
+  double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false);
+
+  if (g1p->adaptive_young_list_length()) {
+    if (time_remaining - predicted_time < 0.0) {
+      g1h->check_if_region_is_too_expensive(predicted_time);
+      return NULL;
+    }
+  } else {
+    if (predicted_time > 2.0 * avg_prediction) {
+      return NULL;
+    }
+  }
+
+  HeapRegion *hr2 = _cache.remove_first();
+  assert(hr == hr2, "cache contents should not have changed");
+
+  return hr;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// We need to sort heap regions by collection desirability.
+
+class CSetChooserCache {
+private:
+  enum PrivateConstants {
+    CacheLength = 16
+  };
+
+  HeapRegion*  _cache[CacheLength];
+  int          _occupancy; // number of regions in the cache
+  int          _first; // "first" region in the cache
+
+  // adding CacheLength to deal with negative values
+  inline int trim_index(int index) {
+    return (index + CacheLength) % CacheLength;
+  }
+
+  inline int get_sort_index(int index) {
+    return -index-2;
+  }
+  inline int get_index(int sort_index) {
+    return -sort_index-2;
+  }
+
+public:
+  CSetChooserCache(void);
+
+  inline int occupancy(void) { return _occupancy; }
+  inline bool is_full()      { return _occupancy == CacheLength; }
+  inline bool is_empty()     { return _occupancy == 0; }
+
+  void clear(void);
+  void insert(HeapRegion *hr);
+  HeapRegion *remove_first(void);
+  void remove (HeapRegion *hr);
+  inline HeapRegion *get_first(void) {
+    return _cache[_first];
+  }
+
+#ifndef PRODUCT
+  bool verify (void);
+  bool region_in_cache(HeapRegion *hr) {
+    int sort_index = hr->sort_index();
+    if (sort_index < -1) {
+      int index = get_index(sort_index);
+      guarantee(index < CacheLength, "should be within bounds");
+      return _cache[index] == hr;
+    } else
+      return false;
+  }
+#endif // PRODUCT
+};
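The cache records where a region sits by storing sort_index = -index - 2 in the region itself, so cached regions always carry a sort index below -1; indices >= 0 mean "in the sorted _markedRegions array" and -1 means "not tracked anywhere". A quick standalone check of the round trip:

    #include <cassert>

    // Same encoding as CSetChooserCache: cache slot i <-> sort index -i - 2.
    static int get_sort_index(int index)   { return -index - 2; }
    static int get_index(int sort_index)   { return -sort_index - 2; }

    int main() {
      const int CacheLength = 16;
      for (int i = 0; i < CacheLength; ++i) {
        int si = get_sort_index(i);
        assert(si < -1);                 // never collides with -1 or array indices
        assert(get_index(si) == i);      // decoding recovers the slot
      }
      return 0;
    }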
+
+class CollectionSetChooser: public CHeapObj {
+
+  GrowableArray<HeapRegion*> _markedRegions;
+  int _curMarkedIndex;
+  int _numMarkedRegions;
+  CSetChooserCache _cache;
+
+  // True iff last collection pause ran out of new "age 0" regions, and
+  // returned an "age 1" region.
+  bool _unmarked_age_1_returned_as_new;
+
+  jint _first_par_unreserved_idx;
+
+public:
+
+  HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction);
+
+  CollectionSetChooser();
+
+  void printSortedHeapRegions();
+
+  void sortMarkedHeapRegions();
+  void fillCache();
+  bool addRegionToCache(void);
+  void addMarkedHeapRegion(HeapRegion *hr);
+
+  // Must be called before calls to getParMarkedHeapRegionChunk.
+  // "n_regions" is the number of regions, "chunkSize" the chunk size.
+  void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize);
+  // Returns the first index in a contiguous chunk of "n_regions" indexes
+  // that the calling thread has reserved.  These must be set by the
+  // calling thread using "setMarkedHeapRegion" (to NULL if necessary).
+  jint getParMarkedHeapRegionChunk(jint n_regions);
+  // Set the marked array entry at index to hr.  Careful to claim the index
+  // first if in parallel.
+  void setMarkedHeapRegion(jint index, HeapRegion* hr);
+  // Atomically increment the number of claimed regions by "inc_by".
+  void incNumMarkedHeapRegions(jint inc_by);
+
+  void clearMarkedHeapRegions();
+
+  void updateAfterFullCollection();
+
+  // Ensure that "hr" is not a member of the marked region array or the cache
+  void removeRegion(HeapRegion* hr);
+
+  bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; }
+
+  // Returns true if the used portion of "_markedRegions" is properly
+  // sorted, otherwise asserts false.
+#ifndef PRODUCT
+  bool verify(void);
+  bool regionProperlyOrdered(HeapRegion* r) {
+    int si = r->sort_index();
+    return (si == -1) ||
+      (si > -1 && _markedRegions.at(si) == r) ||
+      (si < -1 && _cache.region_in_cache(r));
+  }
+#endif
+
+};
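The *Par methods declared above form a small claim protocol: prepareForAddMarkedHeapRegionsPar grows the array up front, each worker atomically reserves a disjoint chunk of indices with getParMarkedHeapRegionChunk, fills it with setMarkedHeapRegion, and finally reports its count through incNumMarkedHeapRegions. A standalone model of the claiming step, with std::atomic standing in for HotSpot's Atomic::add and plain ints standing in for regions:

    #include <atomic>
    #include <vector>

    // Workers reserve disjoint index ranges by bumping a shared counter,
    // then fill their range without further synchronization.
    struct ChunkClaimer {
      std::atomic<int> next_idx;
      ChunkClaimer() : next_idx(0) {}

      // Returns the first index of an n-slot chunk now owned by the caller.
      int claim_chunk(int n) { return next_idx.fetch_add(n); }
    };

    int main() {
      const int chunk_size = 8;
      std::vector<int> slots(64, -1);          // pre-grown, like _markedRegions
      ChunkClaimer claimer;

      // Sequential here; in the VM each iteration would be a GC worker thread.
      for (int worker = 0; worker < 4; ++worker) {
        int start = claimer.claim_chunk(chunk_size);
        for (int i = 0; i < chunk_size; ++i) {
          slots[start + i] = worker;           // analogous to setMarkedHeapRegion
        }
      }
      return 0;
    }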
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentG1Refine.cpp.incl"
+
+bool ConcurrentG1Refine::_enabled = false;
+
+ConcurrentG1Refine::ConcurrentG1Refine() :
+  _pya(PYA_continue), _last_pya(PYA_continue),
+  _last_cards_during(), _first_traversal(false),
+  _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
+  _hot_cache(NULL),
+  _def_use_cache(false), _use_cache(false),
+  _n_periods(0), _total_cards(0), _total_travs(0)
+{
+  if (G1ConcRefine) {
+    _cg1rThread = new ConcurrentG1RefineThread(this);
+    assert(cg1rThread() != NULL, "Conc refine should have been created");
+    assert(cg1rThread()->cg1r() == this,
+           "Conc refine thread should refer to this");
+  } else {
+    _cg1rThread = NULL;
+  }
+}
+
+void ConcurrentG1Refine::init() {
+  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    _n_card_counts =
+      (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
+    _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
+    for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
+    ModRefBarrierSet* bs = g1h->mr_bs();
+    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
+    CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
+    _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
+    if (G1ConcRSCountTraversals) {
+      _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
+      _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
+      for (int i = 0; i < 256; i++) {
+        _cur_card_count_histo[i] = 0;
+        _cum_card_count_histo[i] = 0;
+      }
+    }
+  }
+  if (G1ConcRSLogCacheSize > 0) {
+    _def_use_cache = true;
+    _use_cache = true;
+    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
+    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
+    _n_hot = 0;
+    _hot_cache_idx = 0;
+  }
+}
+
+ConcurrentG1Refine::~ConcurrentG1Refine() {
+  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
+    assert(_card_counts != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
+    assert(_cur_card_count_histo != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
+    assert(_cum_card_count_histo != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
+  }
+  if (G1ConcRSLogCacheSize > 0) {
+    assert(_hot_cache != NULL, "Logic");
+    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
+  }
+}
+
+bool ConcurrentG1Refine::refine() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
+  clear_hot_cache();  // Any previous values in this are now invalid.
+  g1h->g1_rem_set()->concurrentRefinementPass(this);
+  _traversals++;
+  unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
+  unsigned cards_during = cards_after-cards_before;
+  // If this is the first traversal in the current enabling
+  // and we did some cards, or if the number of cards found is decreasing
+  // sufficiently quickly, then keep going.  Otherwise, sleep a while.
+  bool res =
+    (_first_traversal && cards_during > 0)
+    ||
+    (!_first_traversal && cards_during * 3 < _last_cards_during * 2);
+  _last_cards_during = cards_during;
+  _first_traversal = false;
+  return res;
+}
+
+void ConcurrentG1Refine::enable() {
+  MutexLocker x(G1ConcRefine_mon);
+  if (!_enabled) {
+    _enabled = true;
+    _first_traversal = true; _last_cards_during = 0;
+    G1ConcRefine_mon->notify_all();
+  }
+}
+
+unsigned ConcurrentG1Refine::disable() {
+  MutexLocker x(G1ConcRefine_mon);
+  if (_enabled) {
+    _enabled = false;
+    return _traversals;
+  } else {
+    return 0;
+  }
+}
+
+void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
+  G1ConcRefine_mon->lock();
+  while (!_enabled) {
+    G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
+  }
+  G1ConcRefine_mon->unlock();
+  _traversals = 0;
+}
+
+void ConcurrentG1Refine::set_pya_restart() {
+  // If we're using the log-based RS barrier, the above will cause
+  // in-progress traversals of completed log buffers to quit early; we will
+  // also abandon all other buffers.
+  if (G1RSBarrierUseQueue) {
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    dcqs.abandon_logs();
+    if (_cg1rThread->do_traversal()) {
+      _pya = PYA_restart;
+    } else {
+      _cg1rThread->set_do_traversal(true);
+      // Reset the post-yield actions.
+      _pya = PYA_continue;
+      _last_pya = PYA_continue;
+    }
+  } else {
+    _pya = PYA_restart;
+  }
+}
+
+void ConcurrentG1Refine::set_pya_cancel() {
+  _pya = PYA_cancel;
+}
+
+PostYieldAction ConcurrentG1Refine::get_pya() {
+  if (_pya != PYA_continue) {
+    jint val = _pya;
+    while (true) {
+      jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
+      if (val_read == val) {
+        PostYieldAction res = (PostYieldAction)val;
+        assert(res != PYA_continue, "Only the refine thread should reset.");
+        _last_pya = res;
+        return res;
+      } else {
+        val = val_read;
+      }
+    }
+  }
+  // _pya was PYA_continue when we looked, so there is no pending
+  // post-yield action; just carry on with the traversal.
+  return PYA_continue;
+}
+
+PostYieldAction ConcurrentG1Refine::get_last_pya() {
+  PostYieldAction res = _last_pya;
+  _last_pya = PYA_continue;
+  return res;
+}
+
+bool ConcurrentG1Refine::do_traversal() {
+  return _cg1rThread->do_traversal();
+}
+
+int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
+  size_t card_num = (card_ptr - _ct_bot);
+  guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
+  unsigned char cnt = _card_counts[card_num];
+  if (cnt < 255) _card_counts[card_num]++;
+  return cnt;
+  _total_travs++;
+}
+
+jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
+  int count = add_card_count(card_ptr);
+  // Count previously unvisited cards.
+  if (count == 0) _total_cards++;
+  // We'll assume a traversal unless we store it in the cache.
+  if (count < G1ConcRSHotCardLimit) {
+    _total_travs++;
+    return card_ptr;
+  }
+  // Otherwise, it's hot.
+  jbyte* res = NULL;
+  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
+  if (_n_hot == _hot_cache_size) {
+    _total_travs++;
+    res = _hot_cache[_hot_cache_idx];
+    _n_hot--;
+  }
+  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
+  _hot_cache[_hot_cache_idx] = card_ptr;
+  _hot_cache_idx++;
+  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
+  _n_hot++;
+  return res;
+}
+
+
+void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
+  assert(!use_cache(), "cache should be disabled");
+  int start_ind = _hot_cache_idx-1;
+  for (int i = 0; i < _n_hot; i++) {
+    int ind = start_ind - i;
+    if (ind < 0) ind = ind + _hot_cache_size;
+    jbyte* entry = _hot_cache[ind];
+    if (entry != NULL) {
+      g1rs->concurrentRefineOneCard(entry, worker_i);
+    }
+  }
+  _n_hot = 0;
+  _hot_cache_idx = 0;
+}
+
+void ConcurrentG1Refine::clear_and_record_card_counts() {
+  if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
+  _n_periods++;
+  if (G1ConcRSCountTraversals) {
+    for (size_t i = 0; i < _n_card_counts; i++) {
+      unsigned char bucket = _card_counts[i];
+      _cur_card_count_histo[bucket]++;
+      _card_counts[i] = 0;
+    }
+    gclog_or_tty->print_cr("Card counts:");
+    for (int i = 0; i < 256; i++) {
+      if (_cur_card_count_histo[i] > 0) {
+        gclog_or_tty->print_cr("  %3d: %9d", i, _cur_card_count_histo[i]);
+        _cum_card_count_histo[i] += _cur_card_count_histo[i];
+        _cur_card_count_histo[i] = 0;
+      }
+    }
+  } else {
+    assert(G1ConcRSLogCacheSize > 0, "Logic");
+    Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
+                        _n_card_counts / HeapWordSize);
+  }
+}
+
+void
+ConcurrentG1Refine::
+print_card_count_histo_range(unsigned* histo, int from, int to,
+                             float& cum_card_pct,
+                             float& cum_travs_pct) {
+  unsigned cards = 0;
+  unsigned travs = 0;
+  guarantee(to <= 256, "Precondition");
+  for (int i = from; i < to-1; i++) {
+    cards += histo[i];
+    travs += histo[i] * i;
+  }
+  if (to == 256) {
+    unsigned histo_card_sum = 0;
+    unsigned histo_trav_sum = 0;
+    for (int i = 1; i < 255; i++) {
+      histo_trav_sum += histo[i] * i;
+    }
+    cards += histo[255];
+    // correct traversals for the last one.
+    unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
+    travs += travs_255;
+
+  } else {
+    cards += histo[to-1];
+    travs += histo[to-1] * (to-1);
+  }
+  float fperiods = (float)_n_periods;
+  float f_tot_cards = (float)_total_cards/fperiods;
+  float f_tot_travs = (float)_total_travs/fperiods;
+  if (cards > 0) {
+    float fcards = (float)cards/fperiods;
+    float ftravs = (float)travs/fperiods;
+    if (to == 256) {
+      gclog_or_tty->print(" %4d-       %10.2f%10.2f", from, fcards, ftravs);
+    } else {
+      gclog_or_tty->print(" %4d-%4d   %10.2f%10.2f", from, to-1, fcards, ftravs);
+    }
+    float pct_cards = fcards*100.0/f_tot_cards;
+    cum_card_pct += pct_cards;
+    float pct_travs = ftravs*100.0/f_tot_travs;
+    cum_travs_pct += pct_travs;
+    gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
+                  pct_cards, cum_card_pct,
+                  pct_travs, cum_travs_pct);
+  }
+}
+
+void ConcurrentG1Refine::print_final_card_counts() {
+  if (!G1ConcRSCountTraversals) return;
+
+  gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
+                _total_travs, _total_cards);
+  float fperiods = (float)_n_periods;
+  gclog_or_tty->print_cr("  This is an average of %8.2f traversals, %8.2f cards, "
+                "per collection.", (float)_total_travs/fperiods,
+                (float)_total_cards/fperiods);
+  gclog_or_tty->print_cr("  This is an average of %8.2f traversals/distinct "
+                "dirty card.\n",
+                _total_cards > 0 ?
+                (float)_total_travs/(float)_total_cards : 0.0);
+
+
+  gclog_or_tty->print_cr("Histogram:\n\n%10s   %10s%10s%10s%10s%10s%10s",
+                "range", "# cards", "# travs", "% cards", "(cum)",
+                "% travs", "(cum)");
+  gclog_or_tty->print_cr("------------------------------------------------------------"
+                "-------------");
+  float cum_cards_pct = 0.0;
+  float cum_travs_pct = 0.0;
+  for (int i = 1; i < 10; i++) {
+    print_card_count_histo_range(_cum_card_count_histo, i, i+1,
+                                 cum_cards_pct, cum_travs_pct);
+  }
+  for (int i = 10; i < 100; i += 10) {
+    print_card_count_histo_range(_cum_card_count_histo, i, i+10,
+                                 cum_cards_pct, cum_travs_pct);
+  }
+  print_card_count_histo_range(_cum_card_count_histo, 100, 150,
+                               cum_cards_pct, cum_travs_pct);
+  print_card_count_histo_range(_cum_card_count_histo, 150, 200,
+                               cum_cards_pct, cum_travs_pct);
+  print_card_count_histo_range(_cum_card_count_histo, 200, 255,
+                               cum_cards_pct, cum_travs_pct);
+  print_card_count_histo_range(_cum_card_count_histo, 255, 256,
+                               cum_cards_pct, cum_travs_pct);
+}
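refine() above decides whether to take another immediate pass with the test cards_during * 3 < _last_cards_during * 2, i.e. another pass is worthwhile only while each traversal finds fewer than two thirds of the cards the previous one did (the very first traversal continues if it found anything at all). The predicate on its own:

    // Same continue-or-sleep test as ConcurrentG1Refine::refine().
    static bool keep_refining(bool first_traversal,
                              unsigned cards_during,
                              unsigned last_cards_during) {
      return (first_traversal && cards_during > 0) ||
             (!first_traversal && cards_during * 3 < last_cards_during * 2);
    }

So 1000 cards followed by 600 keeps going (1800 < 2000), while 1000 followed by 700 sleeps for the timeout instead (2100 >= 2000).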
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Forward decl
+class ConcurrentG1RefineThread;
+class G1RemSet;
+
+// What to do after a yield:
+enum PostYieldAction {
+  PYA_continue,  // Continue the traversal
+  PYA_restart,   // Restart
+  PYA_cancel     // It's been completed by somebody else: cancel.
+};
+
+class ConcurrentG1Refine {
+  ConcurrentG1RefineThread* _cg1rThread;
+
+  volatile jint _pya;
+  PostYieldAction _last_pya;
+
+  static bool _enabled;  // Protected by G1ConcRefine_mon.
+  unsigned _traversals;
+
+  // True until the first refinement pass after enable() completes, and the
+  // number of cards processed during the last refinement traversal.
+  unsigned _first_traversal;
+  unsigned _last_cards_during;
+
+  // The cache for card refinement.
+  bool     _use_cache;
+  bool     _def_use_cache;
+  size_t _n_periods;
+  size_t _total_cards;
+  size_t _total_travs;
+
+  unsigned char*  _card_counts;
+  unsigned _n_card_counts;
+  const jbyte* _ct_bot;
+  unsigned* _cur_card_count_histo;
+  unsigned* _cum_card_count_histo;
+  jbyte**  _hot_cache;
+  int      _hot_cache_size;
+  int      _n_hot;
+  int      _hot_cache_idx;
+
+  // Increments the count for this card (saturating at 255) and returns the previous count.
+  int add_card_count(jbyte* card_ptr);
+
+  void print_card_count_histo_range(unsigned* histo, int from, int to,
+                                    float& cum_card_pct,
+                                    float& cum_travs_pct);
+ public:
+  ConcurrentG1Refine();
+  ~ConcurrentG1Refine();
+
+  void init(); // Accomplish some initialization that has to wait.
+
+  // Enables concurrent refinement, waking up the refinement thread if necessary.
+  void enable();
+
+  // Returns the number of traversals performed since this refiner was enabled.
+  unsigned disable();
+
+  // Requires G1ConcRefine_mon to be held.
+  bool enabled() { return _enabled; }
+
+  // Returns only when G1 concurrent refinement has been enabled.
+  void wait_for_ConcurrentG1Refine_enabled();
+
+  // Do one concurrent refinement pass over the card table.  Returns "true"
+  // if heuristics determine that another pass should be done immediately.
+  bool refine();
+
+  // Indicate that an in-progress refinement pass should start over.
+  void set_pya_restart();
+  // Indicate that an in-progress refinement pass should quit.
+  void set_pya_cancel();
+
+  // Get the appropriate post-yield action.  Also sets last_pya.
+  PostYieldAction get_pya();
+
+  // The last PYA read by "get_pya".
+  PostYieldAction get_last_pya();
+
+  bool do_traversal();
+
+  ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
+
+  // If the card is not yet hot, returns the card itself so the caller can
+  // refine it immediately.  Otherwise the card is inserted into the hot
+  // cache; if that causes an eviction, returns the evicted card, else NULL.
+  jbyte* cache_insert(jbyte* card_ptr);
+
+  // Process the cached entries.
+  void clean_up_cache(int worker_i, G1RemSet* g1rs);
+
+  // Discard entries in the hot cache.
+  void clear_hot_cache() {
+    _hot_cache_idx = 0; _n_hot = 0;
+  }
+
+  bool hot_cache_is_empty() { return _n_hot == 0; }
+
+  bool use_cache() { return _use_cache; }
+  void set_use_cache(bool b) {
+    if (b) _use_cache = _def_use_cache;
+    else   _use_cache = false;
+  }
+
+  void clear_and_record_card_counts();
+  void print_final_card_counts();
+};
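Given the cache_insert() contract described above, the caller's job is simple: whatever non-NULL card comes back, cold or evicted, must be refined now, and a NULL return means the card is parked until clean_up_cache() drains it. A small standalone model of that contract; TinyHotCache and the is_hot flag are illustrative simplifications of the count-based test in ConcurrentG1Refine:

    #include <cstddef>

    typedef signed char card_t;                 // stand-in for jbyte

    class TinyHotCache {
      static const int Size = 4;                // G1 uses 1 << G1ConcRSLogCacheSize
      card_t* _slots[Size];
      int     _idx;                             // next slot to write (round robin)
      int     _n;                               // current occupancy
    public:
      TinyHotCache() : _idx(0), _n(0) {
        for (int i = 0; i < Size; ++i) _slots[i] = NULL;
      }
      // Returns the card to refine now, or NULL if the card was parked.
      card_t* insert(card_t* card, bool is_hot) {
        if (!is_hot) return card;               // cold cards are refined immediately
        card_t* evicted = NULL;
        if (_n == Size) {                       // full: evict the entry being overwritten
          evicted = _slots[_idx];
          _n--;
        }
        _slots[_idx] = card;
        _idx = (_idx + 1) % Size;
        _n++;
        return evicted;                         // non-NULL only on eviction
      }
    };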
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentG1RefineThread.cpp.incl"
+
+// ======= Concurrent G1 Refinement Thread ========
+
+// The refinement thread is created when the G1 garbage collector is used.
+
+ConcurrentG1RefineThread::
+ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
+  ConcurrentGCThread(),
+  _cg1r(cg1r),
+  _started(false),
+  _in_progress(false),
+  _do_traversal(false),
+  _vtime_accum(0.0),
+  _co_tracker(G1CRGroup),
+  _interval_ms(5.0)
+{
+  create_and_start();
+}
+
+const long timeout = 200; // ms.
+
+void ConcurrentG1RefineThread::traversalBasedRefinement() {
+  _cg1r->wait_for_ConcurrentG1Refine_enabled();
+  MutexLocker x(G1ConcRefine_mon);
+  while (_cg1r->enabled()) {
+    MutexUnlocker ux(G1ConcRefine_mon);
+    ResourceMark rm;
+    HandleMark   hm;
+
+    if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass");
+    _sts.join();
+    bool no_sleep = _cg1r->refine();
+    _sts.leave();
+    if (!no_sleep) {
+      MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+      // We do this only for the timeout; we don't expect this to be signalled.
+      CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
+    }
+  }
+}
+
+void ConcurrentG1RefineThread::queueBasedRefinement() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  // Wait for completed log buffers to exist.
+  {
+    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+    while (!_do_traversal && !dcqs.process_completed_buffers() &&
+           !_should_terminate) {
+      DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
+    }
+  }
+
+  if (_should_terminate) {
+    return;
+  }
+
+  // Now we take them off (this doesn't hold locks while it applies
+  // closures).  (If we did a full collection, then we'll do a full
+  // traversal.)
+  _sts.join();
+  if (_do_traversal) {
+    (void)_cg1r->refine();
+    switch (_cg1r->get_last_pya()) {
+    case PYA_cancel: case PYA_continue:
+      // Continue was caught and handled inside "refine".  If it's still
+      // "continue" when we get here, we're done.
+      _do_traversal = false;
+      break;
+    case PYA_restart:
+      assert(_do_traversal, "Because of Full GC.");
+      break;
+    }
+  } else {
+    int n_logs = 0;
+    int lower_limit = 0;
+    double start_vtime_sec; // only used when G1SmoothConcRefine is on
+    int prev_buffer_num; // only used when G1SmoothConcRefine is on
+
+    if (G1SmoothConcRefine) {
+      lower_limit = 0;
+      start_vtime_sec = os::elapsedVTime();
+      prev_buffer_num = (int) dcqs.completed_buffers_num();
+    } else {
+      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
+    }
+    while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
+      double end_vtime_sec;
+      double elapsed_vtime_sec;
+      int elapsed_vtime_ms;
+      int curr_buffer_num;
+
+      if (G1SmoothConcRefine) {
+        end_vtime_sec = os::elapsedVTime();
+        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
+        curr_buffer_num = (int) dcqs.completed_buffers_num();
+
+        if (curr_buffer_num > prev_buffer_num ||
+            curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
+          decreaseInterval(elapsed_vtime_ms);
+        } else if (curr_buffer_num < prev_buffer_num) {
+          increaseInterval(elapsed_vtime_ms);
+        }
+      }
+
+      sample_young_list_rs_lengths();
+      _co_tracker.update(false);
+
+      if (G1SmoothConcRefine) {
+        start_vtime_sec = os::elapsedVTime();
+        prev_buffer_num = curr_buffer_num;
+
+        _sts.leave();
+        os::sleep(Thread::current(), (jlong) _interval_ms, false);
+        _sts.join();
+      }
+
+      n_logs++;
+    }
+    // Make sure we harvest the PYA, if any.
+    (void)_cg1r->get_pya();
+  }
+  _sts.leave();
+}
+
+void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  if (g1p->adaptive_young_list_length()) {
+    int regions_visited = 0;
+
+    g1h->young_list_rs_length_sampling_init();
+    while (g1h->young_list_rs_length_sampling_more()) {
+      g1h->young_list_rs_length_sampling_next();
+      ++regions_visited;
+
+      // we try to yield every time we visit 10 regions
+      if (regions_visited == 10) {
+        if (_sts.should_yield()) {
+          _sts.yield("G1 refine");
+          // we just abandon the iteration
+          break;
+        }
+        regions_visited = 0;
+      }
+    }
+
+    g1p->check_prediction_validity();
+  }
+}
+
+void ConcurrentG1RefineThread::run() {
+  initialize_in_thread();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+
+  _co_tracker.enable();
+  _co_tracker.start();
+
+  while (!_should_terminate) {
+    // wait until started is set.
+    if (G1RSBarrierUseQueue) {
+      queueBasedRefinement();
+    } else {
+      traversalBasedRefinement();
+    }
+    _sts.join();
+    _co_tracker.update();
+    _sts.leave();
+    if (os::supports_vtime()) {
+      _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    } else {
+      _vtime_accum = 0.0;
+    }
+  }
+  _sts.join();
+  _co_tracker.update(true);
+  _sts.leave();
+  assert(_should_terminate, "just checking");
+
+  terminate();
+}
+
+
+void ConcurrentG1RefineThread::yield() {
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield");
+  _sts.yield("G1 refine");
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end");
+}
+
+void ConcurrentG1RefineThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  {
+    MutexLockerEx mu(Terminator_lock);
+    _should_terminate = true;
+  }
+
+  {
+    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+    DirtyCardQ_CBL_mon->notify_all();
+  }
+
+  {
+    MutexLockerEx mu(Terminator_lock);
+    while (!_has_terminated) {
+      Terminator_lock->wait();
+    }
+  }
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop");
+}
+
+void ConcurrentG1RefineThread::print() {
+  gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+void ConcurrentG1RefineThread::set_do_traversal(bool b) {
+  _do_traversal = b;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Forward Decl.
+class ConcurrentG1Refine;
+
+// The G1 Concurrent Refinement Thread (could be several in the future).
+
+class ConcurrentG1RefineThread: public ConcurrentGCThread {
+  friend class VMStructs;
+  friend class G1CollectedHeap;
+
+  double _vtime_start;  // Initial virtual time.
+  double _vtime_accum;  // Accumulated virtual time.
+
+ public:
+  virtual void run();
+
+ private:
+  ConcurrentG1Refine*              _cg1r;
+  bool                             _started;
+  bool                             _in_progress;
+  volatile bool                    _restart;
+
+  COTracker                        _co_tracker;
+  double                           _interval_ms;
+
+  bool                             _do_traversal;
+
+  void decreaseInterval(int processing_time_ms) {
+    double min_interval_ms = (double) processing_time_ms;
+    _interval_ms = 0.8 * _interval_ms;
+    if (_interval_ms < min_interval_ms)
+      _interval_ms = min_interval_ms;
+  }
+  void increaseInterval(int processing_time_ms) {
+    double max_interval_ms = 9.0 * (double) processing_time_ms;
+    _interval_ms = 1.1 * _interval_ms;
+    if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
+      _interval_ms = max_interval_ms;
+  }
+
+  void sleepBeforeNextCycle();
+
+  void traversalBasedRefinement();
+
+  void queueBasedRefinement();
+
+  // For use by G1CollectedHeap, which is a friend.
+  static SuspendibleThreadSet* sts() { return &_sts; }
+
+ public:
+  // Constructor
+  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
+
+  // Printing
+  void print();
+
+  // Total virtual time so far.
+  double vtime_accum() { return _vtime_accum; }
+
+  ConcurrentG1Refine* cg1r()                     { return _cg1r;     }
+
+
+  void            set_started()                  { _started = true;   }
+  void            clear_started()                { _started = false;  }
+  bool            started()                      { return _started;   }
+
+  void            set_in_progress()              { _in_progress = true;   }
+  void            clear_in_progress()            { _in_progress = false;  }
+  bool            in_progress()                  { return _in_progress;   }
+
+  void            set_do_traversal(bool b);
+  bool            do_traversal() { return _do_traversal; }
+
+  void            sample_young_list_rs_lengths();
+
+  // Yield for GC
+  void            yield();
+
+  // shutdown
+  static void stop();
+};
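decreaseInterval() and increaseInterval() above adjust the G1SmoothConcRefine sleep multiplicatively: shrink by 20% but never below the time the last batch took to process, grow by 10% but never beyond nine times that time. For instance, with a 10 ms processing time and a 50 ms interval, one decrease yields 40 ms, and repeated increases climb 55, 60.5, ... until they are clamped at 90 ms. The same rules written as free functions:

    // Mirrors ConcurrentG1RefineThread::decreaseInterval()/increaseInterval().
    static double decrease_interval(double interval_ms, double processing_ms) {
      double next = 0.8 * interval_ms;
      return (next < processing_ms) ? processing_ms : next;
    }

    static double increase_interval(double interval_ms, double processing_ms) {
      double next = 1.1 * interval_ms;
      double cap  = 9.0 * processing_ms;
      return (cap > 0.0 && next > cap) ? cap : next;
    }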
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,3979 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentMark.cpp.incl"
+
+//
+// CM Bit Map Wrapper
+
+CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
+  _bm((uintptr_t*)NULL,0),
+  _shifter(shifter) {
+  _bmStartWord = (HeapWord*)(rs.base());
+  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
+  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
+                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
+
+  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
+  // For now we'll just commit all of the bit map up front.
+  // Later on we'll try to be more parsimonious with swap.
+  guarantee(_virtual_space.initialize(brs, brs.size()),
+            "couldn't reserve backing store for CMS bit map");
+  assert(_virtual_space.committed_size() == brs.size(),
+         "didn't reserve backing store for all of CMS bit map?");
+  _bm.set_map((uintptr_t*)_virtual_space.low());
+  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
+         _bmWordSize, "inconsistency in bit map sizing");
+  _bm.set_size(_bmWordSize >> _shifter);
+}
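+// Sizing sketch for the backing store reserved above (illustrative
+// numbers, derived from the expressions in the constructor): the bit map
+// needs one bit per 2^_shifter heap words, i.e.
+// _bmWordSize >> (_shifter + LogBitsPerByte) bytes plus one byte for
+// rounding. For example, with _shifter = 0 (one bit per heap word) and a
+// 1 GB reserved space on a 64-bit VM (_bmWordSize = 128M words), the bit
+// map needs about 16 MB before alignment.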
+
+HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
+                                               HeapWord* limit) const {
+  // First we must round addr *up* to a possible object boundary.
+  addr = (HeapWord*)align_size_up((intptr_t)addr,
+                                  HeapWordSize << _shifter);
+  size_t addrOffset = heapWordToOffset(addr);
+  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
+  size_t limitOffset = heapWordToOffset(limit);
+  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
+  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
+  assert(nextAddr >= addr, "get_next_one postcondition");
+  assert(nextAddr == limit || isMarked(nextAddr),
+         "get_next_one postcondition");
+  return nextAddr;
+}
+
+HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
+                                                 HeapWord* limit) const {
+  size_t addrOffset = heapWordToOffset(addr);
+  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
+  size_t limitOffset = heapWordToOffset(limit);
+  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
+  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
+  assert(nextAddr >= addr, "get_next_one postcondition");
+  assert(nextAddr == limit || !isMarked(nextAddr),
+         "get_next_one postcondition");
+  return nextAddr;
+}
+
+int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
+  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
+  return (int) (diff >> _shifter);
+}
+
+bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
+  HeapWord* left  = MAX2(_bmStartWord, mr.start());
+  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
+  if (right > left) {
+    // Right-open interval [leftOffset, rightOffset).
+    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
+  } else {
+    return true;
+  }
+}
+
+void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
+                                             size_t    from_start_index,
+                                             HeapWord* to_start_word,
+                                             size_t    word_num) {
+  _bm.mostly_disjoint_range_union(from_bitmap,
+                                  from_start_index,
+                                  heapWordToOffset(to_start_word),
+                                  word_num);
+}
+
+#ifndef PRODUCT
+bool CMBitMapRO::covers(ReservedSpace rs) const {
+  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
+  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
+         "size inconsistency");
+  return _bmStartWord == (HeapWord*)(rs.base()) &&
+         _bmWordSize  == rs.size()>>LogHeapWordSize;
+}
+#endif
+
+void CMBitMap::clearAll() {
+  _bm.clear();
+  return;
+}
+
+void CMBitMap::markRange(MemRegion mr) {
+  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
+  assert(!mr.is_empty(), "unexpected empty region");
+  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
+          ((HeapWord *) mr.end())),
+         "markRange memory region end is not card aligned");
+  // convert address range into offset range
+  _bm.at_put_range(heapWordToOffset(mr.start()),
+                   heapWordToOffset(mr.end()), true);
+}
+
+void CMBitMap::clearRange(MemRegion mr) {
+  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
+  assert(!mr.is_empty(), "unexpected empty region");
+  // convert address range into offset range
+  _bm.at_put_range(heapWordToOffset(mr.start()),
+                   heapWordToOffset(mr.end()), false);
+}
+
+MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
+                                            HeapWord* end_addr) {
+  HeapWord* start = getNextMarkedWordAddress(addr);
+  start = MIN2(start, end_addr);
+  HeapWord* end   = getNextUnmarkedWordAddress(start);
+  end = MIN2(end, end_addr);
+  assert(start <= end, "Consistency check");
+  MemRegion mr(start, end);
+  if (!mr.is_empty()) {
+    clearRange(mr);
+  }
+  return mr;
+}
+
+CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
+  _base(NULL), _cm(cm)
+#ifdef ASSERT
+  , _drain_in_progress(false)
+  , _drain_in_progress_yields(false)
+#endif
+{}
+
+void CMMarkStack::allocate(size_t size) {
+  _base = NEW_C_HEAP_ARRAY(oop, size);
+  if (_base == NULL)
+    vm_exit_during_initialization("Failed to allocate "
+                                  "CM region mark stack");
+  _index = 0;
+  // QQQQ cast ...
+  _capacity = (jint) size;
+  _oops_do_bound = -1;
+  NOT_PRODUCT(_max_depth = 0);
+}
+
+CMMarkStack::~CMMarkStack() {
+  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
+}
+
+void CMMarkStack::par_push(oop ptr) {
+  while (true) {
+    if (isFull()) {
+      _overflow = true;
+      return;
+    }
+    // Otherwise...
+    jint index = _index;
+    jint next_index = index+1;
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      _base[index] = ptr;
+      // Note that we don't maintain this atomically.  We could, but it
+      // doesn't seem necessary.
+      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
+      return;
+    }
+    // Otherwise, we need to try again.
+  }
+}
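+// Note on the lock-free push protocol above (also used by par_adjoin_arr
+// and CMRegionStack::push below): Atomic::cmpxchg(exchange_value, dest,
+// compare_value) returns the previous contents of *dest, so the claim on
+// _index succeeds only if no other thread advanced it concurrently; on
+// failure we simply retry. The slot itself is written only after the
+// index has been successfully claimed.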
+
+void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
+  while (true) {
+    if (isFull()) {
+      _overflow = true;
+      return;
+    }
+    // Otherwise...
+    jint index = _index;
+    jint next_index = index + n;
+    if (next_index > _capacity) {
+      _overflow = true;
+      return;
+    }
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      for (int i = 0; i < n; i++) {
+        int ind = index + i;
+        assert(ind < _capacity, "By overflow test above.");
+        _base[ind] = ptr_arr[i];
+      }
+      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
+      return;
+    }
+    // Otherwise, we need to try again.
+  }
+}
+
+
+void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  jint start = _index;
+  jint next_index = start + n;
+  if (next_index > _capacity) {
+    _overflow = true;
+    return;
+  }
+  // Otherwise.
+  _index = next_index;
+  for (int i = 0; i < n; i++) {
+    int ind = start + i;
+    guarantee(ind < _capacity, "By overflow test above.");
+    _base[ind] = ptr_arr[i];
+  }
+}
+
+
+bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  jint index = _index;
+  if (index == 0) {
+    *n = 0;
+    return false;
+  } else {
+    int k = MIN2(max, index);
+    jint new_ind = index - k;
+    for (int j = 0; j < k; j++) {
+      ptr_arr[j] = _base[new_ind + j];
+    }
+    _index = new_ind;
+    *n = k;
+    return true;
+  }
+}
+
+
+CMRegionStack::CMRegionStack() : _base(NULL) {}
+
+void CMRegionStack::allocate(size_t size) {
+  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
+  if (_base == NULL)
+    vm_exit_during_initialization("Failed to allocate "
+                                  "CM region mark stack");
+  _index = 0;
+  // QQQQ cast ...
+  _capacity = (jint) size;
+}
+
+CMRegionStack::~CMRegionStack() {
+  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
+}
+
+void CMRegionStack::push(MemRegion mr) {
+  assert(mr.word_size() > 0, "Precondition");
+  while (true) {
+    if (isFull()) {
+      _overflow = true;
+      return;
+    }
+    // Otherwise...
+    jint index = _index;
+    jint next_index = index+1;
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      _base[index] = mr;
+      return;
+    }
+    // Otherwise, we need to try again.
+  }
+}
+
+MemRegion CMRegionStack::pop() {
+  while (true) {
+    // Otherwise...
+    jint index = _index;
+
+    if (index == 0) {
+      return MemRegion();
+    }
+    jint next_index = index-1;
+    jint res = Atomic::cmpxchg(next_index, &_index, index);
+    if (res == index) {
+      MemRegion mr = _base[next_index];
+      if (mr.start() != NULL) {
+        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
+        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+        return mr;
+      } else {
+        // that entry was invalidated... let's skip it
+        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+      }
+    }
+    // Otherwise, we need to try again.
+  }
+}
+
+bool CMRegionStack::invalidate_entries_into_cset() {
+  bool result = false;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  for (int i = 0; i < _oops_do_bound; ++i) {
+    MemRegion mr = _base[i];
+    if (mr.start() != NULL) {
+      tmp_guarantee_CM( mr.end() != NULL, "invariant");
+      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+      HeapRegion* hr = g1h->heap_region_containing(mr.start());
+      tmp_guarantee_CM( hr != NULL, "invariant" );
+      if (hr->in_collection_set()) {
+        // The region points into the collection set
+        _base[i] = MemRegion();
+        result = true;
+      }
+    } else {
+      // that entry was invalidated... let's skip it
+      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+    }
+  }
+  return result;
+}
+
+template<class OopClosureClass>
+bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
+  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
+         || SafepointSynchronize::is_at_safepoint(),
+         "Drain recursion must be yield-safe.");
+  bool res = true;
+  debug_only(_drain_in_progress = true);
+  debug_only(_drain_in_progress_yields = yield_after);
+  while (!isEmpty()) {
+    oop newOop = pop();
+    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
+    assert(newOop->is_oop(), "Expected an oop");
+    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
+           "only grey objects on this stack");
+    // iterate over the oops in this oop, marking and pushing
+    // the ones in the G1 heap.
+    newOop->oop_iterate(cl);
+    if (yield_after && _cm->do_yield_check()) {
+      res = false; break;
+    }
+  }
+  debug_only(_drain_in_progress = false);
+  return res;
+}
+
+void CMMarkStack::oops_do(OopClosure* f) {
+  if (_index == 0) return;
+  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
+         "Bound must be set.");
+  for (int i = 0; i < _oops_do_bound; i++) {
+    f->do_oop(&_base[i]);
+  }
+  _oops_do_bound = -1;
+}
+
+bool ConcurrentMark::not_yet_marked(oop obj) const {
+  return (_g1h->is_obj_ill(obj)
+          || (_g1h->is_in_permanent(obj)
+              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
+}
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+ConcurrentMark::ConcurrentMark(ReservedSpace rs,
+                               int max_regions) :
+  _markBitMap1(rs, MinObjAlignment - 1),
+  _markBitMap2(rs, MinObjAlignment - 1),
+
+  _parallel_marking_threads(0),
+  _sleep_factor(0.0),
+  _marking_task_overhead(1.0),
+  _cleanup_sleep_factor(0.0),
+  _cleanup_task_overhead(1.0),
+  _region_bm(max_regions, false /* in_resource_area*/),
+  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
+           CardTableModRefBS::card_shift,
+           false /* in_resource_area*/),
+  _prevMarkBitMap(&_markBitMap1),
+  _nextMarkBitMap(&_markBitMap2),
+  _at_least_one_mark_complete(false),
+
+  _markStack(this),
+  _regionStack(),
+  // _finger set in set_non_marking_state
+
+  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
+  // _active_tasks set in set_non_marking_state
+  // _tasks set inside the constructor
+  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
+  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
+
+  _has_overflown(false),
+  _concurrent(false),
+
+  // _verbose_level set below
+
+  _init_times(),
+  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
+  _cleanup_times(),
+  _total_counting_time(0.0),
+  _total_rs_scrub_time(0.0),
+
+  _parallel_workers(NULL),
+  _cleanup_co_tracker(G1CLGroup)
+{
+  CMVerboseLevel verbose_level =
+    (CMVerboseLevel) G1MarkingVerboseLevel;
+  if (verbose_level < no_verbose)
+    verbose_level = no_verbose;
+  if (verbose_level > high_verbose)
+    verbose_level = high_verbose;
+  _verbose_level = verbose_level;
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
+                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
+
+  _markStack.allocate(G1CMStackSize);
+  _regionStack.allocate(G1CMRegionStackSize);
+
+  // Create & start a ConcurrentMark thread.
+  if (G1ConcMark) {
+    _cmThread = new ConcurrentMarkThread(this);
+    assert(cmThread() != NULL, "CM Thread should have been created");
+    assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
+  } else {
+    _cmThread = NULL;
+  }
+  _g1h = G1CollectedHeap::heap();
+  assert(CGC_lock != NULL, "Where's the CGC_lock?");
+  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
+  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+  satb_qs.set_buffer_size(G1SATBLogBufferSize);
+
+  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
+  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
+  for (int i = 0 ; i < size; i++) {
+    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
+  }
+
+  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
+
+  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
+  _active_tasks = _max_task_num;
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    CMTaskQueue* task_queue = new CMTaskQueue();
+    task_queue->initialize();
+    _task_queues->register_queue(i, task_queue);
+
+    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
+    _accum_task_vtime[i] = 0.0;
+  }
+
+  if (ParallelMarkingThreads > ParallelGCThreads) {
+    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
+                                  "than ParallelGCThreads.");
+  }
+  if (ParallelGCThreads == 0) {
+    // if we are not running with any parallel GC threads we will not
+    // spawn any marking threads either
+    _parallel_marking_threads =   0;
+    _sleep_factor             = 0.0;
+    _marking_task_overhead    = 1.0;
+  } else {
+    if (ParallelMarkingThreads > 0) {
+      // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc
+      // if both are set
+
+      _parallel_marking_threads = ParallelMarkingThreads;
+      _sleep_factor             = 0.0;
+      _marking_task_overhead    = 1.0;
+    } else if (G1MarkingOverheadPerc > 0) {
+      // we will calculate the number of parallel marking threads
+      // based on a target overhead with respect to the soft real-time
+      // goal
+
+      double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
+      double overall_cm_overhead =
+        (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
+      double cpu_ratio = 1.0 / (double) os::processor_count();
+      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
+      double marking_task_overhead =
+        overall_cm_overhead / marking_thread_num *
+                                                (double) os::processor_count();
+      double sleep_factor =
+                         (1.0 - marking_task_overhead) / marking_task_overhead;
+
+      _parallel_marking_threads = (size_t) marking_thread_num;
+      _sleep_factor             = sleep_factor;
+      _marking_task_overhead    = marking_task_overhead;
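+      // Worked example with purely illustrative flag values: with
+      // G1MarkingOverheadPerc = 10, G1MaxPauseTimeMS = 200,
+      // G1TimeSliceMS = 500 and 8 processors, marking_overhead = 0.10,
+      // overall_cm_overhead = 200 * 0.10 / 500 = 0.04, cpu_ratio = 0.125,
+      // marking_thread_num = ceil(0.04 / 0.125) = 1,
+      // marking_task_overhead = 0.04 / 1 * 8 = 0.32 and
+      // sleep_factor = (1 - 0.32) / 0.32 = 2.125, i.e. the single marking
+      // thread sleeps roughly twice as long as it works.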
+    } else {
+      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
+      _sleep_factor             = 0.0;
+      _marking_task_overhead    = 1.0;
+    }
+
+    if (parallel_marking_threads() > 1)
+      _cleanup_task_overhead = 1.0;
+    else
+      _cleanup_task_overhead = marking_task_overhead();
+    _cleanup_sleep_factor =
+                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
+
+#if 0
+    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
+    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
+    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
+    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
+    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
+#endif
+
+    guarantee( parallel_marking_threads() > 0, "peace of mind" );
+    _parallel_workers = new WorkGang("Parallel Marking Threads",
+                                     (int) parallel_marking_threads(), false, true);
+    if (_parallel_workers == NULL)
+      vm_exit_during_initialization("Failed necessary allocation.");
+  }
+
+  // so that the call below can read a sensible value
+  _heap_start = (HeapWord*) rs.base();
+  set_non_marking_state();
+}
+
+void ConcurrentMark::update_g1_committed(bool force) {
+  // If concurrent marking is not in progress, then we do not need to
+  // update _heap_end. This has a subtle and important
+  // side-effect. Imagine that two evacuation pauses happen between
+  // marking completion and remark. The first one can grow the
+  // heap (hence now the finger is below the heap end). Then, the
+  // second one could unnecessarily push regions on the region
+  // stack. This causes the invariant that the region stack is empty
+  // at the beginning of remark to be false. By ensuring that we do
+  // not observe heap expansions after marking is complete, we do
+  // not have this problem.
+  if (!concurrent_marking_in_progress() && !force)
+    return;
+
+  MemRegion committed = _g1h->g1_committed();
+  tmp_guarantee_CM( committed.start() == _heap_start,
+                    "start shouldn't change" );
+  HeapWord* new_end = committed.end();
+  if (new_end > _heap_end) {
+    // The heap has been expanded.
+
+    _heap_end = new_end;
+  }
+  // Notice that the heap can also shrink. However, this only happens
+  // during a Full GC (at least currently) and the entire marking
+  // phase will bail out and the task will not be restarted. So, let's
+  // do nothing.
+}
+
+void ConcurrentMark::reset() {
+  // Starting values for these two. This should be called in a STW
+  // phase. CM will be notified of any future g1_committed expansions
+  // at the end of evacuation pauses, when tasks are inactive.
+  MemRegion committed = _g1h->g1_committed();
+  _heap_start = committed.start();
+  _heap_end   = committed.end();
+
+  guarantee( _heap_start != NULL &&
+             _heap_end != NULL   &&
+             _heap_start < _heap_end, "heap bounds should look ok" );
+
+  // reset all the marking data structures and any necessary flags
+  clear_marking_state();
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[global] resetting");
+
+  // We do reset all of them, since different phases will use a
+  // different number of active threads. So, it's easiest to have all
+  // of them ready.
+  for (int i = 0; i < (int) _max_task_num; ++i)
+    _tasks[i]->reset(_nextMarkBitMap);
+
+  // we need this to make sure that the flag is on during the evac
+  // pause with initial mark piggy-backed
+  set_concurrent_marking_in_progress();
+}
+
+void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
+  guarantee( active_tasks <= _max_task_num, "we should not have more" );
+
+  _active_tasks = active_tasks;
+  // Need to update the three data structures below according to the
+  // number of active threads for this phase.
+  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
+  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
+  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
+
+  _concurrent = concurrent;
+  // We propagate this to all tasks, not just the active ones.
+  for (int i = 0; i < (int) _max_task_num; ++i)
+    _tasks[i]->set_concurrent(concurrent);
+
+  if (concurrent) {
+    set_concurrent_marking_in_progress();
+  } else {
+    // We currently assume that the concurrent flag has been set to
+    // false before we start remark. At this point we should also be
+    // in a STW phase.
+    guarantee( !concurrent_marking_in_progress(), "invariant" );
+    guarantee( _finger == _heap_end, "only way to get here" );
+    update_g1_committed(true);
+  }
+}
+
+void ConcurrentMark::set_non_marking_state() {
+  // We set the global marking state to some default values when we're
+  // not doing marking.
+  clear_marking_state();
+  _active_tasks = 0;
+  clear_concurrent_marking_in_progress();
+}
+
+ConcurrentMark::~ConcurrentMark() {
+  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
+  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
+  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
+                    _par_cleanup_thread_state);
+
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    delete _task_queues->queue(i);
+    delete _tasks[i];
+  }
+  delete _task_queues;
+  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
+}
+
+// This closure is used to mark refs into the g1 generation
+// from external roots in the CM bit map.
+// Called at the first checkpoint.
+//
+
+#define PRINT_REACHABLE_AT_INITIAL_MARK 0
+#if PRINT_REACHABLE_AT_INITIAL_MARK
+static FILE* reachable_file = NULL;
+
+class PrintReachableClosure: public OopsInGenClosure {
+  CMBitMap* _bm;
+  int _level;
+public:
+  PrintReachableClosure(CMBitMap* bm) :
+    _bm(bm), _level(0) {
+    guarantee(reachable_file != NULL, "pre-condition");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+    HeapWord* obj_addr = (HeapWord*)obj;
+    if (obj == NULL) return;
+    fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
+            _level, p, (void*) obj, _bm->isMarked(obj_addr));
+    if (!_bm->isMarked(obj_addr)) {
+      _bm->mark(obj_addr);
+      _level++;
+      obj->oop_iterate(this);
+      _level--;
+    }
+  }
+};
+#endif // PRINT_REACHABLE_AT_INITIAL_MARK
+
+#define SEND_HEAP_DUMP_TO_FILE 0
+#if SEND_HEAP_DUMP_TO_FILE
+static FILE* heap_dump_file = NULL;
+#endif // SEND_HEAP_DUMP_TO_FILE
+
+void ConcurrentMark::clearNextBitmap() {
+   guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
+
+   // clear the mark bitmap (no grey objects to start with).
+   // We need to do this in chunks and offer to yield in between
+   // each chunk.
+   HeapWord* start  = _nextMarkBitMap->startWord();
+   HeapWord* end    = _nextMarkBitMap->endWord();
+   HeapWord* cur    = start;
+   size_t chunkSize = M;
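+   // Each iteration of the loop below clears the bits covering one
+   // million heap words (chunkSize = M); e.g. for a 4 GB heap (512M
+   // words, assuming 8-byte heap words) that is 512 chunks, with a
+   // yield check after each one. (Illustrative numbers.)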
+   while (cur < end) {
+     HeapWord* next = cur + chunkSize;
+     if (next > end)
+       next = end;
+     MemRegion mr(cur,next);
+     _nextMarkBitMap->clearRange(mr);
+     cur = next;
+     do_yield_check();
+   }
+}
+
+class NoteStartOfMarkHRClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      r->note_start_of_marking(true);
+    }
+    return false;
+  }
+};
+
+void ConcurrentMark::checkpointRootsInitialPre() {
+  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+
+  _has_aborted = false;
+
+  // Find all the reachable objects...
+#if PRINT_REACHABLE_AT_INITIAL_MARK
+  guarantee(reachable_file == NULL, "Protocol");
+  char fn_buf[100];
+  sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
+  reachable_file = fopen(fn_buf, "w");
+  // clear the mark bitmap (no grey objects to start with)
+  _nextMarkBitMap->clearAll();
+  PrintReachableClosure prcl(_nextMarkBitMap);
+  g1h->process_strong_roots(
+                            false,   // fake perm gen collection
+                            SharedHeap::SO_AllClasses,
+                            &prcl, // Regular roots
+                            &prcl    // Perm Gen Roots
+                            );
+  // The root iteration above "consumed" dirty cards in the perm gen.
+  // Therefore, as a shortcut, we dirty all such cards.
+  g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
+  fclose(reachable_file);
+  reachable_file = NULL;
+  // clear the mark bitmap again.
+  _nextMarkBitMap->clearAll();
+  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+  COMPILER2_PRESENT(DerivedPointerTable::clear());
+#endif // PRINT_REACHABLE_AT_INITIAL_MARK
+
+  // Initialise marking structures. This has to be done in a STW phase.
+  reset();
+}
+
+class CMMarkRootsClosure: public OopsInGenClosure {
+private:
+  ConcurrentMark*  _cm;
+  G1CollectedHeap* _g1h;
+  bool             _do_barrier;
+
+public:
+  CMMarkRootsClosure(ConcurrentMark* cm,
+                     G1CollectedHeap* g1h,
+                     bool do_barrier) : _cm(cm), _g1h(g1h),
+                                        _do_barrier(do_barrier) { }
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  virtual void do_oop(oop* p) {
+    oop thisOop = *p;
+    if (thisOop != NULL) {
+      assert(thisOop->is_oop() || thisOop->mark() == NULL,
+             "expected an oop, possibly with mark word displaced");
+      HeapWord* addr = (HeapWord*)thisOop;
+      if (_g1h->is_in_g1_reserved(addr)) {
+        _cm->grayRoot(thisOop);
+      }
+    }
+    if (_do_barrier) {
+      assert(!_g1h->is_in_g1_reserved(p),
+             "Should be called on external roots");
+      do_barrier(p);
+    }
+  }
+};
+
+void ConcurrentMark::checkpointRootsInitialPost() {
+  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
+
+  // Start weak-reference discovery.
+  ReferenceProcessor* rp = g1h->ref_processor();
+  rp->verify_no_references_recorded();
+  rp->enable_discovery(); // enable ("weak") refs discovery
+
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
+  satb_mq_set.set_active_all_threads(true);
+
+  // update_g1_committed() will be called at the end of an evac pause
+  // when marking is on. So, it's also called at the end of the
+  // initial-mark pause to update the heap end, if the heap expands
+  // during it. No need to call it here.
+
+  guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
+
+  size_t max_marking_threads =
+    MAX2((size_t) 1, parallel_marking_threads());
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    _tasks[i]->enable_co_tracker();
+    if (i < (int) max_marking_threads)
+      _tasks[i]->reset_co_tracker(marking_task_overhead());
+    else
+      _tasks[i]->reset_co_tracker(0.0);
+  }
+}
+
+// Checkpoint the roots into this generation from outside
+// this generation. [Note this initial checkpoint need only
+// be approximate -- we'll do a catch up phase subsequently.]
+void ConcurrentMark::checkpointRootsInitial() {
+  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  double start = os::elapsedTime();
+  GCOverheadReporter::recordSTWStart(start);
+
+  // If there has not been a GC[n-1] since last GC[n] cycle completed,
+  // precede our marking with a collection of all
+  // younger generations to keep floating garbage to a minimum.
+  // YSR: we won't do this for now -- it's an optimization to be
+  // done post-beta.
+
+  // YSR:    ignoring weak refs for now; will do at bug fixing stage
+  // EVM:    assert(discoveredRefsAreClear());
+
+
+  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+  g1p->record_concurrent_mark_init_start();
+  checkpointRootsInitialPre();
+
+  // YSR: when concurrent precleaning is in place, we'll
+  // need to clear the cached card table here
+
+  ResourceMark rm;
+  HandleMark  hm;
+
+  g1h->ensure_parsability(false);
+  g1h->perm_gen()->save_marks();
+
+  CMMarkRootsClosure notOlder(this, g1h, false);
+  CMMarkRootsClosure older(this, g1h, true);
+
+  g1h->set_marking_started();
+  g1h->rem_set()->prepare_for_younger_refs_iterate(false);
+
+  g1h->process_strong_roots(false,   // fake perm gen collection
+                            SharedHeap::SO_AllClasses,
+                            &notOlder, // Regular roots
+                            &older    // Perm Gen Roots
+                            );
+  checkpointRootsInitialPost();
+
+  // Statistics.
+  double end = os::elapsedTime();
+  _init_times.add((end - start) * 1000.0);
+  GCOverheadReporter::recordSTWEnd(end);
+
+  g1p->record_concurrent_mark_init_end();
+}
+
+/*
+   Notice that in the next two methods, we actually leave the STS
+   during the barrier sync and join it immediately afterwards. If we
+   do not do this, then the following deadlock can occur: one
+   thread could be in the barrier sync code, waiting for another
+   thread to also sync up, while that other thread is trying to
+   yield and is itself waiting for all the threads to sync up too.
+
+   Because the thread that does the sync barrier has left the STS, it
+   is possible that a Full GC or an evacuation pause could occur while
+   it waits in the barrier. This is actually safe, since entering the
+   sync barrier is one of the last things do_marking_step() does, and
+   it doesn't manipulate any data structures afterwards.
+*/
+
+void ConcurrentMark::enter_first_sync_barrier(int task_num) {
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
+
+  ConcurrentGCThread::stsLeave();
+  _first_overflow_barrier_sync.enter();
+  ConcurrentGCThread::stsJoin();
+  // at this point everyone should have synced up and not be doing any
+  // more work
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
+
+  // let task 0 do this
+  if (task_num == 0) {
+    // task 0 is responsible for clearing the global data structures
+    clear_marking_state();
+
+    if (PrintGC) {
+      gclog_or_tty->date_stamp(PrintGCDateStamps);
+      gclog_or_tty->stamp(PrintGCTimeStamps);
+      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
+    }
+  }
+
+  // after this, each task should reset its own data structures and
+  // then go into the second barrier
+}
+
+void ConcurrentMark::enter_second_sync_barrier(int task_num) {
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
+
+  ConcurrentGCThread::stsLeave();
+  _second_overflow_barrier_sync.enter();
+  ConcurrentGCThread::stsJoin();
+  // at this point everything should be re-initialised and ready to go
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
+}
+
+void ConcurrentMark::grayRoot(oop p) {
+  HeapWord* addr = (HeapWord*) p;
+  // We can't really check against _heap_start and _heap_end, since it
+  // is possible during an evacuation pause with piggy-backed
+  // initial-mark that the committed space is expanded during the
+  // pause without CM observing this change. So the assertion below
+  // is a bit conservative; but better than nothing.
+  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
+                    "address should be within the heap bounds" );
+
+  if (!_nextMarkBitMap->isMarked(addr))
+    _nextMarkBitMap->parMark(addr);
+}
+
+void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+  // The objects on the region have already been marked "in bulk" by
+  // the caller. We only need to decide whether to push the region on
+  // the region stack or not.
+
+  if (!concurrent_marking_in_progress() || !_should_gray_objects)
+    // We're done with marking and waiting for remark. We do not need to
+    // push anything else on the region stack.
+    return;
+
+  HeapWord* finger = _finger;
+
+  if (verbose_low())
+    gclog_or_tty->print_cr("[global] attempting to push "
+                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
+                           PTR_FORMAT, mr.start(), mr.end(), finger);
+
+  if (mr.start() < finger) {
+    // The finger is always heap region aligned and it is not possible
+    // for mr to span heap regions.
+    tmp_guarantee_CM( mr.end() <= finger, "invariant" );
+
+    tmp_guarantee_CM( mr.start() <= mr.end() &&
+                      _heap_start <= mr.start() &&
+                      mr.end() <= _heap_end,
+                  "region boundaries should fall within the committed space" );
+    if (verbose_low())
+      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
+                             "below the finger, pushing it",
+                             mr.start(), mr.end());
+
+    if (!region_stack_push(mr)) {
+      if (verbose_low())
+        gclog_or_tty->print_cr("[global] region stack has overflown.");
+    }
+  }
+}
+
+void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+  // The object is not marked by the caller. We need to at least mark
+  // it and maybe push it on the stack.
+
+  HeapWord* addr = (HeapWord*)p;
+  if (!_nextMarkBitMap->isMarked(addr)) {
+    // We definitely need to mark it, irrespective of whether we bail out
+    // because we're done with marking.
+    if (_nextMarkBitMap->parMark(addr)) {
+      if (!concurrent_marking_in_progress() || !_should_gray_objects)
+        // If we're done with concurrent marking and we're waiting for
+        // remark, then we're not pushing anything on the stack.
+        return;
+
+      // No OrderAccess::store_load() is needed. It is implicit in the
+      // CAS done in parMark(addr) above
+      HeapWord* finger = _finger;
+
+      if (addr < finger) {
+        if (!mark_stack_push(oop(addr))) {
+          if (verbose_low())
+            gclog_or_tty->print_cr("[global] global stack overflow "
+                                   "during parMark");
+        }
+      }
+    }
+  }
+}
+
+class CMConcurrentMarkingTask: public AbstractGangTask {
+private:
+  ConcurrentMark*       _cm;
+  ConcurrentMarkThread* _cmt;
+
+public:
+  void work(int worker_i) {
+    guarantee( Thread::current()->is_ConcurrentGC_thread(),
+               "this should only be done by a conc GC thread" );
+
+    double start_vtime = os::elapsedVTime();
+
+    ConcurrentGCThread::stsJoin();
+
+    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
+    CMTask* the_task = _cm->task(worker_i);
+    the_task->start_co_tracker();
+    the_task->record_start_time();
+    if (!_cm->has_aborted()) {
+      do {
+        double start_vtime_sec = os::elapsedVTime();
+        double start_time_sec = os::elapsedTime();
+        the_task->do_marking_step(10.0);
+        double end_time_sec = os::elapsedTime();
+        double end_vtime_sec = os::elapsedVTime();
+        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+        double elapsed_time_sec = end_time_sec - start_time_sec;
+        _cm->clear_has_overflown();
+
+        bool ret = _cm->do_yield_check(worker_i);
+
+        jlong sleep_time_ms;
+        if (!_cm->has_aborted() && the_task->has_aborted()) {
+          sleep_time_ms =
+            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
+          ConcurrentGCThread::stsLeave();
+          os::sleep(Thread::current(), sleep_time_ms, false);
+          ConcurrentGCThread::stsJoin();
+        }
+        double end_time2_sec = os::elapsedTime();
+        double elapsed_time2_sec = end_time2_sec - start_time_sec;
+
+        the_task->update_co_tracker();
+
+#if 0
+          gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
+                                 "overhead %1.4lf",
+                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
+                                 the_task->conc_overhead(os::elapsedTime()) * 8.0);
+          gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
+                                 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
+#endif
+      } while (!_cm->has_aborted() && the_task->has_aborted());
+    }
+    the_task->record_end_time();
+    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
+
+    ConcurrentGCThread::stsLeave();
+
+    double end_vtime = os::elapsedVTime();
+    the_task->update_co_tracker(true);
+    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
+  }
+
+  CMConcurrentMarkingTask(ConcurrentMark* cm,
+                          ConcurrentMarkThread* cmt) :
+      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
+
+  ~CMConcurrentMarkingTask() { }
+};
+
+void ConcurrentMark::markFromRoots() {
+  // we might be tempted to assert that:
+  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
+  //        "inconsistent argument?");
+  // However that wouldn't be right, because it's possible that
+  // a safepoint is indeed in progress as a younger generation
+  // stop-the-world GC happens even as we mark in this generation.
+
+  _restart_for_overflow = false;
+
+  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
+
+  CMConcurrentMarkingTask markingTask(this, cmThread());
+  if (parallel_marking_threads() > 0)
+    _parallel_workers->run_task(&markingTask);
+  else
+    markingTask.work(0);
+  print_stats();
+}
+
+void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
+  // world is stopped at this checkpoint
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "world should be stopped");
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If a full collection has happened, we shouldn't do this.
+  if (has_aborted()) {
+    g1h->set_marking_complete(); // So bitmap clearing isn't confused
+    return;
+  }
+
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  g1p->record_concurrent_mark_remark_start();
+
+  double start = os::elapsedTime();
+  GCOverheadReporter::recordSTWStart(start);
+
+  checkpointRootsFinalWork();
+
+  double mark_work_end = os::elapsedTime();
+
+  weakRefsWork(clear_all_soft_refs);
+
+  if (has_overflown()) {
+    // Oops.  We overflowed.  Restart concurrent marking.
+    _restart_for_overflow = true;
+    // Clear the flag. We do not need it any more.
+    clear_has_overflown();
+    if (G1TraceMarkStackOverflow)
+      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
+  } else {
+    // We're done with marking.
+    JavaThread::satb_mark_queue_set().set_active_all_threads(false);
+  }
+
+#if VERIFY_OBJS_PROCESSED
+  _scan_obj_cl.objs_processed = 0;
+  ThreadLocalObjQueue::objs_enqueued = 0;
+#endif
+
+  // Statistics
+  double now = os::elapsedTime();
+  _remark_mark_times.add((mark_work_end - start) * 1000.0);
+  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
+  _remark_times.add((now - start) * 1000.0);
+
+  GCOverheadReporter::recordSTWEnd(now);
+  for (int i = 0; i < (int)_max_task_num; ++i)
+    _tasks[i]->disable_co_tracker();
+  _cleanup_co_tracker.enable();
+  _cleanup_co_tracker.reset(cleanup_task_overhead());
+  g1p->record_concurrent_mark_remark_end();
+}
+
+
+#define CARD_BM_TEST_MODE 0
+
+class CalcLiveObjectsClosure: public HeapRegionClosure {
+
+  CMBitMapRO* _bm;
+  ConcurrentMark* _cm;
+  COTracker* _co_tracker;
+  bool _changed;
+  bool _yield;
+  size_t _words_done;
+  size_t _tot_live;
+  size_t _tot_used;
+  size_t _regions_done;
+  double _start_vtime_sec;
+
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+  intptr_t _bottom_card_num;
+  bool _final;
+
+  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
+    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
+#if CARD_BM_TEST_MODE
+      guarantee(_card_bm->at(i - _bottom_card_num),
+                "Should already be set.");
+#else
+      _card_bm->par_at_put(i - _bottom_card_num, 1);
+#endif
+    }
+  }
+
+public:
+  CalcLiveObjectsClosure(bool final,
+                         CMBitMapRO *bm, ConcurrentMark *cm,
+                         BitMap* region_bm, BitMap* card_bm,
+                         COTracker* co_tracker) :
+    _bm(bm), _cm(cm), _changed(false), _yield(true),
+    _words_done(0), _tot_live(0), _tot_used(0),
+    _region_bm(region_bm), _card_bm(card_bm),
+    _final(final), _co_tracker(co_tracker),
+    _regions_done(0), _start_vtime_sec(0.0)
+  {
+    _bottom_card_num =
+      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
+               CardTableModRefBS::card_shift);
+  }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (_co_tracker != NULL)
+      _co_tracker->update();
+
+    if (!_final && _regions_done == 0)
+      _start_vtime_sec = os::elapsedVTime();
+
+    if (hr->continuesHumongous()) return false;
+
+    HeapWord* nextTop = hr->next_top_at_mark_start();
+    HeapWord* start   = hr->top_at_conc_mark_count();
+    assert(hr->bottom() <= start && start <= hr->end() &&
+           hr->bottom() <= nextTop && nextTop <= hr->end() &&
+           start <= nextTop,
+           "Preconditions.");
+    // Record the number of words we'll examine.
+    size_t words_done = (nextTop - start);
+    // Find the first marked object at or after "start".
+    start = _bm->getNextMarkedWordAddress(start, nextTop);
+    size_t marked_bytes = 0;
+
+    // Below, the term "card num" means the result of shifting an address
+    // by the card shift -- address 0 corresponds to card number 0.  One
+    // must subtract the card num of the bottom of the heap to obtain a
+    // card table index.
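+    // Illustrative example, assuming the usual 512-byte cards (card
+    // shift of 9): an object starting at address 0x12345678 is on card
+    // num 0x12345678 >> 9 = 0x91a2b; if the bottom of the heap is at
+    // 0x12000000 (card num 0x90000), its card table index is
+    // 0x91a2b - 0x90000 = 0x1a2b.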
+    // The first card num of the sequence of live cards currently being
+    // constructed.  -1 ==> no sequence.
+    intptr_t start_card_num = -1;
+    // The last card num of the sequence of live cards currently being
+    // constructed.  -1 ==> no sequence.
+    intptr_t last_card_num = -1;
+
+    while (start < nextTop) {
+      if (_yield && _cm->do_yield_check()) {
+        // We yielded.  It might be for a full collection, in which case
+        // all bets are off; terminate the traversal.
+        if (_cm->has_aborted()) {
+          _changed = false;
+          return true;
+        } else {
+          // Otherwise, it might be a collection pause, and the region
+          // we're looking at might be in the collection set.  We'll
+          // abandon this region.
+          return false;
+        }
+      }
+      oop obj = oop(start);
+      int obj_sz = obj->size();
+      // The card num of the start of the current object.
+      intptr_t obj_card_num =
+        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
+
+      HeapWord* obj_last = start + obj_sz - 1;
+      intptr_t obj_last_card_num =
+        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
+
+      if (obj_card_num != last_card_num) {
+        if (start_card_num == -1) {
+          assert(last_card_num == -1, "Both or neither.");
+          start_card_num = obj_card_num;
+        } else {
+          assert(last_card_num != -1, "Both or neither.");
+          assert(obj_card_num >= last_card_num, "Inv");
+          if ((obj_card_num - last_card_num) > 1) {
+            // Mark the last run, and start a new one.
+            mark_card_num_range(start_card_num, last_card_num);
+            start_card_num = obj_card_num;
+          }
+        }
+#if CARD_BM_TEST_MODE
+        /*
+        gclog_or_tty->print_cr("Setting bits from %d/%d.",
+                               obj_card_num - _bottom_card_num,
+                               obj_last_card_num - _bottom_card_num);
+        */
+        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
+          _card_bm->par_at_put(j - _bottom_card_num, 1);
+        }
+#endif
+      }
+      // In any case, we set the last card num.
+      last_card_num = obj_last_card_num;
+
+      marked_bytes += obj_sz * HeapWordSize;
+      // Find the next marked object after this one.
+      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
+      _changed = true;
+    }
+    // Handle the last range, if any.
+    if (start_card_num != -1)
+      mark_card_num_range(start_card_num, last_card_num);
+    if (_final) {
+      // Mark the allocated-since-marking portion...
+      HeapWord* tp = hr->top();
+      if (nextTop < tp) {
+        start_card_num =
+          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
+        last_card_num =
+          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
+        mark_card_num_range(start_card_num, last_card_num);
+        // This definitely means the region has live objects.
+        _region_bm->par_at_put(hr->hrs_index(), 1);
+      }
+    }
+
+    hr->add_to_marked_bytes(marked_bytes);
+    // Update the live region bitmap.
+    if (marked_bytes > 0) {
+      _region_bm->par_at_put(hr->hrs_index(), 1);
+    }
+    hr->set_top_at_conc_mark_count(nextTop);
+    _tot_live += hr->next_live_bytes();
+    _tot_used += hr->used();
+    _words_done = words_done;
+
+    if (!_final) {
+      ++_regions_done;
+      if (_regions_done % 10 == 0) {
+        double end_vtime_sec = os::elapsedVTime();
+        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
+        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
+          jlong sleep_time_ms =
+            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
+#if 0
+          gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
+                                 "overhead %1.4lf",
+                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
+                                 _co_tracker->concOverhead(os::elapsedTime()));
+#endif
+          os::sleep(Thread::current(), sleep_time_ms, false);
+          _start_vtime_sec = end_vtime_sec;
+        }
+      }
+    }
+
+    return false;
+  }
+
+  bool changed() { return _changed;  }
+  void reset()   { _changed = false; _words_done = 0; }
+  void no_yield() { _yield = false; }
+  size_t words_done() { return _words_done; }
+  size_t tot_live() { return _tot_live; }
+  size_t tot_used() { return _tot_used; }
+};
+
+
+void ConcurrentMark::calcDesiredRegions() {
+  guarantee( _cleanup_co_tracker.enabled(), "invariant" );
+  _cleanup_co_tracker.start();
+
+  _region_bm.clear();
+  _card_bm.clear();
+  CalcLiveObjectsClosure calccl(false /*final*/,
+                                nextMarkBitMap(), this,
+                                &_region_bm, &_card_bm,
+                                &_cleanup_co_tracker);
+  G1CollectedHeap *g1h = G1CollectedHeap::heap();
+  g1h->heap_region_iterate(&calccl);
+
+  do {
+    calccl.reset();
+    g1h->heap_region_iterate(&calccl);
+  } while (calccl.changed());
+
+  _cleanup_co_tracker.update(true);
+}
+
+class G1ParFinalCountTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  CMBitMap* _bm;
+  size_t _n_workers;
+  size_t *_live_bytes;
+  size_t *_used_bytes;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+public:
+  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
+                      BitMap* region_bm, BitMap* card_bm) :
+    AbstractGangTask("G1 final counting"), _g1h(g1h),
+    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
+  {
+    if (ParallelGCThreads > 0)
+      _n_workers = _g1h->workers()->total_workers();
+    else
+      _n_workers = 1;
+    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
+    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
+  }
+
+  ~G1ParFinalCountTask() {
+    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
+    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
+  }
+
+  void work(int i) {
+    CalcLiveObjectsClosure calccl(true /*final*/,
+                                  _bm, _g1h->concurrent_mark(),
+                                  _region_bm, _card_bm,
+                                  NULL /* CO tracker */);
+    calccl.no_yield();
+    if (ParallelGCThreads > 0) {
+      _g1h->heap_region_par_iterate_chunked(&calccl, i,
+                                            HeapRegion::FinalCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&calccl);
+    }
+    assert(calccl.complete(), "Shouldn't have yielded!");
+
+    guarantee( (size_t)i < _n_workers, "invariant" );
+    _live_bytes[i] = calccl.tot_live();
+    _used_bytes[i] = calccl.tot_used();
+  }
+  size_t live_bytes()  {
+    size_t live_bytes = 0;
+    for (size_t i = 0; i < _n_workers; ++i)
+      live_bytes += _live_bytes[i];
+    return live_bytes;
+  }
+  size_t used_bytes()  {
+    size_t used_bytes = 0;
+    for (size_t i = 0; i < _n_workers; ++i)
+      used_bytes += _used_bytes[i];
+    return used_bytes;
+  }
+};
+
+class G1ParNoteEndTask;
+
+class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
+  G1CollectedHeap* _g1;
+  int _worker_num;
+  size_t _max_live_bytes;
+  size_t _regions_claimed;
+  size_t _freed_bytes;
+  size_t _cleared_h_regions;
+  size_t _freed_regions;
+  UncleanRegionList* _unclean_region_list;
+  double _claimed_region_time;
+  double _max_region_time;
+
+public:
+  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
+                             UncleanRegionList* list,
+                             int worker_num);
+  size_t freed_bytes() { return _freed_bytes; }
+  size_t cleared_h_regions() { return _cleared_h_regions; }
+  size_t freed_regions() { return  _freed_regions; }
+  UncleanRegionList* unclean_region_list() {
+    return _unclean_region_list;
+  }
+
+  bool doHeapRegion(HeapRegion *r);
+
+  size_t max_live_bytes() { return _max_live_bytes; }
+  size_t regions_claimed() { return _regions_claimed; }
+  double claimed_region_time_sec() { return _claimed_region_time; }
+  double max_region_time_sec() { return _max_region_time; }
+};
+
+class G1ParNoteEndTask: public AbstractGangTask {
+  friend class G1NoteEndOfConcMarkClosure;
+protected:
+  G1CollectedHeap* _g1h;
+  size_t _max_live_bytes;
+  size_t _freed_bytes;
+  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
+public:
+  G1ParNoteEndTask(G1CollectedHeap* g1h,
+                   ConcurrentMark::ParCleanupThreadState**
+                   par_cleanup_thread_state) :
+    AbstractGangTask("G1 note end"), _g1h(g1h),
+    _max_live_bytes(0), _freed_bytes(0),
+    _par_cleanup_thread_state(par_cleanup_thread_state)
+  {}
+
+  void work(int i) {
+    double start = os::elapsedTime();
+    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
+                                           &_par_cleanup_thread_state[i]->list,
+                                           i);
+    if (ParallelGCThreads > 0) {
+      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+                                            HeapRegion::NoteEndClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&g1_note_end);
+    }
+    assert(g1_note_end.complete(), "Shouldn't have yielded!");
+
+    // Now finish up freeing the current thread's regions.
+    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
+                                  g1_note_end.cleared_h_regions(),
+                                  0, NULL);
+    {
+      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      _max_live_bytes += g1_note_end.max_live_bytes();
+      _freed_bytes += g1_note_end.freed_bytes();
+    }
+    double end = os::elapsedTime();
+    if (G1PrintParCleanupStats) {
+      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
+                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
+                          i, start, end, (end-start)*1000.0,
+                          g1_note_end.regions_claimed(),
+                          g1_note_end.claimed_region_time_sec()*1000.0,
+                          g1_note_end.max_region_time_sec()*1000.0);
+    }
+  }
+  size_t max_live_bytes() { return _max_live_bytes; }
+  size_t freed_bytes() { return _freed_bytes; }
+};
+
+class G1ParScrubRemSetTask: public AbstractGangTask {
+protected:
+  G1RemSet* _g1rs;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+public:
+  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
+                       BitMap* region_bm, BitMap* card_bm) :
+    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
+    _region_bm(region_bm), _card_bm(card_bm)
+  {}
+
+  void work(int i) {
+    if (ParallelGCThreads > 0) {
+      _g1rs->scrub_par(_region_bm, _card_bm, i,
+                       HeapRegion::ScrubRemSetClaimValue);
+    } else {
+      _g1rs->scrub(_region_bm, _card_bm);
+    }
+  }
+
+};
+
+G1NoteEndOfConcMarkClosure::
+G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
+                           UncleanRegionList* list,
+                           int worker_num)
+  : _g1(g1), _worker_num(worker_num),
+    _max_live_bytes(0), _regions_claimed(0),
+    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
+    _claimed_region_time(0.0), _max_region_time(0.0),
+    _unclean_region_list(list)
+{}
+
+bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
+  // We use a claim value of zero here because all regions
+  // were claimed with value 1 in the FinalCount task.
+  r->reset_gc_time_stamp();
+  if (!r->continuesHumongous()) {
+    double start = os::elapsedTime();
+    _regions_claimed++;
+    r->note_end_of_marking();
+    _max_live_bytes += r->max_live_bytes();
+    _g1->free_region_if_totally_empty_work(r,
+                                           _freed_bytes,
+                                           _cleared_h_regions,
+                                           _freed_regions,
+                                           _unclean_region_list,
+                                           true /*par*/);
+    double region_time = (os::elapsedTime() - start);
+    _claimed_region_time += region_time;
+    if (region_time > _max_region_time) _max_region_time = region_time;
+  }
+  return false;
+}
+
+void ConcurrentMark::cleanup() {
+  // world is stopped at this checkpoint
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "world should be stopped");
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If a full collection has happened, we shouldn't do this.
+  if (has_aborted()) {
+    g1h->set_marking_complete(); // So bitmap clearing isn't confused
+    return;
+  }
+
+  _cleanup_co_tracker.disable();
+
+  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+  g1p->record_concurrent_mark_cleanup_start();
+
+  double start = os::elapsedTime();
+  GCOverheadReporter::recordSTWStart(start);
+
+  // Do counting once more with the world stopped for good measure.
+  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
+                                        &_region_bm, &_card_bm);
+  if (ParallelGCThreads > 0) {
+    assert(g1h->check_heap_region_claim_values(
+                                               HeapRegion::InitialClaimValue),
+           "sanity check");
+
+    int n_workers = g1h->workers()->total_workers();
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&g1_par_count_task);
+    g1h->set_par_threads(0);
+
+    assert(g1h->check_heap_region_claim_values(
+                                             HeapRegion::FinalCountClaimValue),
+           "sanity check");
+  } else {
+    g1_par_count_task.work(0);
+  }
+
+  size_t known_garbage_bytes =
+    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
+#if 0
+  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
+                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
+                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
+                         (double) known_garbage_bytes / (double) (1024 * 1024));
+#endif // 0
+  g1p->set_known_garbage_bytes(known_garbage_bytes);
+
+  size_t start_used_bytes = g1h->used();
+  _at_least_one_mark_complete = true;
+  g1h->set_marking_complete();
+
+  double count_end = os::elapsedTime();
+  double this_final_counting_time = (count_end - start);
+  if (G1PrintParCleanupStats) {
+    gclog_or_tty->print_cr("Cleanup:");
+    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
+                           this_final_counting_time*1000.0);
+  }
+  _total_counting_time += this_final_counting_time;
+
+  // Install newly created mark bitMap as "prev".
+  swapMarkBitMaps();
+
+  g1h->reset_gc_time_stamp();
+
+  // Note end of marking in all heap regions.
+  double note_end_start = os::elapsedTime();
+  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
+  if (ParallelGCThreads > 0) {
+    int n_workers = g1h->workers()->total_workers();
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&g1_par_note_end_task);
+    g1h->set_par_threads(0);
+
+    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
+           "sanity check");
+  } else {
+    g1_par_note_end_task.work(0);
+  }
+  // Tell the mutators that there might be unclean regions coming...
+  g1h->set_unclean_regions_coming(true);
+  double note_end_end = os::elapsedTime();
+  if (G1PrintParCleanupStats) {
+    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
+                           (note_end_end - note_end_start)*1000.0);
+  }
+
+  // Now we scrub the remembered sets. Note that we must do this before the
+  // call below, since it affects the metric by which we sort the heap
+  // regions.
+  if (G1ScrubRemSets) {
+    double rs_scrub_start = os::elapsedTime();
+    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
+    if (ParallelGCThreads > 0) {
+      int n_workers = g1h->workers()->total_workers();
+      g1h->set_par_threads(n_workers);
+      g1h->workers()->run_task(&g1_par_scrub_rs_task);
+      g1h->set_par_threads(0);
+
+      assert(g1h->check_heap_region_claim_values(
+                                            HeapRegion::ScrubRemSetClaimValue),
+             "sanity check");
+    } else {
+      g1_par_scrub_rs_task.work(0);
+    }
+
+    double rs_scrub_end = os::elapsedTime();
+    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
+    _total_rs_scrub_time += this_rs_scrub_time;
+  }
+
+  // this will also free any regions totally full of garbage objects,
+  // and sort the regions.
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
+                        g1_par_note_end_task.freed_bytes(),
+                        g1_par_note_end_task.max_live_bytes());
+
+  // Statistics.
+  double end = os::elapsedTime();
+  _cleanup_times.add((end - start) * 1000.0);
+  GCOverheadReporter::recordSTWEnd(end);
+
+  // G1CollectedHeap::heap()->print();
+  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
+  // G1CollectedHeap::heap()->get_gc_time_stamp());
+
+  if (PrintGC || PrintGCDetails) {
+    g1h->print_size_transition(gclog_or_tty,
+                               start_used_bytes,
+                               g1h->used(),
+                               g1h->capacity());
+  }
+
+  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
+  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
+
+  // We need to make this be a "collection" so any collection pause that
+  // races with it goes around and waits for completeCleanup to finish.
+  g1h->increment_total_collections();
+
+#ifndef PRODUCT
+  if (G1VerifyConcMark) {
+    G1CollectedHeap::heap()->prepare_for_verify();
+    G1CollectedHeap::heap()->verify(true,false);
+  }
+#endif
+}
+
+void ConcurrentMark::completeCleanup() {
+  // A full collection intervened.
+  if (has_aborted()) return;
+
+  int first = 0;
+  int last = (int)MAX2(ParallelGCThreads, (size_t)1);
+  for (int t = 0; t < last; t++) {
+    UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
+    assert(list->well_formed(), "Inv");
+    HeapRegion* hd = list->hd();
+    while (hd != NULL) {
+      // Now finish up the other stuff.
+      hd->rem_set()->clear();
+      HeapRegion* next_hd = hd->next_from_unclean_list();
+      (void)list->pop();
+      guarantee(list->hd() == next_hd, "how not?");
+      _g1h->put_region_on_unclean_list(hd);
+      if (!hd->isHumongous()) {
+        // Increment the _free_regions count by 1.
+        _g1h->finish_free_region_work(0, 0, 1, NULL);
+      }
+      hd = list->hd();
+      guarantee(hd == next_hd, "how not?");
+    }
+  }
+}
+
+
+class G1CMIsAliveClosure: public BoolObjectClosure {
+  G1CollectedHeap* _g1;
+ public:
+  G1CMIsAliveClosure(G1CollectedHeap* g1) :
+    _g1(g1)
+  {}
+
+  void do_object(oop obj) {
+    assert(false, "not to be invoked");
+  }
+  bool do_object_b(oop obj) {
+    HeapWord* addr = (HeapWord*)obj;
+    return addr != NULL &&
+           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
+  }
+};
+
+class G1CMKeepAliveClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  ConcurrentMark*  _cm;
+  CMBitMap*        _bitMap;
+ public:
+  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
+                       CMBitMap* bitMap) :
+    _g1(g1), _cm(cm),
+    _bitMap(bitMap) {}
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop thisOop = *p;
+    HeapWord* addr = (HeapWord*)thisOop;
+    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
+      _bitMap->mark(addr);
+      _cm->mark_stack_push(thisOop);
+    }
+  }
+};
+
+class G1CMDrainMarkingStackClosure: public VoidClosure {
+  CMMarkStack*                  _markStack;
+  CMBitMap*                     _bitMap;
+  G1CMKeepAliveClosure*         _oopClosure;
+ public:
+  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
+                               G1CMKeepAliveClosure* oopClosure) :
+    _bitMap(bitMap),
+    _markStack(markStack),
+    _oopClosure(oopClosure)
+  {}
+
+  void do_void() {
+    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
+  }
+};
+
+void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
+  ResourceMark rm;
+  HandleMark   hm;
+  ReferencePolicy* soft_ref_policy;
+
+  // Process weak references.
+  if (clear_all_soft_refs) {
+    soft_ref_policy = new AlwaysClearPolicy();
+  } else {
+#ifdef COMPILER2
+    soft_ref_policy = new LRUMaxHeapPolicy();
+#else
+    soft_ref_policy = new LRUCurrentHeapPolicy();
+#endif
+  }
+  assert(_markStack.isEmpty(), "mark stack should be empty");
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  G1CMIsAliveClosure g1IsAliveClosure(g1);
+
+  G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap());
+  G1CMDrainMarkingStackClosure
+    g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
+                               &g1KeepAliveClosure);
+
+  // XXXYYY  Also: copy the parallel ref processing code from CMS.
+  ReferenceProcessor* rp = g1->ref_processor();
+  rp->process_discovered_references(soft_ref_policy,
+                                    &g1IsAliveClosure,
+                                    &g1KeepAliveClosure,
+                                    &g1DrainMarkingStackClosure,
+                                    NULL);
+  assert(_markStack.overflow() || _markStack.isEmpty(),
+         "mark stack should be empty (unless it overflowed)");
+  if (_markStack.overflow()) {
+    set_has_overflown();
+  }
+
+  rp->enqueue_discovered_references();
+  rp->verify_no_references_recorded();
+  assert(!rp->discovery_enabled(), "should have been disabled");
+
+  // Now clean up stale oops in SymbolTable and StringTable
+  SymbolTable::unlink(&g1IsAliveClosure);
+  StringTable::unlink(&g1IsAliveClosure);
+}
+
+void ConcurrentMark::swapMarkBitMaps() {
+  CMBitMapRO* temp = _prevMarkBitMap;
+  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
+  _nextMarkBitMap  = (CMBitMap*)  temp;
+}
+
+class CMRemarkTask: public AbstractGangTask {
+private:
+  ConcurrentMark *_cm;
+
+public:
+  void work(int worker_i) {
+    // Since all available tasks are actually started, we should
+    // only proceed if we're supposed to be active.
+    if ((size_t)worker_i < _cm->active_tasks()) {
+      CMTask* task = _cm->task(worker_i);
+      task->record_start_time();
+      do {
+        task->do_marking_step(1000000000.0 /* something very large */);
+      } while (task->has_aborted() && !_cm->has_overflown());
+      // If we overflow, then we do not want to restart. We instead
+      // want to abort remark and do concurrent marking again.
+      task->record_end_time();
+    }
+  }
+
+  CMRemarkTask(ConcurrentMark* cm) :
+    AbstractGangTask("Par Remark"), _cm(cm) { }
+};
+
+void ConcurrentMark::checkpointRootsFinalWork() {
+  ResourceMark rm;
+  HandleMark   hm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  g1h->ensure_parsability(false);
+
+  if (ParallelGCThreads > 0) {
+    g1h->change_strong_roots_parity();
+    // this is remark, so we'll use up all available threads
+    int active_workers = ParallelGCThreads;
+    set_phase(active_workers, false);
+
+    CMRemarkTask remarkTask(this);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
+    int n_workers = g1h->workers()->total_workers();
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&remarkTask);
+    g1h->set_par_threads(0);
+
+    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  } else {
+    g1h->change_strong_roots_parity();
+    // this is remark, so we'll use up all available threads
+    int active_workers = 1;
+    set_phase(active_workers, false);
+
+    CMRemarkTask remarkTask(this);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
+    remarkTask.work(0);
+
+    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  }
+
+  print_stats();
+
+  if (!restart_for_overflow())
+    set_non_marking_state();
+
+#if VERIFY_OBJS_PROCESSED
+  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
+    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
+                           _scan_obj_cl.objs_processed,
+                           ThreadLocalObjQueue::objs_enqueued);
+    guarantee(_scan_obj_cl.objs_processed ==
+              ThreadLocalObjQueue::objs_enqueued,
+              "Different number of objs processed and enqueued.");
+  }
+#endif
+}
+
+class ReachablePrinterOopClosure: public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  CMBitMapRO*      _bitmap;
+  outputStream*    _out;
+
+public:
+  ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
+    _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop         obj = *p;
+    const char* str = NULL;
+    const char* str2 = "";
+
+    if (!_g1h->is_in_g1_reserved(obj))
+      str = "outside G1 reserved";
+    else {
+      HeapRegion* hr  = _g1h->heap_region_containing(obj);
+      guarantee( hr != NULL, "invariant" );
+      if (hr->obj_allocated_since_prev_marking(obj)) {
+        str = "over TAMS";
+        if (_bitmap->isMarked((HeapWord*) obj))
+          str2 = " AND MARKED";
+      } else if (_bitmap->isMarked((HeapWord*) obj))
+        str = "marked";
+      else
+        str = "#### NOT MARKED ####";
+    }
+
+    _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
+                   p, (void*) obj, str, str2);
+  }
+};
+
+class ReachablePrinterClosure: public BitMapClosure {
+private:
+  CMBitMapRO* _bitmap;
+  outputStream* _out;
+
+public:
+  ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
+    _bitmap(bitmap), _out(out) { }
+
+  bool do_bit(size_t offset) {
+    HeapWord* addr = _bitmap->offsetToHeapWord(offset);
+    ReachablePrinterOopClosure oopCl(_bitmap, _out);
+
+    _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
+    oop(addr)->oop_iterate(&oopCl);
+    _out->print_cr("");
+
+    return true;
+  }
+};
+
+class ObjInRegionReachablePrinterClosure : public ObjectClosure {
+private:
+  CMBitMapRO* _bitmap;
+  outputStream* _out;
+
+public:
+  void do_object(oop o) {
+    ReachablePrinterOopClosure oopCl(_bitmap, _out);
+
+    _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
+    o->oop_iterate(&oopCl);
+    _out->print_cr("");
+  }
+
+  ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
+    _bitmap(bitmap), _out(out) { }
+};
+
+class RegionReachablePrinterClosure : public HeapRegionClosure {
+private:
+  CMBitMapRO* _bitmap;
+  outputStream* _out;
+
+public:
+  bool doHeapRegion(HeapRegion* hr) {
+    HeapWord* b = hr->bottom();
+    HeapWord* e = hr->end();
+    HeapWord* t = hr->top();
+    HeapWord* p = hr->prev_top_at_mark_start();
+    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
+                   "PTAMS: "PTR_FORMAT, b, e, t, p);
+    _out->print_cr("");
+
+    ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
+    hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
+
+    return false;
+  }
+
+  RegionReachablePrinterClosure(CMBitMapRO* bitmap,
+                                outputStream* out) :
+    _bitmap(bitmap), _out(out) { }
+};
+
+void ConcurrentMark::print_prev_bitmap_reachable() {
+  outputStream* out = gclog_or_tty;
+
+#if SEND_HEAP_DUMP_TO_FILE
+  guarantee(heap_dump_file == NULL, "Protocol");
+  char fn_buf[100];
+  sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
+  heap_dump_file = fopen(fn_buf, "w");
+  fileStream fstream(heap_dump_file);
+  out = &fstream;
+#endif // SEND_HEAP_DUMP_TO_FILE
+
+  RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
+  out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
+  _g1h->heap_region_iterate(&rcl);
+  out->print_cr("");
+
+  ReachablePrinterClosure cl(_prevMarkBitMap, out);
+  out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
+  _prevMarkBitMap->iterate(&cl);
+  out->print_cr("");
+
+#if SEND_HEAP_DUMP_TO_FILE
+  fclose(heap_dump_file);
+  heap_dump_file = NULL;
+#endif // SEND_HEAP_DUMP_TO_FILE
+}
+
+// This note is for drainAllSATBBuffers and the code in between.
+// In the future we could reuse a task to do this work during an
+// evacuation pause (since now tasks are not active and can be claimed
+// during an evacuation pause). This was a late change to the code and
+// is currently not being taken advantage of.
+
+class CMGlobalObjectClosure : public ObjectClosure {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  void do_object(oop obj) {
+    _cm->deal_with_reference(obj);
+  }
+
+  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
+};
+
+void ConcurrentMark::deal_with_reference(oop obj) {
+  if (verbose_high())
+    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
+                           (void*) obj);
+
+  HeapWord* objAddr = (HeapWord*) obj;
+  if (_g1h->is_in_g1_reserved(objAddr)) {
+    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    HeapRegion* hr = _g1h->heap_region_containing(obj);
+    if (_g1h->is_obj_ill(obj, hr)) {
+      if (verbose_high())
+        gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
+                               "marked", (void*) obj);
+
+      // we need to mark it first
+      if (_nextMarkBitMap->parMark(objAddr)) {
+        // No OrderAccess:store_load() is needed. It is implicit in the
+        // CAS done in parMark(objAddr) above
+        HeapWord* finger = _finger;
+        if (objAddr < finger) {
+          if (verbose_high())
+            gclog_or_tty->print_cr("[global] below the global finger "
+                                   "("PTR_FORMAT"), pushing it", finger);
+          if (!mark_stack_push(obj)) {
+            if (verbose_low())
+              gclog_or_tty->print_cr("[global] global stack overflow during "
+                                     "deal_with_reference");
+          }
+        }
+      }
+    }
+  }
+}
+
+void ConcurrentMark::drainAllSATBBuffers() {
+  CMGlobalObjectClosure oc(this);
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.set_closure(&oc);
+
+  while (satb_mq_set.apply_closure_to_completed_buffer()) {
+    if (verbose_medium())
+      gclog_or_tty->print_cr("[global] processed an SATB buffer");
+  }
+
+  // no need to check whether we should do this, as this is only
+  // called during an evacuation pause
+  satb_mq_set.iterate_closure_all_threads();
+
+  satb_mq_set.set_closure(NULL);
+  guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+}
+
+void ConcurrentMark::markPrev(oop p) {
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
+}
+
+void ConcurrentMark::clear(oop p) {
+  assert(p != NULL && p->is_oop(), "expected an oop");
+  HeapWord* addr = (HeapWord*)p;
+  assert(addr >= _nextMarkBitMap->startWord() &&
+         addr < _nextMarkBitMap->endWord(), "in a region");
+
+  _nextMarkBitMap->clear(addr);
+}
+
+void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+  _nextMarkBitMap->clearRange(mr);
+}
+
+HeapRegion*
+ConcurrentMark::claim_region(int task_num) {
+  // "checkpoint" the finger
+  HeapWord* finger = _finger;
+
+  // _heap_end will not change underneath our feet; it only changes at
+  // yield points.
+  while (finger < _heap_end) {
+    tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
+
+    // is the gap between reading the finger and doing the CAS too long?
+
+    HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
+    HeapWord*   bottom        = curr_region->bottom();
+    HeapWord*   end           = curr_region->end();
+    HeapWord*   limit         = curr_region->next_top_at_mark_start();
+
+    if (verbose_low())
+      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
+                             "["PTR_FORMAT", "PTR_FORMAT"), "
+                             "limit = "PTR_FORMAT,
+                             task_num, curr_region, bottom, end, limit);
+
+    HeapWord* res =
+      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
+    if (res == finger) {
+      // we succeeded
+
+      // notice that _finger == end cannot be guaranteed here since
+      // someone else might have moved the finger even further
+      guarantee( _finger >= end, "the finger should have moved forward" );
+
+      if (verbose_low())
+        gclog_or_tty->print_cr("[%d] we were successful with region = "
+                               PTR_FORMAT, task_num, curr_region);
+
+      if (limit > bottom) {
+        if (verbose_low())
+          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
+                                 "returning it ", task_num, curr_region);
+        return curr_region;
+      } else {
+        tmp_guarantee_CM( limit == bottom,
+                          "the region limit should be at bottom" );
+        if (verbose_low())
+          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
+                                 "returning NULL", task_num, curr_region);
+        // we return NULL and the caller should try calling
+        // claim_region() again.
+        return NULL;
+      }
+    } else {
+      guarantee( _finger > finger, "the finger should have moved forward" );
+      if (verbose_low())
+        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
+                               "global finger = "PTR_FORMAT", "
+                               "our finger = "PTR_FORMAT,
+                               task_num, _finger, finger);
+
+      // read it again
+      finger = _finger;
+    }
+  }
+
+  return NULL;
+}
+
+void ConcurrentMark::oops_do(OopClosure* cl) {
+  if (_markStack.size() > 0 && verbose_low())
+    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
+                           "size = %d", _markStack.size());
+  // we first iterate over the contents of the mark stack...
+  _markStack.oops_do(cl);
+
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    OopTaskQueue* queue = _task_queues->queue((int)i);
+
+    if (queue->size() > 0 && verbose_low())
+      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
+                             "size = %d", i, queue->size());
+
+    // ...then over the contents of all the task queues.
+    queue->oops_do(cl);
+  }
+
+  // finally, invalidate any entries in the region stack that
+  // point into the collection set
+  if (_regionStack.invalidate_entries_into_cset()) {
+    // otherwise, any gray objects copied during the evacuation pause
+    // might not be visited.
+    guarantee( _should_gray_objects, "invariant" );
+  }
+}
+
+void ConcurrentMark::clear_marking_state() {
+  _markStack.setEmpty();
+  _markStack.clear_overflow();
+  _regionStack.setEmpty();
+  _regionStack.clear_overflow();
+  clear_has_overflown();
+  _finger = _heap_start;
+
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    OopTaskQueue* queue = _task_queues->queue(i);
+    queue->set_empty();
+  }
+}
+
+void ConcurrentMark::print_stats() {
+  if (verbose_stats()) {
+    gclog_or_tty->print_cr("---------------------------------------------------------------------");
+    for (size_t i = 0; i < _active_tasks; ++i) {
+      _tasks[i]->print_stats();
+      gclog_or_tty->print_cr("---------------------------------------------------------------------");
+    }
+  }
+}
+
+class CSMarkOopClosure: public OopClosure {
+  friend class CSMarkBitMapClosure;
+
+  G1CollectedHeap* _g1h;
+  CMBitMap*        _bm;
+  ConcurrentMark*  _cm;
+  oop*             _ms;
+  jint*            _array_ind_stack;
+  int              _ms_size;
+  int              _ms_ind;
+  int              _array_increment;
+
+  bool push(oop obj, int arr_ind = 0) {
+    if (_ms_ind == _ms_size) {
+      gclog_or_tty->print_cr("Mark stack is full.");
+      return false;
+    }
+    _ms[_ms_ind] = obj;
+    if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
+    _ms_ind++;
+    return true;
+  }
+
+  oop pop() {
+    if (_ms_ind == 0) return NULL;
+    else {
+      _ms_ind--;
+      return _ms[_ms_ind];
+    }
+  }
+
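+  // Illustrative sketch of the chunked array scanning done in drain()
+  // below (hypothetical numbers): with _array_increment == 16, an objArray
+  // of length 40 is processed as the chunks [0,16), [16,32) and [32,40);
+  // before scanning each chunk the array is re-pushed with the start index
+  // of the next chunk, unless that index is already past the array length.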
+  bool drain() {
+    while (_ms_ind > 0) {
+      oop obj = pop();
+      assert(obj != NULL, "Since index was non-zero.");
+      if (obj->is_objArray()) {
+        jint arr_ind = _array_ind_stack[_ms_ind];
+        objArrayOop aobj = objArrayOop(obj);
+        jint len = aobj->length();
+        jint next_arr_ind = arr_ind + _array_increment;
+        if (next_arr_ind < len) {
+          push(obj, next_arr_ind);
+        }
+        // Now process this portion of this one.
+        int lim = MIN2(next_arr_ind, len);
+        assert(!UseCompressedOops, "This needs to be fixed");
+        for (int j = arr_ind; j < lim; j++) {
+          do_oop(aobj->obj_at_addr<oop>(j));
+        }
+
+      } else {
+        obj->oop_iterate(this);
+      }
+      if (abort()) return false;
+    }
+    return true;
+  }
+
+public:
+  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
+    _g1h(G1CollectedHeap::heap()),
+    _cm(cm),
+    _bm(cm->nextMarkBitMap()),
+    _ms_size(ms_size), _ms_ind(0),
+    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
+    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
+    _array_increment(MAX2(ms_size/8, 16))
+  {}
+
+  ~CSMarkOopClosure() {
+    FREE_C_HEAP_ARRAY(oop, _ms);
+    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
+  }
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop obj = *p;
+    if (obj == NULL) return;
+    if (obj->is_forwarded()) {
+      // If the object has already been forwarded, we have to make sure
+      // that it's marked.  So follow the forwarding pointer.  Note that
+      // this does the right thing for self-forwarding pointers in the
+      // evacuation failure case.
+      obj = obj->forwardee();
+    }
+    HeapRegion* hr = _g1h->heap_region_containing(obj);
+    if (hr != NULL) {
+      if (hr->in_collection_set()) {
+        if (_g1h->is_obj_ill(obj)) {
+          _bm->mark((HeapWord*)obj);
+          if (!push(obj)) {
+            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
+            set_abort();
+          }
+        }
+      } else {
+        // Outside the collection set; we need to gray it
+        _cm->deal_with_reference(obj);
+      }
+    }
+  }
+};
+
+class CSMarkBitMapClosure: public BitMapClosure {
+  G1CollectedHeap* _g1h;
+  CMBitMap*        _bitMap;
+  ConcurrentMark*  _cm;
+  CSMarkOopClosure _oop_cl;
+public:
+  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
+    _g1h(G1CollectedHeap::heap()),
+    _bitMap(cm->nextMarkBitMap()),
+    _oop_cl(cm, ms_size)
+  {}
+
+  ~CSMarkBitMapClosure() {}
+
+  bool do_bit(size_t offset) {
+    // convert offset into a HeapWord*
+    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
+    assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
+           "address out of range");
+    assert(_bitMap->isMarked(addr), "tautology");
+    oop obj = oop(addr);
+    if (!obj->is_forwarded()) {
+      if (!_oop_cl.push(obj)) return false;
+      if (!_oop_cl.drain()) return false;
+    }
+    // Otherwise...
+    return true;
+  }
+};
+
+
+class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
+  CMBitMap* _bm;
+  CSMarkBitMapClosure _bit_cl;
+  enum SomePrivateConstants {
+    MSSize = 1000
+  };
+  bool _completed;
+public:
+  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
+    _bm(cm->nextMarkBitMap()),
+    _bit_cl(cm, MSSize),
+    _completed(true)
+  {}
+
+  ~CompleteMarkingInCSHRClosure() {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->evacuation_failed()) {
+      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
+      if (!mr.is_empty()) {
+        if (!_bm->iterate(&_bit_cl, mr)) {
+          _completed = false;
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  bool completed() { return _completed; }
+};
+
+class ClearMarksInHRClosure: public HeapRegionClosure {
+  CMBitMap* _bm;
+public:
+  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
+      MemRegion usedMR = r->used_region();
+      _bm->clearRange(usedMR);
+    }
+    return false;
+  }
+};
+
+void ConcurrentMark::complete_marking_in_collection_set() {
+  G1CollectedHeap* g1h =  G1CollectedHeap::heap();
+
+  if (!g1h->mark_in_progress()) {
+    g1h->g1_policy()->record_mark_closure_time(0.0);
+    return;
+  }
+
+  int i = 1;
+  double start = os::elapsedTime();
+  while (true) {
+    i++;
+    CompleteMarkingInCSHRClosure cmplt(this);
+    g1h->collection_set_iterate(&cmplt);
+    if (cmplt.completed()) break;
+  }
+  double end_time = os::elapsedTime();
+  double elapsed_time_ms = (end_time - start) * 1000.0;
+  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
+  if (PrintGCDetails) {
+    gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
+  }
+
+  ClearMarksInHRClosure clr(nextMarkBitMap());
+  g1h->collection_set_iterate(&clr);
+}
+
+// The next two methods deal with the following optimisation. Some
+// objects are gray by being marked and located above the finger. If
+// they are copied, during an evacuation pause, below the finger then
+// they need to be pushed on the stack. The observation is that, if
+// there are no regions in the collection set located above the
+// finger, then the above cannot happen, hence we do not need to
+// explicitly gray any objects when copying them to below the
+// finger. The global stack will be scanned to ensure that, if it
+// points to objects being copied, it will update their
+// location. There is a tricky situation with the gray objects in
+// the region stack that are being copied, however. See the comment in
+// newCSet().
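+//
+// Illustrative scenario (hypothetical addresses): suppose the global
+// finger is at 0x8000 and a marked object at 0x9000 (above the finger,
+// so implicitly gray) is evacuated to 0x4000. The finger has already
+// passed 0x4000, so the object's new location would never be scanned
+// unless the object is explicitly pushed on the stack. If every
+// collection set region lies entirely below 0x8000, this situation
+// cannot arise.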
+
+void ConcurrentMark::newCSet() {
+  if (!concurrent_marking_in_progress())
+    // nothing to do if marking is not in progress
+    return;
+
+  // find what the lowest finger is among the global and local fingers
+  _min_finger = _finger;
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    CMTask* task = _tasks[i];
+    HeapWord* task_finger = task->finger();
+    if (task_finger != NULL && task_finger < _min_finger)
+      _min_finger = task_finger;
+  }
+
+  _should_gray_objects = false;
+
+  // This fixes a very subtle and frustrating bug. It might be the case
+  // that, during an evacuation pause, heap regions that contain
+  // objects that are gray (by being in regions contained in the
+  // region stack) are included in the collection set. Since such gray
+  // objects will be moved, and because it's not easy to redirect
+  // region stack entries to point to a new location (because objects
+  // in one region might be scattered to multiple regions after they
+  // are copied), one option is to ensure that all marked objects
+  // copied during a pause are pushed on the stack. Notice, however,
+  // that this problem can only happen when the region stack is not
+  // empty during an evacuation pause. So, we make the fix a bit less
+  // conservative and ensure that regions are pushed on the stack,
+  // irrespective of whether all collection set regions are below the
+  // finger, if the region stack is not empty. This is expected to be
+  // a rare case, so I don't think it's necessary to be smarter about it.
+  if (!region_stack_empty())
+    _should_gray_objects = true;
+}
+
+void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  if (!concurrent_marking_in_progress())
+    return;
+
+  HeapWord* region_end = hr->end();
+  if (region_end > _min_finger)
+    _should_gray_objects = true;
+}
+
+void ConcurrentMark::disable_co_trackers() {
+  if (has_aborted()) {
+    if (_cleanup_co_tracker.enabled())
+      _cleanup_co_tracker.disable();
+    for (int i = 0; i < (int)_max_task_num; ++i) {
+      CMTask* task = _tasks[i];
+      if (task->co_tracker_enabled())
+        task->disable_co_tracker();
+    }
+  } else {
+    guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
+    for (int i = 0; i < (int)_max_task_num; ++i) {
+      CMTask* task = _tasks[i];
+      guarantee( !task->co_tracker_enabled(), "invariant" );
+    }
+  }
+}
+
+// abandon current marking iteration due to a Full GC
+void ConcurrentMark::abort() {
+  // If we're not marking, nothing to do.
+  if (!G1ConcMark) return;
+
+  // Clear all marks to force marking thread to do nothing
+  _nextMarkBitMap->clearAll();
+  // Empty mark stack
+  clear_marking_state();
+  for (int i = 0; i < (int)_max_task_num; ++i)
+    _tasks[i]->clear_region_fields();
+  _has_aborted = true;
+
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.abandon_partial_marking();
+  satb_mq_set.set_active_all_threads(false);
+}
+
+static void print_ms_time_info(const char* prefix, const char* name,
+                               NumberSeq& ns) {
+  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
+                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
+  if (ns.num() > 0) {
+    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
+                           prefix, ns.sd(), ns.maximum());
+  }
+}
+
+void ConcurrentMark::print_summary_info() {
+  gclog_or_tty->print_cr(" Concurrent marking:");
+  print_ms_time_info("  ", "init marks", _init_times);
+  print_ms_time_info("  ", "remarks", _remark_times);
+  {
+    print_ms_time_info("     ", "final marks", _remark_mark_times);
+    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
+
+  }
+  print_ms_time_info("  ", "cleanups", _cleanup_times);
+  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
+                         _total_counting_time,
+                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
+                          (double)_cleanup_times.num()
+                         : 0.0));
+  if (G1ScrubRemSets) {
+    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
+                           _total_rs_scrub_time,
+                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
+                            (double)_cleanup_times.num()
+                           : 0.0));
+  }
+  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
+                         (_init_times.sum() + _remark_times.sum() +
+                          _cleanup_times.sum())/1000.0);
+  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
+                "(%8.2f s marking, %8.2f s counting).",
+                cmThread()->vtime_accum(),
+                cmThread()->vtime_mark_accum(),
+                cmThread()->vtime_count_accum());
+}
+
+// Closures
+// XXX: there seems to be a lot of code duplication here;
+// should refactor and consolidate the shared code.
+
+// We take a break if someone is trying to stop the world.
+bool ConcurrentMark::do_yield_check(int worker_i) {
+  if (should_yield()) {
+    if (worker_i == 0)
+      _g1h->g1_policy()->record_concurrent_pause();
+    cmThread()->yield();
+    if (worker_i == 0)
+      _g1h->g1_policy()->record_concurrent_pause_end();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool ConcurrentMark::should_yield() {
+  return cmThread()->should_yield();
+}
+
+bool ConcurrentMark::containing_card_is_marked(void* p) {
+  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
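+  // For example (illustrative values): with 512-byte cards
+  // (card_shift == 9), a pointer 8K bytes past the start of the reserved
+  // region has offset 8192 and maps to bit 8192 >> 9 == 16 in _card_bm.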
+  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
+}
+
+bool ConcurrentMark::containing_cards_are_marked(void* start,
+                                                 void* last) {
+  return
+    containing_card_is_marked(start) &&
+    containing_card_is_marked(last);
+}
+
+#ifndef PRODUCT
+// for debugging purposes
+void ConcurrentMark::print_finger() {
+  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
+                         _heap_start, _heap_end, _finger);
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
+  }
+  gclog_or_tty->print_cr("");
+}
+#endif
+
+// Closure for iteration over bitmaps
+class CMBitMapClosure : public BitMapClosure {
+private:
+  // the bitmap that is being iterated over
+  CMBitMap*                   _nextMarkBitMap;
+  ConcurrentMark*             _cm;
+  CMTask*                     _task;
+  // true if we're scanning a heap region claimed by the task (so that
+  // we move the finger along), false if we're not, i.e. currently when
+  // scanning a heap region popped from the region stack (so that we
+  // do not move the task finger along; it'd be a mistake if we did so).
+  bool                        _scanning_heap_region;
+
+public:
+  CMBitMapClosure(CMTask *task,
+                  ConcurrentMark* cm,
+                  CMBitMap* nextMarkBitMap)
+    :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
+
+  void set_scanning_heap_region(bool scanning_heap_region) {
+    _scanning_heap_region = scanning_heap_region;
+  }
+
+  bool do_bit(size_t offset) {
+    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
+    tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
+    tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
+
+    if (_scanning_heap_region) {
+      statsOnly( _task->increase_objs_found_on_bitmap() );
+      tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
+      // We move that task's local finger along.
+      _task->move_finger_to(addr);
+    } else {
+      // We move the task's region finger along.
+      _task->move_region_finger_to(addr);
+    }
+
+    _task->scan_object(oop(addr));
+    // we only partially drain the local queue and global stack
+    _task->drain_local_queue(true);
+    _task->drain_global_stack(true);
+
+    // if the has_aborted flag has been raised, we need to bail out of
+    // the iteration
+    return !_task->has_aborted();
+  }
+};
+
+// Closure for iterating over objects, currently only used for
+// processing SATB buffers.
+class CMObjectClosure : public ObjectClosure {
+private:
+  CMTask* _task;
+
+public:
+  void do_object(oop obj) {
+    _task->deal_with_reference(obj);
+  }
+
+  CMObjectClosure(CMTask* task) : _task(task) { }
+};
+
+// Closure for iterating over object fields
+class CMOopClosure : public OopClosure {
+private:
+  G1CollectedHeap*   _g1h;
+  ConcurrentMark*    _cm;
+  CMTask*            _task;
+
+public:
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
+
+    oop obj = *p;
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] we're looking at location "
+                             "*"PTR_FORMAT" = "PTR_FORMAT,
+                             _task->task_id(), p, (void*) obj);
+    _task->deal_with_reference(obj);
+  }
+
+  CMOopClosure(G1CollectedHeap* g1h,
+               ConcurrentMark* cm,
+               CMTask* task)
+    : _g1h(g1h), _cm(cm), _task(task) { }
+};
+
+void CMTask::setup_for_region(HeapRegion* hr) {
+  tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
+      "claim_region() should have filtered out continues humongous regions" );
+
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
+                           _task_id, hr);
+
+  _curr_region  = hr;
+  _finger       = hr->bottom();
+  update_region_limit();
+}
+
+void CMTask::update_region_limit() {
+  HeapRegion* hr            = _curr_region;
+  HeapWord* bottom          = hr->bottom();
+  HeapWord* limit           = hr->next_top_at_mark_start();
+
+  if (limit == bottom) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] found an empty region "
+                             "["PTR_FORMAT", "PTR_FORMAT")",
+                             _task_id, bottom, limit);
+    // The region was collected underneath our feet.
+    // We set the finger to bottom to ensure that the bitmap
+    // iteration that will follow this will not do anything.
+    // (this is not a condition that holds when we set the region up,
+    // as the region is not supposed to be empty in the first place)
+    _finger = bottom;
+  } else if (limit >= _region_limit) {
+    tmp_guarantee_CM( limit >= _finger, "peace of mind" );
+  } else {
+    tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
+    // This can happen under some pretty unusual circumstances.  An
+    // evacuation pause empties the region underneath our feet (NTAMS
+    // at bottom). We then do some allocation in the region (NTAMS
+    // stays at bottom), followed by the region being used as a GC
+    // alloc region (NTAMS will move to top() and the objects
+    // originally below it will be grayed). All objects now marked in
+    // the region are explicitly grayed, if below the global finger,
+    // and in fact we do not need to scan anything else. So, we simply
+    // set _finger to be limit to ensure that the bitmap iteration
+    // doesn't do anything.
+    _finger = limit;
+  }
+
+  _region_limit = limit;
+}
+
+void CMTask::giveup_current_region() {
+  tmp_guarantee_CM( _curr_region != NULL, "invariant" );
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
+                           _task_id, _curr_region);
+  clear_region_fields();
+}
+
+void CMTask::clear_region_fields() {
+  // Values for these three fields that indicate that we're not
+  // holding on to a region.
+  _curr_region   = NULL;
+  _finger        = NULL;
+  _region_limit  = NULL;
+
+  _region_finger = NULL;
+}
+
+void CMTask::reset(CMBitMap* nextMarkBitMap) {
+  guarantee( nextMarkBitMap != NULL, "invariant" );
+
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] resetting", _task_id);
+
+  _nextMarkBitMap                = nextMarkBitMap;
+  clear_region_fields();
+
+  _calls                         = 0;
+  _elapsed_time_ms               = 0.0;
+  _termination_time_ms           = 0.0;
+  _termination_start_time_ms     = 0.0;
+
+#if _MARKING_STATS_
+  _local_pushes                  = 0;
+  _local_pops                    = 0;
+  _local_max_size                = 0;
+  _objs_scanned                  = 0;
+  _global_pushes                 = 0;
+  _global_pops                   = 0;
+  _global_max_size               = 0;
+  _global_transfers_to           = 0;
+  _global_transfers_from         = 0;
+  _region_stack_pops             = 0;
+  _regions_claimed               = 0;
+  _objs_found_on_bitmap          = 0;
+  _satb_buffers_processed        = 0;
+  _steal_attempts                = 0;
+  _steals                        = 0;
+  _aborted                       = 0;
+  _aborted_overflow              = 0;
+  _aborted_cm_aborted            = 0;
+  _aborted_yield                 = 0;
+  _aborted_timed_out             = 0;
+  _aborted_satb                  = 0;
+  _aborted_termination           = 0;
+#endif // _MARKING_STATS_
+}
+
+bool CMTask::should_exit_termination() {
+  regular_clock_call();
+  // This is called when we are in the termination protocol. We should
+  // quit if, for some reason, this task wants to abort or the global
+  // stack is not empty (this means that we can get work from it).
+  return !_cm->mark_stack_empty() || has_aborted();
+}
+
+// This determines whether the method below will check both the local
+// and global fingers when deciding whether to push a gray object on
+// the stack (value 1) or whether it will only check the global one
+// (value 0). The tradeoff is that the former is a bit more accurate
+// and may push fewer entries on the stack, but it might also be a
+// little bit slower.
+
+#define _CHECK_BOTH_FINGERS_      1
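+
+// For instance (hypothetical addresses): with the task's local finger at
+// 0x6000 and the global finger at 0x8000, a newly marked object at 0x5000
+// is pushed under either setting; an object at 0x7000 that lies inside the
+// currently claimed region is left for that region's bitmap scan with
+// value 1, but is pushed (redundantly) with value 0.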
+
+void CMTask::deal_with_reference(oop obj) {
+  if (_cm->verbose_high())
+    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
+                           _task_id, (void*) obj);
+
+  ++_refs_reached;
+
+  HeapWord* objAddr = (HeapWord*) obj;
+  if (_g1h->is_in_g1_reserved(objAddr)) {
+    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    HeapRegion* hr =  _g1h->heap_region_containing(obj);
+    if (_g1h->is_obj_ill(obj, hr)) {
+      if (_cm->verbose_high())
+        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
+                               _task_id, (void*) obj);
+
+      // we need to mark it first
+      if (_nextMarkBitMap->parMark(objAddr)) {
+        // No OrderAccess:store_load() is needed. It is implicit in the
+        // CAS done in parMark(objAddr) above
+        HeapWord* global_finger = _cm->finger();
+
+#if _CHECK_BOTH_FINGERS_
+        // we will check both the local and global fingers
+
+        if (_finger != NULL && objAddr < _finger) {
+          if (_cm->verbose_high())
+            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
+                                   "pushing it", _task_id, _finger);
+          push(obj);
+        } else if (_curr_region != NULL && objAddr < _region_limit) {
+          // do nothing
+        } else if (objAddr < global_finger) {
+          // Notice that the global finger might be moving forward
+          // concurrently. This is not a problem. In the worst case, we
+          // mark the object while it is above the global finger and, by
+          // the time we read the global finger, it has moved forward
+          // past this object. In this case, the object will probably
+          // be visited when a task is scanning the region and will also
+          // be pushed on the stack. So, some duplicate work, but no
+          // correctness problems.
+
+          if (_cm->verbose_high())
+            gclog_or_tty->print_cr("[%d] below the global finger "
+                                   "("PTR_FORMAT"), pushing it",
+                                   _task_id, global_finger);
+          push(obj);
+        } else {
+          // do nothing
+        }
+#else // _CHECK_BOTH_FINGERS_
+      // we will only check the global finger
+
+        if (objAddr < global_finger) {
+          // see long comment above
+
+          if (_cm->verbose_high())
+            gclog_or_tty->print_cr("[%d] below the global finger "
+                                   "("PTR_FORMAT"), pushing it",
+                                   _task_id, global_finger);
+          push(obj);
+        }
+#endif // _CHECK_BOTH_FINGERS_
+      }
+    }
+  }
+}
+
+void CMTask::push(oop obj) {
+  HeapWord* objAddr = (HeapWord*) obj;
+  tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
+  tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
+  tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
+
+  if (_cm->verbose_high())
+    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
+
+  if (!_task_queue->push(obj)) {
+    // The local task queue looks full. We need to push some entries
+    // to the global stack.
+
+    if (_cm->verbose_medium())
+      gclog_or_tty->print_cr("[%d] task queue overflow, "
+                             "moving entries to the global stack",
+                             _task_id);
+    move_entries_to_global_stack();
+
+    // this should succeed since, even if we overflow the global
+    // stack, we should have definitely removed some entries from the
+    // local queue. So, there must be space on it.
+    bool success = _task_queue->push(obj);
+    tmp_guarantee_CM( success, "invariant" );
+  }
+
+  statsOnly( int tmp_size = _task_queue->size();
+             if (tmp_size > _local_max_size)
+               _local_max_size = tmp_size;
+             ++_local_pushes );
+}
+
+void CMTask::reached_limit() {
+  tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
+                    _refs_reached >= _refs_reached_limit,
+                    "shouldn't have been called otherwise" );
+  regular_clock_call();
+}
+
+void CMTask::regular_clock_call() {
+  if (has_aborted())
+    return;
+
+  // First, we need to recalculate the words scanned and refs reached
+  // limits for the next clock call.
+  recalculate_limits();
+
+  // During the regular clock call we do the following
+
+  // (1) If an overflow has been flagged, then we abort.
+  if (_cm->has_overflown()) {
+    set_has_aborted();
+    return;
+  }
+
+  // If we are not concurrent (i.e. we're doing remark) we don't need
+  // to check anything else. The other steps are only needed during
+  // the concurrent marking phase.
+  if (!concurrent())
+    return;
+
+  // (2) If marking has been aborted for Full GC, then we also abort.
+  if (_cm->has_aborted()) {
+    set_has_aborted();
+    statsOnly( ++_aborted_cm_aborted );
+    return;
+  }
+
+  double curr_time_ms = os::elapsedVTime() * 1000.0;
+
+  // (3) If marking stats are enabled, then we update the step history.
+#if _MARKING_STATS_
+  if (_words_scanned >= _words_scanned_limit)
+    ++_clock_due_to_scanning;
+  if (_refs_reached >= _refs_reached_limit)
+    ++_clock_due_to_marking;
+
+  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
+  _interval_start_time_ms = curr_time_ms;
+  _all_clock_intervals_ms.add(last_interval_ms);
+
+  if (_cm->verbose_medium()) {
+    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
+                           "scanned = %d%s, refs reached = %d%s",
+                           _task_id, last_interval_ms,
+                           _words_scanned,
+                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
+                           _refs_reached,
+                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
+  }
+#endif // _MARKING_STATS_
+
+  // (4) We check whether we should yield. If we have to, then we abort.
+  if (_cm->should_yield()) {
+    // We should yield. To do this we abort the task. The caller is
+    // responsible for yielding.
+    set_has_aborted();
+    statsOnly( ++_aborted_yield );
+    return;
+  }
+
+  // (5) We check whether we've reached our time quota. If we have,
+  // then we abort.
+  double elapsed_time_ms = curr_time_ms - _start_time_ms;
+  if (elapsed_time_ms > _time_target_ms) {
+    set_has_aborted();
+    _has_aborted_timed_out = true;
+    statsOnly( ++_aborted_timed_out );
+    return;
+  }
+
+  // (6) Finally, we check whether there are enough completed SATB
+  // buffers available for processing. If there are, we abort.
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
+                             _task_id);
+    // we do need to process SATB buffers, we'll abort and restart
+    // the marking task to do so
+    set_has_aborted();
+    statsOnly( ++_aborted_satb );
+    return;
+  }
+}
+
+void CMTask::recalculate_limits() {
+  _real_words_scanned_limit = _words_scanned + words_scanned_period;
+  _words_scanned_limit      = _real_words_scanned_limit;
+
+  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
+  _refs_reached_limit       = _real_refs_reached_limit;
+}
+
+void CMTask::decrease_limits() {
+  // This is called when we believe that we're going to do an infrequent
+  // operation which will increase the per byte scanned cost (i.e. move
+  // entries to/from the global stack). It basically tries to decrease the
+  // scanning limit so that the clock is called earlier.
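+  //
+  // For example (illustrative numbers only): if words_scanned_period were
+  // 12*1024 words, the limit would drop by 3*12*1024/4 = 9*1024 words,
+  // i.e. the next regular clock call would come after roughly a quarter
+  // of the usual scanning period.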
+
+  if (_cm->verbose_medium())
+    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
+
+  _words_scanned_limit = _real_words_scanned_limit -
+    3 * words_scanned_period / 4;
+  _refs_reached_limit  = _real_refs_reached_limit -
+    3 * refs_reached_period / 4;
+}
+
+void CMTask::move_entries_to_global_stack() {
+  // local array where we'll store the entries that will be popped
+  // from the local queue
+  oop buffer[global_stack_transfer_size];
+
+  int n = 0;
+  oop obj;
+  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
+    buffer[n] = obj;
+    ++n;
+  }
+
+  if (n > 0) {
+    // we popped at least one entry from the local queue
+
+    statsOnly( ++_global_transfers_to; _local_pops += n );
+
+    if (!_cm->mark_stack_push(buffer, n)) {
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
+      set_has_aborted();
+    } else {
+      // the transfer was successful
+
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
+                               _task_id, n);
+      statsOnly( int tmp_size = _cm->mark_stack_size();
+                 if (tmp_size > _global_max_size)
+                   _global_max_size = tmp_size;
+                 _global_pushes += n );
+    }
+  }
+
+  // this operation was quite expensive, so decrease the limits
+  decrease_limits();
+}
+
+void CMTask::get_entries_from_global_stack() {
+  // local array where we'll store the entries that will be popped
+  // from the global stack.
+  oop buffer[global_stack_transfer_size];
+  int n;
+  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
+  tmp_guarantee_CM( n <= global_stack_transfer_size,
+                    "we should not pop more than the given limit" );
+  if (n > 0) {
+    // yes, we did actually pop at least one entry
+
+    statsOnly( ++_global_transfers_from; _global_pops += n );
+    if (_cm->verbose_medium())
+      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
+                             _task_id, n);
+    for (int i = 0; i < n; ++i) {
+      bool success = _task_queue->push(buffer[i]);
+      // We only call this when the local queue is empty or under a
+      // given target limit. So, we do not expect this push to fail.
+      tmp_guarantee_CM( success, "invariant" );
+    }
+
+    statsOnly( int tmp_size = _task_queue->size();
+               if (tmp_size > _local_max_size)
+                 _local_max_size = tmp_size;
+               _local_pushes += n );
+  }
+
+  // this operation was quite expensive, so decrease the limits
+  decrease_limits();
+}
+
+void CMTask::drain_local_queue(bool partially) {
+  if (has_aborted())
+    return;
+
+  // Decide what the target size is, depending whether we're going to
+  // drain it partially (so that other tasks can steal if they run out
+  // of things to do) or totally (at the very end).
+  size_t target_size;
+  if (partially)
+    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
+  else
+    target_size = 0;
+
+  if (_task_queue->size() > target_size) {
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
+                             _task_id, target_size);
+
+    oop obj;
+    bool ret = _task_queue->pop_local(obj);
+    while (ret) {
+      statsOnly( ++_local_pops );
+
+      if (_cm->verbose_high())
+        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
+                               (void*) obj);
+
+      tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
+                        "invariant" );
+
+      scan_object(obj);
+
+      if (_task_queue->size() <= target_size || has_aborted())
+        ret = false;
+      else
+        ret = _task_queue->pop_local(obj);
+    }
+
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
+                             _task_id, _task_queue->size());
+  }
+}
+
+void CMTask::drain_global_stack(bool partially) {
+  if (has_aborted())
+    return;
+
+  // We have a policy to drain the local queue before we attempt to
+  // drain the global stack.
+  tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
+
+  // Decide what the target size is, depending whether we're going to
+  // drain it partially (so that other tasks can steal if they run out
+  // of things to do) or totally (at the very end).  Notice that,
+  // because we move entries from the global stack in chunks or
+  // because another task might be doing the same, we might in fact
+  // drop below the target. But, this is not a problem.
+  size_t target_size;
+  if (partially)
+    target_size = _cm->partial_mark_stack_size_target();
+  else
+    target_size = 0;
+
+  if (_cm->mark_stack_size() > target_size) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
+                             _task_id, target_size);
+
+    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
+      get_entries_from_global_stack();
+      drain_local_queue(partially);
+    }
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
+                             _task_id, _cm->mark_stack_size());
+  }
+}
+
+// The SATB queue set makes several assumptions about whether the par or
+// non-par versions of its methods get called. This is why some of the
+// code is replicated. We should really get rid of the single-threaded
+// version of the code to simplify things.
+void CMTask::drain_satb_buffers() {
+  if (has_aborted())
+    return;
+
+  // We set this so that the regular clock knows that we're in the
+  // middle of draining buffers and doesn't set the abort flag when it
+  // notices that SATB buffers are available for draining. It'd be
+  // very counterproductive if it did that. :-)
+  _draining_satb_buffers = true;
+
+  CMObjectClosure oc(this);
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  if (ParallelGCThreads > 0)
+    satb_mq_set.set_par_closure(_task_id, &oc);
+  else
+    satb_mq_set.set_closure(&oc);
+
+  // This keeps claiming and applying the closure to completed buffers
+  // until we run out of buffers or we need to abort.
+  if (ParallelGCThreads > 0) {
+    while (!has_aborted() &&
+           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
+      statsOnly( ++_satb_buffers_processed );
+      regular_clock_call();
+    }
+  } else {
+    while (!has_aborted() &&
+           satb_mq_set.apply_closure_to_completed_buffer()) {
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
+      statsOnly( ++_satb_buffers_processed );
+      regular_clock_call();
+    }
+  }
+
+  if (!concurrent() && !has_aborted()) {
+    // We should only do this during remark.
+    if (ParallelGCThreads > 0)
+      satb_mq_set.par_iterate_closure_all_threads(_task_id);
+    else
+      satb_mq_set.iterate_closure_all_threads();
+  }
+
+  _draining_satb_buffers = false;
+
+  tmp_guarantee_CM( has_aborted() ||
+                    concurrent() ||
+                    satb_mq_set.completed_buffers_num() == 0, "invariant" );
+
+  if (ParallelGCThreads > 0)
+    satb_mq_set.set_par_closure(_task_id, NULL);
+  else
+    satb_mq_set.set_closure(NULL);
+
+  // again, this was a potentially expensive operation, decrease the
+  // limits to get the regular clock call early
+  decrease_limits();
+}
+
+void CMTask::drain_region_stack(BitMapClosure* bc) {
+  if (has_aborted())
+    return;
+
+  tmp_guarantee_CM( _region_finger == NULL,
+                    "it should be NULL when we're not scanning a region" );
+
+  if (!_cm->region_stack_empty()) {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
+                             _task_id, _cm->region_stack_size());
+
+    MemRegion mr = _cm->region_stack_pop();
+    // it returns MemRegion() if the pop fails
+    statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
+
+    while (mr.start() != NULL) {
+      if (_cm->verbose_medium())
+        gclog_or_tty->print_cr("[%d] we are scanning region "
+                               "["PTR_FORMAT", "PTR_FORMAT")",
+                               _task_id, mr.start(), mr.end());
+      tmp_guarantee_CM( mr.end() <= _cm->finger(),
+                        "otherwise the region shouldn't be on the stack" );
+      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
+      if (_nextMarkBitMap->iterate(bc, mr)) {
+        tmp_guarantee_CM( !has_aborted(),
+               "cannot abort the task without aborting the bitmap iteration" );
+
+        // We finished iterating over the region without aborting.
+        regular_clock_call();
+        if (has_aborted())
+          mr = MemRegion();
+        else {
+          mr = _cm->region_stack_pop();
+          // it returns MemRegion() if the pop fails
+          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
+        }
+      } else {
+        guarantee( has_aborted(), "currently the only way to do so" );
+
+        // The only way to abort the bitmap iteration is to return
+        // false from the do_bit() method. However, inside the
+        // do_bit() method we move the _region_finger to point to the
+        // object currently being looked at. So, if we bail out, we
+        // have definitely set _region_finger to something non-null.
+        guarantee( _region_finger != NULL, "invariant" );
+
+        // The iteration was actually aborted. So now _region_finger
+        // points to the address of the object we last scanned. If we
+        // leave it there, when we restart this task, we will rescan
+        // the object. It is easy to avoid this. We move the finger by
+        // enough to point to the next possible object header (the
+        // bitmap knows by how much we need to move it as it knows its
+        // granularity).
+        MemRegion newRegion =
+          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
+
+        if (!newRegion.is_empty()) {
+          if (_cm->verbose_low()) {
+            gclog_or_tty->print_cr("[%d] pushing unscanned region"
+                                   "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
+                                   _task_id,
+                                   newRegion.start(), newRegion.end());
+          }
+          // Now push the part of the region we didn't scan on the
+          // region stack to make sure a task scans it later.
+          _cm->region_stack_push(newRegion);
+        }
+        // break from while
+        mr = MemRegion();
+      }
+      _region_finger = NULL;
+    }
+
+    // We only push regions on the region stack during evacuation
+    // pauses. So if we exit the above loop because the region
+    // stack is empty, it will remain empty until the next yield
+    // point. So, the guarantee below is safe.
+    guarantee( has_aborted() || _cm->region_stack_empty(),
+               "only way to exit the loop" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
+                             _task_id, _cm->region_stack_size());
+  }
+}
+
+void CMTask::print_stats() {
+  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
+                         _task_id, _calls);
+  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
+                         _elapsed_time_ms, _termination_time_ms);
+  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
+                         _step_times_ms.num(), _step_times_ms.avg(),
+                         _step_times_ms.sd());
+  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
+                         _step_times_ms.maximum(), _step_times_ms.sum());
+
+#if _MARKING_STATS_
+  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
+                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
+                         _all_clock_intervals_ms.sd());
+  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
+                         _all_clock_intervals_ms.maximum(),
+                         _all_clock_intervals_ms.sum());
+  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
+                         _clock_due_to_scanning, _clock_due_to_marking);
+  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
+                         _objs_scanned, _objs_found_on_bitmap);
+  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
+                         _local_pushes, _local_pops, _local_max_size);
+  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
+                         _global_pushes, _global_pops, _global_max_size);
+  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
+                         _global_transfers_to,_global_transfers_from);
+  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
+                         _regions_claimed, _region_stack_pops);
+  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
+  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
+                         _steal_attempts, _steals);
+  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
+  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
+                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
+  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
+                         _aborted_timed_out, _aborted_satb, _aborted_termination);
+#endif // _MARKING_STATS_
+}
+
+/*****************************************************************************
+
+    The do_marking_step(time_target_ms) method is the building block
+    of the parallel marking framework. It can be called in parallel
+    with other invocations of do_marking_step() on different tasks
+    (but only one per task, obviously) and concurrently with the
+    mutator threads, or during remark, hence it eliminates the need
+    for two versions of the code. When called during remark, it will
+    pick up from where the task left off during the concurrent marking
+    phase. Interestingly, tasks are also claimable during evacuation
+    pauses, since do_marking_step() ensures that it aborts before
+    it needs to yield.
+
+    The data structures that it uses to do marking work are the
+    following:
+
+      (1) Marking Bitmap. If there are gray objects that appear only
+      on the bitmap (this happens either when dealing with an overflow
+      or when the initial marking phase has simply marked the roots
+      and didn't push them on the stack), then tasks claim heap
+      regions whose bitmap they then scan to find gray objects. A
+      global finger indicates where the end of the last claimed region
+      is. A local finger indicates how far into the region a task has
+      scanned. The two fingers are used to determine how to gray an
+      object (i.e. whether simply marking it is OK, as it will be
+      visited by a task in the future, or whether it needs to be also
+      pushed on a stack).
+
+      (2) Local Queue. The local queue of the task, which the task can
+      access reasonably efficiently. Other tasks can steal from
+      it when they run out of work. Throughout the marking phase, a
+      task attempts to keep its local queue short but not totally
+      empty, so that entries are available for stealing by other
+      tasks. Only when there is no more work will a task totally
+      drain its local queue.
+
+      (3) Global Mark Stack. This handles local queue overflow. During
+      marking, only sets of entries are moved between it and the local
+      queues, as access to it requires a mutex and more fine-grained
+      interaction with it might cause contention. If it
+      overflows, then the marking phase should restart and iterate
+      over the bitmap to identify gray objects. Throughout the marking
+      phase, tasks attempt to keep the global mark stack at a small
+      length but not totally empty, so that entries are available for
+      popping by other tasks. Only when there is no more work will
+      tasks totally drain the global mark stack.
+
+      (4) Global Region Stack. Entries on it correspond to areas of
+      the bitmap that need to be scanned since they contain gray
+      objects. Pushes on the region stack only happen during
+      evacuation pauses and typically correspond to areas covered by
+      GC LABs. If it overflows, then the marking phase should restart
+      and iterate over the bitmap to identify gray objects. Tasks will
+      try to totally drain the region stack as soon as possible.
+
+      (5) SATB Buffer Queue. This is where completed SATB buffers are
+      made available. Buffers are regularly removed from this queue
+      and scanned for roots, so that the queue doesn't get too
+      long. During remark, all completed buffers are processed, as
+      well as the filled-in parts of any uncompleted buffers.
+
+    The do_marking_step() method tries to abort when the time target
+    has been reached. There are a few other cases when the
+    do_marking_step() method also aborts:
+
+      (1) When the marking phase has been aborted (after a Full GC).
+
+      (2) When a global overflow (either on the global stack or the
+      region stack) has been triggered. Before the task aborts, it
+      will actually sync up with the other tasks to ensure that all
+      the marking data structures (local queues, stacks, fingers etc.)
+      are re-initialised so that when do_marking_step() completes,
+      the marking phase can immediately restart.
+
+      (3) When enough completed SATB buffers are available. The
+      do_marking_step() method only tries to drain SATB buffers right
+      at the beginning. So, if enough buffers are available, the
+      marking step aborts and the SATB buffers are processed at
+      the beginning of the next invocation.
+
+      (4) To yield. When we have to yield, we abort and yield
+      right at the end of do_marking_step(). This saves us from a lot
+      of hassle as, by yielding, we might allow a Full GC. If this
+      happens then objects will be compacted underneath our feet, the
+      heap might shrink, etc. We avoid having to check for all this by
+      just aborting and doing the yield right at the end.
+
+    From the above it follows that the do_marking_step() method should
+    be called in a loop (or, otherwise, regularly) until it completes.
+
+    If a marking step completes without its has_aborted() flag being
+    true, it means it has completed the current marking phase (and
+    also all other marking tasks have done so and have all synced up).
+
+    A method called regular_clock_call() is invoked "regularly" (in
+    sub-millisecond intervals) throughout marking. It is this clock
+    method that checks all the abort conditions which were mentioned
+    above and decides when the task should abort. A work-based scheme
+    is used to trigger this clock method: it is called when the number
+    of object words the marking phase has scanned or the number of
+    references the marking phase has visited reaches a given limit.
+    Additional invocations of the clock method have been planted in a
+    few other strategic places too. The initial reason for the clock
+    method was to avoid calling vtime too regularly, as it is quite
+    expensive. So, once it was in place, it was natural to piggy-back
+    all the other conditions on it too and not constantly check them
+    throughout the code.
+
+ *****************************************************************************/
+
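+// A rough sketch of the calling pattern that the description above
+// expects from a driver (the actual drivers, e.g. CMConcurrentMarkingTask
+// and CMRemarkTask, also deal with timing, yielding and overflow handling
+// between invocations):
+//
+//   do {
+//     task->do_marking_step(target_ms);
+//     // ...yield, sleep, or handle overflow as appropriate...
+//   } while (task->has_aborted() && !cm->has_aborted());
+//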
+void CMTask::do_marking_step(double time_target_ms) {
+  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
+  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
+
+  guarantee( concurrent() || _cm->region_stack_empty(),
+             "the region stack should have been cleared before remark" );
+  guarantee( _region_finger == NULL,
+             "this should be non-null only when a region is being scanned" );
+
+  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
+  guarantee( _task_queues != NULL, "invariant" );
+  guarantee( _task_queue != NULL,  "invariant" );
+  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
+
+  guarantee( !_claimed,
+             "only one thread should claim this task at any one time" );
+
+  // OK, this doesn't safeguard against all possible scenarios, as it is
+  // possible for two threads to set the _claimed flag at the same
+  // time. But it is only for debugging purposes anyway and it will
+  // catch most problems.
+  _claimed = true;
+
+  _start_time_ms = os::elapsedVTime() * 1000.0;
+  statsOnly( _interval_start_time_ms = _start_time_ms );
+
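+  // The prediction below is the typical overshoot of previous steps
+  // (see _marking_step_diffs_ms, which is updated when a step times
+  // out); shaving it off the requested target means that, on average,
+  // the step should finish close to time_target_ms.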
+  double diff_prediction_ms =
+    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
+  _time_target_ms = time_target_ms - diff_prediction_ms;
+
+  // set up the variables that are used in the work-based scheme to
+  // call the regular clock method
+  _words_scanned = 0;
+  _refs_reached  = 0;
+  recalculate_limits();
+
+  // clear all flags
+  clear_has_aborted();
+  _has_aborted_timed_out = false;
+  _draining_satb_buffers = false;
+
+  ++_calls;
+
+  if (_cm->verbose_low())
+    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
+                           "target = %1.2lfms >>>>>>>>>>",
+                           _task_id, _calls, _time_target_ms);
+
+  // Set up the bitmap and oop closures. Anything that uses them is
+  // eventually called from this method, so it is OK to allocate them
+  // on this frame's stack.
+  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
+  CMOopClosure    oop_closure(_g1h, _cm, this);
+  set_oop_closure(&oop_closure);
+
+  if (_cm->has_overflown()) {
+    // This can happen if the region stack or the mark stack overflows
+    // during a GC pause and this task, after a yield point,
+    // restarts. We have to abort as we need to get into the overflow
+    // protocol which happens right at the end of this task.
+    set_has_aborted();
+  }
+
+  // First drain any available SATB buffers. After this, we will not
+  // look at SATB buffers before the next invocation of this method.
+  // If enough completed SATB buffers are queued up, the regular clock
+  // will abort this task so that it restarts.
+  drain_satb_buffers();
+  // ...then partially drain the local queue and the global stack
+  drain_local_queue(true);
+  drain_global_stack(true);
+
+  // Then totally drain the region stack.  We will not look at
+  // it again before the next invocation of this method. Entries on
+  // the region stack are only added during evacuation pauses, for
+  // which we have to yield. When we do, we abort the task anyway so
+  // it will look at the region stack again when it restarts.
+  bitmap_closure.set_scanning_heap_region(false);
+  drain_region_stack(&bitmap_closure);
+  // ...then partially drain the local queue and the global stack
+  drain_local_queue(true);
+  drain_global_stack(true);
+
+  do {
+    if (!has_aborted() && _curr_region != NULL) {
+      // This means that we're already holding on to a region.
+      tmp_guarantee_CM( _finger != NULL,
+                        "if region is not NULL, then the finger "
+                        "should not be NULL either" );
+
+      // We might have restarted this task after an evacuation pause
+      // which might have evacuated the region we're holding on to
+      // underneath our feet. Let's read its limit again to make sure
+      // that we do not iterate over a region of the heap that
+      // contains garbage (update_region_limit() will also move
+      // _finger to the start of the region if it is found empty).
+      update_region_limit();
+      // We will start from _finger not from the start of the region,
+      // as we might be restarting this task after aborting half-way
+      // through scanning this region. In this case, _finger points to
+      // the address where we last found a marked object. If this is a
+      // fresh region, _finger points to start().
+      MemRegion mr = MemRegion(_finger, _region_limit);
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] we're scanning part "
+                               "["PTR_FORMAT", "PTR_FORMAT") "
+                               "of region "PTR_FORMAT,
+                               _task_id, _finger, _region_limit, _curr_region);
+
+      // Let's iterate over the bitmap of the part of the
+      // region that is left.
+      bitmap_closure.set_scanning_heap_region(true);
+      if (mr.is_empty() ||
+          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
+        // We successfully completed iterating over the region. Now,
+        // let's give up the region.
+        giveup_current_region();
+        regular_clock_call();
+      } else {
+        guarantee( has_aborted(), "currently the only way to do so" );
+        // The only way to abort the bitmap iteration is to return
+        // false from the do_bit() method. However, inside the
+        // do_bit() method we move the _finger to point to the
+        // object currently being looked at. So, if we bail out, we
+        // have definitely set _finger to something non-null.
+        guarantee( _finger != NULL, "invariant" );
+
+        // Region iteration was actually aborted. So now _finger
+        // points to the address of the object we last scanned. If we
+        // leave it there, when we restart this task, we will rescan
+        // the object. It is easy to avoid this. We move the finger by
+        // enough to point to the next possible object header (the
+        // bitmap knows by how much we need to move it as it knows its
+        // granularity).
+        move_finger_to(_nextMarkBitMap->nextWord(_finger));
+      }
+    }
+    // At this point we have either completed iterating over the
+    // region we were holding on to, or we have aborted.
+
+    // We then partially drain the local queue and the global stack.
+    // (Do we really need this?)
+    drain_local_queue(true);
+    drain_global_stack(true);
+
+    // Read the note on the claim_region() method on why it might
+    // return NULL with potentially more regions available for
+    // claiming and why we have to check out_of_regions() to determine
+    // whether we're done or not.
+    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
+      // We are going to try to claim a new region. We should have
+      // given up on the previous one.
+      tmp_guarantee_CM( _curr_region  == NULL &&
+                        _finger       == NULL &&
+                        _region_limit == NULL, "invariant" );
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
+      HeapRegion* claimed_region = _cm->claim_region(_task_id);
+      if (claimed_region != NULL) {
+        // Yes, we managed to claim one
+        statsOnly( ++_regions_claimed );
+
+        if (_cm->verbose_low())
+          gclog_or_tty->print_cr("[%d] we successfully claimed "
+                                 "region "PTR_FORMAT,
+                                 _task_id, claimed_region);
+
+        setup_for_region(claimed_region);
+        tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
+      }
+      // It is important to call the regular clock here. It might take
+      // a while to claim a region if, for example, we hit a large
+      // block of empty regions. So we need to call the regular clock
+      // method once round the loop to make sure it's called
+      // frequently enough.
+      regular_clock_call();
+    }
+
+    if (!has_aborted() && _curr_region == NULL) {
+      tmp_guarantee_CM( _cm->out_of_regions(),
+                        "at this point we should be out of regions" );
+    }
+  } while ( _curr_region != NULL && !has_aborted());
+
+  if (!has_aborted()) {
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be pushing objects to it concurrently. We also cannot
+    // check if the region stack is empty because if a thread is aborting
+    // it can push a partially done region back.
+    tmp_guarantee_CM( _cm->out_of_regions(),
+                      "at this point we should be out of regions" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
+
+    // Try to reduce the number of available SATB buffers so that
+    // remark has less work to do.
+    drain_satb_buffers();
+  }
+
+  // Since we've done everything else, we can now totally drain the
+  // local queue and global stack.
+  drain_local_queue(false);
+  drain_global_stack(false);
+
+  // Attempt at work stealing from other tasks' queues.
+  if (!has_aborted()) {
+    // We have not aborted. This means that we have finished all that
+    // we could. Let's try to do some stealing...
+
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be pushing objects to it concurrently. We also cannot
+    // check if the region stack is empty because if a thread is aborting
+    // it can push a partially done region back.
+    guarantee( _cm->out_of_regions() &&
+               _task_queue->size() == 0, "only way to reach here" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
+
+    while (!has_aborted()) {
+      oop obj;
+      statsOnly( ++_steal_attempts );
+
+      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
+        if (_cm->verbose_medium())
+          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
+                                 _task_id, (void*) obj);
+
+        statsOnly( ++_steals );
+
+        tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
+                          "any stolen object should be marked" );
+        scan_object(obj);
+
+        // And since we're towards the end, let's totally drain the
+        // local queue and global stack.
+        drain_local_queue(false);
+        drain_global_stack(false);
+      } else {
+        break;
+      }
+    }
+  }
+
+  // We still haven't aborted. Now, let's try to get into the
+  // termination protocol.
+  if (!has_aborted()) {
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be concurrently pushing objects on it. We also cannot
+    // check if the region stack is empty because if a thread is aborting
+    // it can push a partially done region back.
+    guarantee( _cm->out_of_regions() &&
+               _task_queue->size() == 0, "only way to reach here" );
+
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
+
+    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
+    // The CMTask class also extends the TerminatorTerminator class,
+    // hence its should_exit_termination() method will also decide
+    // whether to exit the termination protocol or not.
+    bool finished = _cm->terminator()->offer_termination(this);
+    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
+    _termination_time_ms +=
+      termination_end_time_ms - _termination_start_time_ms;
+
+    if (finished) {
+      // We're all done.
+
+      if (_task_id == 0) {
+        // let's allow task 0 to do this
+        if (concurrent()) {
+          guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
+          // we need to set this to false before the next
+          // safepoint. This way we ensure that the marking phase
+          // doesn't observe any more heap expansions.
+          _cm->clear_concurrent_marking_in_progress();
+        }
+      }
+
+      // We can now guarantee that the global stack is empty, since
+      // all other tasks have finished.
+      guarantee( _cm->out_of_regions() &&
+                 _cm->region_stack_empty() &&
+                 _cm->mark_stack_empty() &&
+                 _task_queue->size() == 0 &&
+                 !_cm->has_overflown() &&
+                 !_cm->mark_stack_overflow() &&
+                 !_cm->region_stack_overflow(),
+                 "only way to reach here" );
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
+    } else {
+      // Apparently there's more work to do. Let's abort this task. The
+      // caller will then restart it and we can hopefully find more
+      // things to do.
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
+
+      set_has_aborted();
+      statsOnly( ++_aborted_termination );
+    }
+  }
+
+  // Mainly for debugging purposes to make sure that a pointer to the
+  // closure which was allocated on this frame's stack doesn't
+  // escape it by accident.
+  set_oop_closure(NULL);
+  double end_time_ms = os::elapsedVTime() * 1000.0;
+  double elapsed_time_ms = end_time_ms - _start_time_ms;
+  // Update the step history.
+  _step_times_ms.add(elapsed_time_ms);
+
+  if (has_aborted()) {
+    // The task was aborted for some reason.
+
+    statsOnly( ++_aborted );
+
+    if (_has_aborted_timed_out) {
+      double diff_ms = elapsed_time_ms - _time_target_ms;
+      // Keep statistics of how well we did with respect to hitting
+      // our target only if we actually timed out (if we aborted for
+      // other reasons, then the results might get skewed).
+      _marking_step_diffs_ms.add(diff_ms);
+    }
+
+    if (_cm->has_overflown()) {
+      // This is the interesting one. We aborted because a global
+      // overflow was raised. This means we have to restart the
+      // marking phase and start iterating over regions. However, in
+      // order to do this we have to make sure that all tasks stop
+      // what they are doing and re-initialise in a safe manner. We
+      // will achieve this with the use of two barrier sync points.
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
+
+      _cm->enter_first_sync_barrier(_task_id);
+      // When we exit this sync barrier we know that all tasks have
+      // stopped doing marking work. So, it's now safe to
+      // re-initialise our data structures. At the end of this method,
+      // task 0 will clear the global data structures.
+
+      statsOnly( ++_aborted_overflow );
+
+      // We clear the local state of this task...
+      clear_region_fields();
+
+      // ...and enter the second barrier.
+      _cm->enter_second_sync_barrier(_task_id);
+      // At this point everything has been re-initialised and we're
+      // ready to restart.
+    }
+
+    if (_cm->verbose_low()) {
+      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
+                             "elapsed = %1.2lfms <<<<<<<<<<",
+                             _task_id, _time_target_ms, elapsed_time_ms);
+      if (_cm->has_aborted())
+        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
+                               _task_id);
+    }
+  } else {
+    if (_cm->verbose_low())
+      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
+                             "elapsed = %1.2lfms <<<<<<<<<<",
+                             _task_id, _time_target_ms, elapsed_time_ms);
+  }
+
+  _claimed = false;
+}
+
+CMTask::CMTask(int task_id,
+               ConcurrentMark* cm,
+               CMTaskQueue* task_queue,
+               CMTaskQueueSet* task_queues)
+  : _g1h(G1CollectedHeap::heap()),
+    _co_tracker(G1CMGroup),
+    _task_id(task_id), _cm(cm),
+    _claimed(false),
+    _nextMarkBitMap(NULL), _hash_seed(17),
+    _task_queue(task_queue),
+    _task_queues(task_queues),
+    _oop_closure(NULL) {
+  guarantee( task_queue != NULL, "invariant" );
+  guarantee( task_queues != NULL, "invariant" );
+
+  statsOnly( _clock_due_to_scanning = 0;
+             _clock_due_to_marking  = 0 );
+
+  _marking_step_diffs_ms.add(0.5);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,1049 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class G1CollectedHeap;
+class CMTask;
+typedef GenericTaskQueue<oop> CMTaskQueue;
+typedef GenericTaskQueueSet<oop> CMTaskQueueSet;
+
+// A generic CM bit map.  This is essentially a wrapper around the BitMap
+// class, with one bit per (1<<_shifter) HeapWords.
+
+class CMBitMapRO {
+ protected:
+  HeapWord* _bmStartWord;      // base address of range covered by map
+  size_t    _bmWordSize;       // map size (in #HeapWords covered)
+  const int _shifter;          // map to char or bit
+  VirtualSpace _virtual_space; // underlying the bit map
+  BitMap    _bm;               // the bit map itself
+
+ public:
+  // constructor
+  CMBitMapRO(ReservedSpace rs, int shifter);
+
+  enum { do_yield = true };
+
+  // inquiries
+  HeapWord* startWord()   const { return _bmStartWord; }
+  size_t    sizeInWords() const { return _bmWordSize;  }
+  // the following is one past the last word in space
+  HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }
+
+  // read marks
+
+  bool isMarked(HeapWord* addr) const {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.at(heapWordToOffset(addr));
+  }
+
+  // iteration
+  bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
+  bool iterate(BitMapClosure* cl, MemRegion mr);
+
+  // Return the address corresponding to the next marked bit at or after
+  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
+  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
+  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
+                                     HeapWord* limit = NULL) const;
+  // Return the address corresponding to the next unmarked bit at or after
+  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
+  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
+  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
+                                       HeapWord* limit = NULL) const;
+
+  // conversion utilities
+  // XXX Fix these so that offsets are size_t's...
+  HeapWord* offsetToHeapWord(size_t offset) const {
+    return _bmStartWord + (offset << _shifter);
+  }
+  size_t heapWordToOffset(HeapWord* addr) const {
+    return pointer_delta(addr, _bmStartWord) >> _shifter;
+  }
+  int heapWordDiffToOffsetDiff(size_t diff) const;
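+  // nextWord() returns the first word of the bitmap granule after the
+  // one containing "addr"; each granule covers (1 << _shifter) HeapWords.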
+  HeapWord* nextWord(HeapWord* addr) {
+    return offsetToHeapWord(heapWordToOffset(addr) + 1);
+  }
+
+  void mostly_disjoint_range_union(BitMap*   from_bitmap,
+                                   size_t    from_start_index,
+                                   HeapWord* to_start_word,
+                                   size_t    word_num);
+
+  // debugging
+  NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
+};
+
+class CMBitMap : public CMBitMapRO {
+
+ public:
+  // constructor
+  CMBitMap(ReservedSpace rs, int shifter) :
+    CMBitMapRO(rs, shifter) {}
+
+  // write marks
+  void mark(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    _bm.at_put(heapWordToOffset(addr), true);
+  }
+  void clear(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    _bm.at_put(heapWordToOffset(addr), false);
+  }
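+  // The par variants may be called concurrently; they return true iff
+  // this call actually changed the bit.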
+  bool parMark(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.par_at_put(heapWordToOffset(addr), true);
+  }
+  bool parClear(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.par_at_put(heapWordToOffset(addr), false);
+  }
+  void markRange(MemRegion mr);
+  void clearAll();
+  void clearRange(MemRegion mr);
+
+  // Starting at the bit corresponding to "addr" (inclusive), find the next
+  // "1" bit, if any.  This bit starts some run of consecutive "1"'s; find
+  // the end of this run (stopping at "end_addr").  Return the MemRegion
+  // covering from the start of the region corresponding to the first bit
+  // of the run to the end of the region corresponding to the last bit of
+  // the run.  If there is no "1" bit at or after "addr", return an empty
+  // MemRegion.
+  MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
+};
+
+// Represents a marking stack used by the CM collector.
+// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
+class CMMarkStack {
+  ConcurrentMark* _cm;
+  oop*   _base;      // bottom of stack
+  jint   _index;     // one more than last occupied index
+  jint   _capacity;  // max #elements
+  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
+
+  bool   _overflow;
+  DEBUG_ONLY(bool _drain_in_progress;)
+  DEBUG_ONLY(bool _drain_in_progress_yields;)
+
+ public:
+  CMMarkStack(ConcurrentMark* cm);
+  ~CMMarkStack();
+
+  void allocate(size_t size);
+
+  oop pop() {
+    if (!isEmpty()) {
+      return _base[--_index] ;
+    }
+    return NULL;
+  }
+
+  // If overflow happens, don't do the push, and record the overflow.
+  // *Requires* that "ptr" is already marked.
+  void push(oop ptr) {
+    if (isFull()) {
+      // Record overflow.
+      _overflow = true;
+      return;
+    } else {
+      _base[_index++] = ptr;
+      NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
+    }
+  }
+  // Non-block impl.  Note: concurrency is allowed only with other
+  // "par_push" operations, not with "pop" or "drain".  We would need
+  // parallel versions of them if such concurrency was desired.
+  void par_push(oop ptr);
+
+  // Pushes the first "n" elements of "ptr_arr" on the stack.
+  // Non-block impl.  Note: concurrency is allowed only with other
+  // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
+  void par_adjoin_arr(oop* ptr_arr, int n);
+
+  // Pushes the first "n" elements of "ptr_arr" on the stack.
+  // Locking impl: concurrency is allowed only with
+  // "par_push_arr" and/or "par_pop_arr" operations, which use the same
+  // locking strategy.
+  void par_push_arr(oop* ptr_arr, int n);
+
+  // If returns false, the array was empty.  Otherwise, removes up to "max"
+  // elements from the stack, and transfers them to "ptr_arr" in an
+  // unspecified order.  The actual number transferred is given in "n" ("n
+  // == 0" is deliberately redundant with the return value.)  Locking impl:
+  // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
+  // operations, which use the same locking strategy.
+  bool par_pop_arr(oop* ptr_arr, int max, int* n);
+
+  // Drain the mark stack, applying the given closure to all fields of
+  // objects on the stack.  (That is, continue until the stack is empty,
+  // even if closure applications add entries to the stack.)  The "bm"
+  // argument, if non-null, may be used to verify that only marked objects
+  // are on the mark stack.  If "yield_after" is "true", then the
+  // concurrent marker performing the drain offers to yield after
+  // processing each object.  If a yield occurs, stops the drain operation
+  // and returns false.  Otherwise, returns true.
+  template<class OopClosureClass>
+  bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
+
+  bool isEmpty()    { return _index == 0; }
+  bool isFull()     { return _index == _capacity; }
+  int maxElems()    { return _capacity; }
+
+  bool overflow() { return _overflow; }
+  void clear_overflow() { _overflow = false; }
+
+  int  size() { return _index; }
+
+  void setEmpty()   { _index = 0; clear_overflow(); }
+
+  // Record the current size; a subsequent "oops_do" will iterate only over
+  // indices valid at the time of this call.
+  void set_oops_do_bound(jint bound = -1) {
+    if (bound == -1) {
+      _oops_do_bound = _index;
+    } else {
+      _oops_do_bound = bound;
+    }
+  }
+  jint oops_do_bound() { return _oops_do_bound; }
+  // iterate over the oops in the mark stack, up to the bound recorded via
+  // the call above.
+  void oops_do(OopClosure* f);
+};
+
+class CMRegionStack {
+  MemRegion* _base;
+  jint _capacity;
+  jint _index;
+  jint _oops_do_bound;
+  bool _overflow;
+public:
+  CMRegionStack();
+  ~CMRegionStack();
+  void allocate(size_t size);
+
+  // This is lock-free; assumes that it will only be called in parallel
+  // with other "push" operations (no pops).
+  void push(MemRegion mr);
+
+  // Lock-free; assumes that it will only be called in parallel
+  // with other "pop" operations (no pushes).
+  MemRegion pop();
+
+  bool isEmpty()    { return _index == 0; }
+  bool isFull()     { return _index == _capacity; }
+
+  bool overflow() { return _overflow; }
+  void clear_overflow() { _overflow = false; }
+
+  int  size() { return _index; }
+
+  // It iterates over the entries in the region stack and it
+  // invalidates (i.e. assigns MemRegion()) the ones that point to
+  // regions in the collection set.
+  bool invalidate_entries_into_cset();
+
+  // This gives an upper bound up to which the iteration in
+  // invalidate_entries_into_cset() will reach. This prevents
+  // newly-added entries from being unnecessarily scanned.
+  void set_oops_do_bound() {
+    _oops_do_bound = _index;
+  }
+
+  void setEmpty()   { _index = 0; clear_overflow(); }
+};
+
+// this will enable a variety of different statistics per GC task
+#define _MARKING_STATS_       0
+// this will enable the higher verbose levels
+#define _MARKING_VERBOSE_     0
+
+#if _MARKING_STATS_
+#define statsOnly(statement)  \
+do {                          \
+  statement ;                 \
+} while (0)
+#else // _MARKING_STATS_
+#define statsOnly(statement)  \
+do {                          \
+} while (0)
+#endif // _MARKING_STATS_
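+// For example, statsOnly( ++_local_pops ) expands to the increment when
+// _MARKING_STATS_ is 1, and to an empty statement otherwise.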
+
+// Some extra guarantees that I like to also enable in optimised mode
+// when debugging. If you want to enable them, comment out the assert
+// macro and uncomment the guarantee macro below.
+// #define tmp_guarantee_CM(expr, str) guarantee(expr, str)
+#define tmp_guarantee_CM(expr, str) assert(expr, str)
+
+typedef enum {
+  no_verbose  = 0,   // verbose turned off
+  stats_verbose,     // only prints stats at the end of marking
+  low_verbose,       // low verbose, mostly per region and per major event
+  medium_verbose,    // a bit more detailed than low
+  high_verbose       // per object verbose
+} CMVerboseLevel;
+
+
+class ConcurrentMarkThread;
+
+class ConcurrentMark {
+  friend class ConcurrentMarkThread;
+  friend class CMTask;
+  friend class CMBitMapClosure;
+  friend class CSMarkOopClosure;
+  friend class CMGlobalObjectClosure;
+  friend class CMRemarkTask;
+  friend class CMConcurrentMarkingTask;
+  friend class G1ParNoteEndTask;
+  friend class CalcLiveObjectsClosure;
+
+protected:
+  ConcurrentMarkThread* _cmThread;   // the thread doing the work
+  G1CollectedHeap*      _g1h;        // the heap.
+  size_t                _parallel_marking_threads; // the number of marking
+                                                   // threads we'll use
+  double                _sleep_factor; // how much we have to sleep, with
+                                       // respect to the work we just did, to
+                                       // meet the marking overhead goal
+  double                _marking_task_overhead; // marking target overhead for
+                                                // a single task
+
+  // same as the two above, but for the cleanup task
+  double                _cleanup_sleep_factor;
+  double                _cleanup_task_overhead;
+
+  // Stuff related to age cohort processing.
+  struct ParCleanupThreadState {
+    char _pre[64];
+    UncleanRegionList list;
+    char _post[64];
+  };
+  ParCleanupThreadState** _par_cleanup_thread_state;
+
+  // CMS marking support structures
+  CMBitMap                _markBitMap1;
+  CMBitMap                _markBitMap2;
+  CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
+  CMBitMap*               _nextMarkBitMap; // under-construction mark bitmap
+  bool                    _at_least_one_mark_complete;
+
+  BitMap                  _region_bm;
+  BitMap                  _card_bm;
+
+  // Heap bounds
+  HeapWord*               _heap_start;
+  HeapWord*               _heap_end;
+
+  // For gray objects
+  CMMarkStack             _markStack; // Grey objects behind global finger.
+  CMRegionStack           _regionStack; // Grey regions behind global finger.
+  HeapWord* volatile      _finger;  // the global finger, region aligned,
+                                    // always points to the end of the
+                                    // last claimed region
+
+  // marking tasks
+  size_t                  _max_task_num; // maximum task number
+  size_t                  _active_tasks; // task num currently active
+  CMTask**                _tasks;        // task queue array (max_task_num len)
+  CMTaskQueueSet*         _task_queues;  // task queue set
+  ParallelTaskTerminator  _terminator;   // for termination
+
+  // Two sync barriers that are used to synchronise tasks when an
+  // overflow occurs. The algorithm is the following. All tasks enter
+  // the first one to ensure that they have all stopped manipulating
+  // the global data structures. After they exit it, they re-initialise
+  // their data structures and task 0 re-initialises the global data
+  // structures. Then, they enter the second sync barrier. This
+  // ensures that no task starts doing work before all data
+  // structures (local and global) have been re-initialised. When they
+  // exit it, they are free to start working again.
+  WorkGangBarrierSync     _first_overflow_barrier_sync;
+  WorkGangBarrierSync     _second_overflow_barrier_sync;
+
+
+  // this is set by any task, when an overflow on the global data
+  // structures is detected.
+  volatile bool           _has_overflown;
+  // true: marking is concurrent, false: we're in remark
+  volatile bool           _concurrent;
+  // set at the end of a Full GC so that marking aborts
+  volatile bool           _has_aborted;
+  // used when remark aborts due to an overflow to indicate that
+  // another concurrent marking phase should start
+  volatile bool           _restart_for_overflow;
+
+  // This is true from the very start of concurrent marking until the
+  // point when all the tasks complete their work. It is really used
+  // to determine the points between the end of concurrent marking and
+  // time of remark.
+  volatile bool           _concurrent_marking_in_progress;
+
+  // verbose level
+  CMVerboseLevel          _verbose_level;
+
+  COTracker               _cleanup_co_tracker;
+
+  // These two fields are used to implement the optimisation that
+  // avoids pushing objects on the global/region stack if there are
+  // no collection set regions above the lowest finger.
+
+  // This is the lowest finger (among the global and local fingers),
+  // which is calculated before a new collection set is chosen.
+  HeapWord* _min_finger;
+  // If this flag is true, objects/regions that are marked below the
+  // finger should be pushed on the stack(s). If this flag is
+  // false, it is safe not to push them on the stack(s).
+  bool      _should_gray_objects;
+
+  // All of these times are in ms.
+  NumberSeq _init_times;
+  NumberSeq _remark_times;
+  NumberSeq   _remark_mark_times;
+  NumberSeq   _remark_weak_ref_times;
+  NumberSeq _cleanup_times;
+  double    _total_counting_time;
+  double    _total_rs_scrub_time;
+
+  double*   _accum_task_vtime;   // accumulated task vtime
+
+  WorkGang* _parallel_workers;
+
+  void weakRefsWork(bool clear_all_soft_refs);
+
+  void swapMarkBitMaps();
+
+  // It resets the global marking data structures, as well as the
+  // task local ones; should be called during initial mark.
+  void reset();
+  // It resets all the marking data structures.
+  void clear_marking_state();
+
+  // It should be called to indicate which phase we're in (concurrent
+  // mark or remark) and how many threads are currently active.
+  void set_phase(size_t active_tasks, bool concurrent);
+  // We do this after we're done with marking so that the marking data
+  // structures are initialised to a sensible and predictable state.
+  void set_non_marking_state();
+
+  // prints all gathered CM-related statistics
+  void print_stats();
+
+  // accessor methods
+  size_t parallel_marking_threads() { return _parallel_marking_threads; }
+  double sleep_factor()             { return _sleep_factor; }
+  double marking_task_overhead()    { return _marking_task_overhead;}
+  double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
+  double cleanup_task_overhead()    { return _cleanup_task_overhead;}
+
+  HeapWord*               finger()        { return _finger;   }
+  bool                    concurrent()    { return _concurrent; }
+  size_t                  active_tasks()  { return _active_tasks; }
+  ParallelTaskTerminator* terminator()    { return &_terminator; }
+
+  // It claims the next available region to be scanned by a marking
+  // task. It might return NULL if the next region is empty or we have
+  // run out of regions. In the latter case, out_of_regions()
+  // determines whether we've really run out of regions or the task
+  // should call claim_region() again.  This might seem a bit
+  // awkward. Originally, the code was written so that claim_region()
+  // either successfully returned with a non-empty region or there
+  // were no more regions to be claimed. The problem with this was
+  // that, in certain circumstances, it iterated over large chunks of
+  // the heap finding only empty regions and, while it was working, it
+  // was preventing the calling task from calling its regular clock
+  // method. So, this way, each task will spend very little time in
+  // claim_region() and is allowed to call the regular clock method
+  // frequently.
+  HeapRegion* claim_region(int task);
+
+  // It determines whether we've run out of regions to scan.
+  bool        out_of_regions() { return _finger == _heap_end; }
+
+  // Returns the task with the given id
+  CMTask* task(int id) {
+    guarantee( 0 <= id && id < (int) _active_tasks, "task id not within "
+               "active bounds" );
+    return _tasks[id];
+  }
+
+  // Returns the task queue with the given id
+  CMTaskQueue* task_queue(int id) {
+    guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within "
+               "active bounds" );
+    return (CMTaskQueue*) _task_queues->queue(id);
+  }
+
+  // Returns the task queue set
+  CMTaskQueueSet* task_queues()  { return _task_queues; }
+
+  // Access / manipulation of the overflow flag which is set to
+  // indicate that the global stack or region stack has overflown
+  bool has_overflown()           { return _has_overflown; }
+  void set_has_overflown()       { _has_overflown = true; }
+  void clear_has_overflown()     { _has_overflown = false; }
+
+  bool has_aborted()             { return _has_aborted; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
+
+  // Methods to enter the two overflow sync barriers
+  void enter_first_sync_barrier(int task_num);
+  void enter_second_sync_barrier(int task_num);
+
+public:
+  // Manipulation of the global mark stack.
+  // Notice that the first mark_stack_push is CAS-based, whereas the
+  // two below are Mutex-based. This is OK since the first one is only
+  // called during evacuation pauses and doesn't compete with the
+  // other two (which are called by the marking tasks during
+  // concurrent marking or remark).
+  bool mark_stack_push(oop p) {
+    _markStack.par_push(p);
+    if (_markStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  bool mark_stack_push(oop* arr, int n) {
+    _markStack.par_push_arr(arr, n);
+    if (_markStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  void mark_stack_pop(oop* arr, int max, int* n) {
+    _markStack.par_pop_arr(arr, max, n);
+  }
+  size_t mark_stack_size()              { return _markStack.size(); }
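+  // Target used by CMTask::drain_global_stack() when it only drains
+  // the global stack partially.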
+  size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
+  bool mark_stack_overflow()            { return _markStack.overflow(); }
+  bool mark_stack_empty()               { return _markStack.isEmpty(); }
+
+  // Manipulation of the region stack
+  bool region_stack_push(MemRegion mr) {
+    _regionStack.push(mr);
+    if (_regionStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  MemRegion region_stack_pop()          { return _regionStack.pop(); }
+  int region_stack_size()               { return _regionStack.size(); }
+  bool region_stack_overflow()          { return _regionStack.overflow(); }
+  bool region_stack_empty()             { return _regionStack.isEmpty(); }
+
+  bool concurrent_marking_in_progress() {
+    return _concurrent_marking_in_progress;
+  }
+  void set_concurrent_marking_in_progress() {
+    _concurrent_marking_in_progress = true;
+  }
+  void clear_concurrent_marking_in_progress() {
+    _concurrent_marking_in_progress = false;
+  }
+
+  void update_accum_task_vtime(int i, double vtime) {
+    _accum_task_vtime[i] += vtime;
+  }
+
+  double all_task_accum_vtime() {
+    double ret = 0.0;
+    for (int i = 0; i < (int)_max_task_num; ++i)
+      ret += _accum_task_vtime[i];
+    return ret;
+  }
+
+  // Attempts to steal an object from the task queues of other tasks
+  bool try_stealing(int task_num, int* hash_seed, oop& obj) {
+    return _task_queues->steal(task_num, hash_seed, obj);
+  }
+
+  // It grays an object by first marking it. Then, if it's behind the
+  // global finger, it also pushes it on the global stack.
+  void deal_with_reference(oop obj);
+
+  ConcurrentMark(ReservedSpace rs, int max_regions);
+  ~ConcurrentMark();
+  ConcurrentMarkThread* cmThread() { return _cmThread; }
+
+  CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
+  CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
+
+  // The following three are interaction between CM and
+  // G1CollectedHeap
+
+  // This notifies CM that a root during initial-mark needs to be
+  // grayed and it's MT-safe. Currently, we just mark it. But, in the
+  // future, we can experiment with pushing it on the stack and we can
+  // do this without changing G1CollectedHeap.
+  void grayRoot(oop p);
+  // It's used during evacuation pauses to gray a region, if
+  // necessary, and it's MT-safe. It assumes that the caller has
+  // marked any objects on that region. If _should_gray_objects is
+  // true and we're still doing concurrent marking, the region is
+  // pushed on the region stack, if it is located below the global
+  // finger, otherwise we do nothing.
+  void grayRegionIfNecessary(MemRegion mr);
+  // It's used during evacuation pauses to mark and, if necessary,
+  // gray a single object and it's MT-safe. It assumes the caller did
+  // not mark the object. If _should_gray_objects is true and we're
+  // still doing concurrent marking, the object is pushed on the
+  // global stack, if it is located below the global finger, otherwise
+  // we do nothing.
+  void markAndGrayObjectIfNecessary(oop p);
+
+  // This iterates over the bitmap of the previous marking and prints
+  // out all objects that are marked on the bitmap and indicates
+  // whether what they point to is also marked or not.
+  void print_prev_bitmap_reachable();
+
+  // Clear the next marking bitmap (will be called concurrently).
+  void clearNextBitmap();
+
+  // main CMS steps and related support
+  void checkpointRootsInitial();
+
+  // These two do the work that needs to be done before and after the
+  // initial root checkpoint. Since this checkpoint can be done at two
+  // different points (i.e. an explicit pause or piggy-backed on a
+  // young collection), it's nice to be able to easily share the
+  // pre/post code. It might be the case that we can put everything in
+  // the post method. TP
+  void checkpointRootsInitialPre();
+  void checkpointRootsInitialPost();
+
+  // Do concurrent phase of marking, to a tentative transitive closure.
+  void markFromRoots();
+
+  // Process all unprocessed SATB buffers. It is called at the
+  // beginning of an evacuation pause.
+  void drainAllSATBBuffers();
+
+  void checkpointRootsFinal(bool clear_all_soft_refs);
+  void checkpointRootsFinalWork();
+  void calcDesiredRegions();
+  void cleanup();
+  void completeCleanup();
+
+  // Mark in the previous bitmap.  NB: this is usually read-only, so use
+  // this carefully!
+  void markPrev(oop p);
+  void clear(oop p);
+  // Clears marks for all objects in the given range, for both prev and
+  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
+  // this carefully!
+  void clearRangeBothMaps(MemRegion mr);
+
+  // Record the current top of the mark and region stacks; a
+  // subsequent oops_do() on the mark stack and
+  // invalidate_entries_into_cset() on the region stack will iterate
+  // only over indices valid at the time of this call.
+  void set_oops_do_bound() {
+    _markStack.set_oops_do_bound();
+    _regionStack.set_oops_do_bound();
+  }
+  // Iterate over the oops in the mark stack and all local queues. It
+  // also calls invalidate_entries_into_cset() on the region stack.
+  void oops_do(OopClosure* f);
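+  // (Intended calling order, per the comments above: set_oops_do_bound()
+  // first, then oops_do(), so that only entries present when the bound was
+  // recorded are visited.)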
+  // It is called at the end of an evacuation pause during marking so
+  // that CM is notified of where the new end of the heap is. It
+  // doesn't do anything if concurrent_marking_in_progress() is false,
+  // unless the force parameter is true.
+  void update_g1_committed(bool force = false);
+
+  void complete_marking_in_collection_set();
+
+  // It indicates that a new collection set is being chosen.
+  void newCSet();
+  // It registers a collection set heap region with CM. This is used
+  // to determine whether any heap regions are located above the finger.
+  void registerCSetRegion(HeapRegion* hr);
+
+  // Returns "true" if at least one mark has been completed.
+  bool at_least_one_mark_complete() { return _at_least_one_mark_complete; }
+
+  bool isMarked(oop p) const {
+    assert(p != NULL && p->is_oop(), "expected an oop");
+    HeapWord* addr = (HeapWord*)p;
+    assert(addr >= _nextMarkBitMap->startWord() &&
+           addr < _nextMarkBitMap->endWord(), "in a region");
+
+    return _nextMarkBitMap->isMarked(addr);
+  }
+
+  inline bool not_yet_marked(oop p) const;
+
+  // XXX Debug code
+  bool containing_card_is_marked(void* p);
+  bool containing_cards_are_marked(void* start, void* last);
+
+  bool isPrevMarked(oop p) const {
+    assert(p != NULL && p->is_oop(), "expected an oop");
+    HeapWord* addr = (HeapWord*)p;
+    assert(addr >= _prevMarkBitMap->startWord() &&
+           addr < _prevMarkBitMap->endWord(), "in a region");
+
+    return _prevMarkBitMap->isMarked(addr);
+  }
+
+  inline bool do_yield_check(int worker_i = 0);
+  inline bool should_yield();
+
+  // Called to abort the marking cycle after a Full GC takes place.
+  void abort();
+
+  void disable_co_trackers();
+
+  // This prints the global/local fingers. It is used for debugging.
+  NOT_PRODUCT(void print_finger();)
+
+  void print_summary_info();
+
+  // The following indicate whether a given verbose level has been
+  // set. Notice that anything above stats is conditional on
+  // _MARKING_VERBOSE_ having been set to 1.
+  bool verbose_stats()
+    { return _verbose_level >= stats_verbose; }
+  bool verbose_low()
+    { return _MARKING_VERBOSE_ && _verbose_level >= low_verbose; }
+  bool verbose_medium()
+    { return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose; }
+  bool verbose_high()
+    { return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; }
+};
+
+// A class representing a marking task.
+class CMTask : public TerminatorTerminator {
+private:
+  enum PrivateConstants {
+    // the regular clock call is called once the scanned words reach
+    // this limit
+    words_scanned_period          = 12*1024,
+    // the regular clock call is called once the number of visited
+    // references reaches this limit
+    refs_reached_period           = 384,
+    // initial value for the hash seed, used in the work stealing code
+    init_hash_seed                = 17,
+    // how many entries will be transferred between global stack and
+    // local queues
+    global_stack_transfer_size    = 16
+  };
+
+  int                         _task_id;
+  G1CollectedHeap*            _g1h;
+  ConcurrentMark*             _cm;
+  CMBitMap*                   _nextMarkBitMap;
+  // the task queue of this task
+  CMTaskQueue*                _task_queue;
+  // the task queue set---needed for stealing
+  CMTaskQueueSet*             _task_queues;
+  // indicates whether the task has been claimed---this is only for
+  // debugging purposes
+  bool                        _claimed;
+
+  // number of calls to this task
+  int                         _calls;
+
+  // concurrent overhead over a single CPU for this task
+  COTracker                   _co_tracker;
+
+  // when the virtual timer reaches this time, the marking step should
+  // exit
+  double                      _time_target_ms;
+  // the start time of the current marking step
+  double                      _start_time_ms;
+
+  // the oop closure used for iterations over oops
+  OopClosure*                 _oop_closure;
+
+  // the region this task is scanning, NULL if we're not scanning any
+  HeapRegion*                 _curr_region;
+  // the local finger of this task, NULL if we're not scanning a region
+  HeapWord*                   _finger;
+  // limit of the region this task is scanning, NULL if we're not scanning one
+  HeapWord*                   _region_limit;
+
+  // This is used only when we scan regions popped from the region
+  // stack. It records the last object on such a region that we
+  // scanned. It is used to ensure that, if we abort region
+  // iteration, we do not rescan the first part of the region. This
+  // should be NULL when we're not scanning a region from the region
+  // stack.
+  HeapWord*                   _region_finger;
+
+  // the number of words this task has scanned
+  size_t                      _words_scanned;
+  // When _words_scanned reaches this limit, the regular clock is
+  // called. Notice that this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t                      _words_scanned_limit;
+  // the initial value of _words_scanned_limit (i.e. what it was
+  // before it was decreased).
+  size_t                      _real_words_scanned_limit;
+
+  // the number of references this task has visited
+  size_t                      _refs_reached;
+  // When _refs_reached reaches this limit, the regular clock is
+  // called. Notice that this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t                      _refs_reached_limit;
+  // the initial value of _refs_reached_limit (i.e. what it was before
+  // it was decreased).
+  size_t                      _real_refs_reached_limit;
+
+  // used by the work stealing stuff
+  int                         _hash_seed;
+  // if this is true, then the task has aborted for some reason
+  bool                        _has_aborted;
+  // set when the task aborts because it has met its time quota
+  bool                        _has_aborted_timed_out;
+  // true when we're draining SATB buffers; this avoids the task
+  // aborting due to SATB buffers being available (as we're already
+  // dealing with them)
+  bool                        _draining_satb_buffers;
+
+  // number sequence of past step times
+  NumberSeq                   _step_times_ms;
+  // elapsed time of this task
+  double                      _elapsed_time_ms;
+  // termination time of this task
+  double                      _termination_time_ms;
+  // when this task got into the termination protocol
+  double                      _termination_start_time_ms;
+
+  // true when the task is during a concurrent phase, false when it is
+  // in the remark phase (so, in the latter case, we do not have to
+  // check all the things that we have to check during the concurrent
+  // phase, i.e. SATB buffer availability...)
+  bool                        _concurrent;
+
+  TruncatedSeq                _marking_step_diffs_ms;
+
+  // LOTS of statistics related to this task
+#if _MARKING_STATS_
+  NumberSeq                   _all_clock_intervals_ms;
+  double                      _interval_start_time_ms;
+
+  int                         _aborted;
+  int                         _aborted_overflow;
+  int                         _aborted_cm_aborted;
+  int                         _aborted_yield;
+  int                         _aborted_timed_out;
+  int                         _aborted_satb;
+  int                         _aborted_termination;
+
+  int                         _steal_attempts;
+  int                         _steals;
+
+  int                         _clock_due_to_marking;
+  int                         _clock_due_to_scanning;
+
+  int                         _local_pushes;
+  int                         _local_pops;
+  int                         _local_max_size;
+  int                         _objs_scanned;
+
+  int                         _global_pushes;
+  int                         _global_pops;
+  int                         _global_max_size;
+
+  int                         _global_transfers_to;
+  int                         _global_transfers_from;
+
+  int                         _region_stack_pops;
+
+  int                         _regions_claimed;
+  int                         _objs_found_on_bitmap;
+
+  int                         _satb_buffers_processed;
+#endif // _MARKING_STATS_
+
+  // it updates the local fields after this task has claimed
+  // a new region to scan
+  void setup_for_region(HeapRegion* hr);
+  // it brings up-to-date the limit of the region
+  void update_region_limit();
+  // it resets the local fields after a task has finished scanning a
+  // region
+  void giveup_current_region();
+
+  // called when either the words scanned or the refs visited limit
+  // has been reached
+  void reached_limit();
+  // recalculates the words scanned and refs visited limits
+  void recalculate_limits();
+  // decreases the words scanned and refs visited limits when we reach
+  // an expensive operation
+  void decrease_limits();
+  // it checks whether the words scanned or refs visited reached their
+  // respective limit and calls reached_limit() if they have
+  void check_limits() {
+    if (_words_scanned >= _words_scanned_limit ||
+        _refs_reached >= _refs_reached_limit)
+      reached_limit();
+  }
+  // this is supposed to be called regularly during a marking step as
+  // it checks a bunch of conditions that might cause the marking step
+  // to abort
+  void regular_clock_call();
+  bool concurrent() { return _concurrent; }
+
+public:
+  // It resets the task; it should be called right at the beginning of
+  // a marking phase.
+  void reset(CMBitMap* _nextMarkBitMap);
+  // it clears all the fields that correspond to a claimed region.
+  void clear_region_fields();
+
+  void set_concurrent(bool concurrent) { _concurrent = concurrent; }
+
+  void enable_co_tracker() {
+    guarantee( !_co_tracker.enabled(), "invariant" );
+    _co_tracker.enable();
+  }
+  void disable_co_tracker() {
+    guarantee( _co_tracker.enabled(), "invariant" );
+    _co_tracker.disable();
+  }
+  bool co_tracker_enabled() {
+    return _co_tracker.enabled();
+  }
+  void reset_co_tracker(double starting_conc_overhead = 0.0) {
+    _co_tracker.reset(starting_conc_overhead);
+  }
+  void start_co_tracker() {
+    _co_tracker.start();
+  }
+  void update_co_tracker(bool force_end = false) {
+    _co_tracker.update(force_end);
+  }
+
+  // The main method of this class which performs a marking step
+  // trying not to exceed the given duration. However, it might exit
+  // prematurely, according to some conditions (e.g. SATB buffers being
+  // available for processing).
+  void do_marking_step(double target_ms);
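+  // A sketch of how a parallel worker might drive a task (hypothetical
+  // driver code, assuming 'task' and 'cm' pointers):
+  //   task->record_start_time();
+  //   do {
+  //     task->do_marking_step(target_ms);
+  //     // yield to pauses here; retry while only the task (not CM) aborted
+  //   } while (task->has_aborted() && !cm->has_aborted());
+  //   task->record_end_time();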
+
+  // These two calls start and stop the timer
+  void record_start_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0;
+  }
+  void record_end_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
+  }
+
+  // returns the task ID
+  int task_id() { return _task_id; }
+
+  // From TerminatorTerminator. It determines whether this task should
+  // exit the termination protocol after it's entered it.
+  virtual bool should_exit_termination();
+
+  HeapWord* finger()            { return _finger; }
+
+  bool has_aborted()            { return _has_aborted; }
+  void set_has_aborted()        { _has_aborted = true; }
+  void clear_has_aborted()      { _has_aborted = false; }
+  bool claimed() { return _claimed; }
+
+  void set_oop_closure(OopClosure* oop_closure) {
+    _oop_closure = oop_closure;
+  }
+
+  // It grays the object by marking it and, if necessary, pushing it
+  // on the local queue
+  void deal_with_reference(oop obj);
+
+  // It scans an object and visits its children.
+  void scan_object(oop obj) {
+    tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
+                      "invariant" );
+
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
+                             _task_id, (void*) obj);
+
+    size_t obj_size = obj->size();
+    _words_scanned += obj_size;
+
+    obj->oop_iterate(_oop_closure);
+    statsOnly( ++_objs_scanned );
+    check_limits();
+  }
+
+  // It pushes an object on the local queue.
+  void push(oop obj);
+
+  // These two move entries to/from the global stack.
+  void move_entries_to_global_stack();
+  void get_entries_from_global_stack();
+
+  // It pops and scans objects from the local queue. If partially is
+  // true, then it stops when the queue size drops to a given limit. If
+  // partially is false, then it stops when the queue is empty.
+  void drain_local_queue(bool partially);
+  // It moves entries from the global stack to the local queue and
+  // drains the local queue. If partially is true, then it stops when
+  // both the global stack and the local queue drop to a given size. If
+  // partially is false, it tries to empty them totally.
+  void drain_global_stack(bool partially);
+  // It keeps picking SATB buffers and processing them until no SATB
+  // buffers are available.
+  void drain_satb_buffers();
+  // It keeps popping regions from the region stack and processing
+  // them until the region stack is empty.
+  void drain_region_stack(BitMapClosure* closure);
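+  // (These four drain_* routines cover the four sources of marking work a
+  // task draws from: its local queue, the global mark stack, the SATB
+  // buffers and the region stack; do_marking_step() interleaves them.)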
+
+  // moves the local finger to a new location
+  inline void move_finger_to(HeapWord* new_finger) {
+    tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit,
+                   "invariant" );
+    _finger = new_finger;
+  }
+
+  // moves the region finger to a new location
+  inline void move_region_finger_to(HeapWord* new_finger) {
+    tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" );
+    _region_finger = new_finger;
+  }
+
+  CMTask(int task_num, ConcurrentMark *cm,
+         CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
+
+  // it prints statistics associated with this task
+  void print_stats();
+
+#if _MARKING_STATS_
+  void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
+#endif // _MARKING_STATS_
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,336 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentMarkThread.cpp.incl"
+
+// ======= Concurrent Mark Thread ========
+
+// The CM thread is created when the G1 garbage collector is used
+
+SurrogateLockerThread*
+     ConcurrentMarkThread::_slt = NULL;
+
+ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) :
+  ConcurrentGCThread(),
+  _cm(cm),
+  _started(false),
+  _in_progress(false),
+  _vtime_accum(0.0),
+  _vtime_mark_accum(0.0),
+  _vtime_count_accum(0.0)
+{
+  create_and_start();
+}
+
+class CMCheckpointRootsInitialClosure: public VoidClosure {
+
+  ConcurrentMark* _cm;
+public:
+
+  CMCheckpointRootsInitialClosure(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->checkpointRootsInitial();
+  }
+};
+
+class CMCheckpointRootsFinalClosure: public VoidClosure {
+
+  ConcurrentMark* _cm;
+public:
+
+  CMCheckpointRootsFinalClosure(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->checkpointRootsFinal(false); // !clear_all_soft_refs
+  }
+};
+
+class CMCleanUp: public VoidClosure {
+  ConcurrentMark* _cm;
+public:
+
+  CMCleanUp(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->cleanup();
+  }
+};
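+// Each of the VoidClosures above is wrapped in a VM_CGC_Operation below
+// and handed to VMThread::execute(), so the corresponding checkpoint or
+// cleanup work is carried out by the VM thread in a stop-the-world pause.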
+
+
+
+void ConcurrentMarkThread::run() {
+  initialize_in_thread();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1_policy = g1->g1_policy();
+  G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker();
+  Thread *current_thread = Thread::current();
+
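+  // Each iteration of this loop is one marking cycle: wait to be started,
+  // do the initial-mark checkpoint (unless it was piggy-backed on a young
+  // collection), mark concurrently from the roots (restarting on mark
+  // stack overflow), do the remark checkpoint, the concurrent counting
+  // phase, the cleanup checkpoint and the concurrent cleanup, and finally
+  // clear the next marking bitmap.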
+  while (!_should_terminate) {
+    // wait until started is set.
+    sleepBeforeNextCycle();
+    {
+      ResourceMark rm;
+      HandleMark   hm;
+      double cycle_start = os::elapsedVTime();
+      double mark_start_sec = os::elapsedTime();
+      char verbose_str[128];
+
+      if (PrintGC) {
+        gclog_or_tty->date_stamp(PrintGCDateStamps);
+        gclog_or_tty->stamp(PrintGCTimeStamps);
+        gclog_or_tty->print_cr("[GC concurrent-mark-start]");
+      }
+
+      if (!g1_policy->in_young_gc_mode()) {
+        // this ensures the flag is not set if we bail out of the marking
+        // cycle; normally the flag is cleared immediately after cleanup
+        g1->set_marking_complete();
+
+        if (g1_policy->adaptive_young_list_length()) {
+          double now = os::elapsedTime();
+          double init_prediction_ms = g1_policy->predict_init_time_ms();
+          jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms);
+          os::sleep(current_thread, sleep_time_ms, false);
+        }
+
+        // We don't have to skip here if we've been asked to restart, because
+        // in the worst case we just enqueue a new VM operation to start a
+        // marking.  Note that the init operation resets has_aborted()
+        CMCheckpointRootsInitialClosure init_cl(_cm);
+        strcpy(verbose_str, "GC initial-mark");
+        VM_CGC_Operation op(&init_cl, verbose_str);
+        VMThread::execute(&op);
+      }
+
+      int iter = 0;
+      do {
+        iter++;
+        if (!cm()->has_aborted()) {
+          _cm->markFromRoots();
+        } else {
+          if (TraceConcurrentMark)
+            gclog_or_tty->print_cr("CM-skip-mark-from-roots");
+        }
+
+        double mark_end_time = os::elapsedVTime();
+        double mark_end_sec = os::elapsedTime();
+        _vtime_mark_accum += (mark_end_time - cycle_start);
+        if (!cm()->has_aborted()) {
+          if (g1_policy->adaptive_young_list_length()) {
+            double now = os::elapsedTime();
+            double remark_prediction_ms = g1_policy->predict_remark_time_ms();
+            jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms);
+            os::sleep(current_thread, sleep_time_ms, false);
+          }
+
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
+                                      mark_end_sec - mark_start_sec);
+          }
+
+          CMCheckpointRootsFinalClosure final_cl(_cm);
+          sprintf(verbose_str, "GC remark");
+          VM_CGC_Operation op(&final_cl, verbose_str);
+          VMThread::execute(&op);
+        } else {
+          if (TraceConcurrentMark)
+            gclog_or_tty->print_cr("CM-skip-remark");
+        }
+        if (cm()->restart_for_overflow() &&
+            G1TraceMarkStackOverflow) {
+          gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
+                                 "in remark (restart #%d).", iter);
+        }
+
+        if (cm()->restart_for_overflow()) {
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
+          }
+        }
+      } while (cm()->restart_for_overflow());
+      double counting_start_time = os::elapsedVTime();
+
+      // YSR: These look dubious (i.e. redundant) !!! FIX ME
+      slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
+      slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
+
+      if (!cm()->has_aborted()) {
+        double count_start_sec = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-count-start]");
+        }
+
+        _sts.join();
+        _cm->calcDesiredRegions();
+        _sts.leave();
+
+        if (!cm()->has_aborted()) {
+          double count_end_sec = os::elapsedTime();
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
+                                   count_end_sec - count_start_sec);
+          }
+        }
+      } else {
+        if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-end-game");
+      }
+      double end_time = os::elapsedVTime();
+      _vtime_count_accum += (end_time - counting_start_time);
+      // Update the total virtual time before doing this, since it will try
+      // to measure it to get the vtime for this marking.  We purposely
+      // neglect the presumably-short "completeCleanup" phase here.
+      _vtime_accum = (end_time - _vtime_start);
+      if (!cm()->has_aborted()) {
+        if (g1_policy->adaptive_young_list_length()) {
+          double now = os::elapsedTime();
+          double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms();
+          jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms);
+          os::sleep(current_thread, sleep_time_ms, false);
+        }
+
+        CMCleanUp cl_cl(_cm);
+        sprintf(verbose_str, "GC cleanup");
+        VM_CGC_Operation op(&cl_cl, verbose_str);
+        VMThread::execute(&op);
+      } else {
+        if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-cleanup");
+        G1CollectedHeap::heap()->set_marking_complete();
+      }
+
+      if (!cm()->has_aborted()) {
+        double cleanup_start_sec = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
+        }
+
+        // Now do the remainder of the cleanup operation.
+        _sts.join();
+        _cm->completeCleanup();
+        if (!cm()->has_aborted()) {
+          g1_policy->record_concurrent_mark_cleanup_completed();
+
+          double cleanup_end_sec = os::elapsedTime();
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
+                                   cleanup_end_sec - cleanup_start_sec);
+          }
+        }
+        _sts.leave();
+      }
+      // We're done: no more unclean regions coming.
+      G1CollectedHeap::heap()->set_unclean_regions_coming(false);
+
+      if (cm()->has_aborted()) {
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
+        }
+      }
+
+      _sts.join();
+      _cm->disable_co_trackers();
+      _sts.leave();
+
+      // we now want to allow clearing of the marking bitmap to be
+      // suspended by a collection pause.
+      _sts.join();
+      _cm->clearNextBitmap();
+      _sts.leave();
+    }
+  }
+  assert(_should_terminate, "just checking");
+
+  terminate();
+}
+
+
+void ConcurrentMarkThread::yield() {
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield");
+  _sts.yield("Concurrent Mark");
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield-end");
+}
+
+void ConcurrentMarkThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  MutexLockerEx mu(Terminator_lock);
+  _should_terminate = true;
+  while (!_has_terminated) {
+    Terminator_lock->wait();
+  }
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-stop");
+}
+
+void ConcurrentMarkThread::print() {
+  gclog_or_tty->print("\"Concurrent Mark GC Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+void ConcurrentMarkThread::sleepBeforeNextCycle() {
+  clear_in_progress();
+  // We join here because we don't want to do the "shouldConcurrentMark()"
+  // below while the world is otherwise stopped.
+  MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+  while (!started()) {
+    if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-sleeping");
+    CGC_lock->wait(Mutex::_no_safepoint_check_flag);
+  }
+  set_in_progress();
+  clear_started();
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
+
+  return;
+}
+
+// Note: this method, although exported by the ConcurrentMarkThread,
+// which is a non-JavaThread, can only be called by a JavaThread.
+// Currently this is done at vm creation time (post-vm-init) by the
+// main/Primordial (Java)Thread.
+// XXX Consider changing this in the future to allow the CM thread
+// itself to create this thread?
+void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
+  assert(_slt == NULL, "SLT already created");
+  _slt = SurrogateLockerThread::make(THREAD);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The Concurrent Mark GC Thread (could be several in the future).
+// This is copied from the Concurrent Mark Sweep GC Thread
+// Still under construction.
+
+class ConcurrentMark;
+
+class ConcurrentMarkThread: public ConcurrentGCThread {
+  friend class VMStructs;
+
+  double _vtime_start;  // Initial virtual time.
+  double _vtime_accum;  // Accumulated virtual time.
+
+  double _vtime_mark_accum;
+  double _vtime_count_accum;
+
+ public:
+  virtual void run();
+
+ private:
+  ConcurrentMark*                  _cm;
+  bool                             _started;
+  bool                             _in_progress;
+
+  void sleepBeforeNextCycle();
+
+  static SurrogateLockerThread*         _slt;
+
+ public:
+  // Constructor
+  ConcurrentMarkThread(ConcurrentMark* cm);
+
+  static void makeSurrogateLockerThread(TRAPS);
+  static SurrogateLockerThread* slt() { return _slt; }
+
+  // Printing
+  void print();
+
+  // Total virtual time so far.
+  double vtime_accum();
+  // Marking virtual time so far
+  double vtime_mark_accum();
+  // Counting virtual time so far.
+  double vtime_count_accum() { return _vtime_count_accum; }
+
+  ConcurrentMark* cm()                           { return _cm;     }
+
+  void            set_started()                  { _started = true;   }
+  void            clear_started()                { _started = false;  }
+  bool            started()                      { return _started;   }
+
+  void            set_in_progress()              { _in_progress = true;   }
+  void            clear_in_progress()            { _in_progress = false;  }
+  bool            in_progress()                  { return _in_progress;   }
+
+  // Yield for GC
+  void            yield();
+
+  // shutdown
+  static void stop();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Total virtual time so far.
+inline double ConcurrentMarkThread::vtime_accum() {
+  return _vtime_accum + _cm->all_task_accum_vtime();
+}
+
+// Marking virtual time so far
+inline double ConcurrentMarkThread::vtime_mark_accum() {
+  return _vtime_mark_accum + _cm->all_task_accum_vtime();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentZFThread.cpp.incl"
+
+// ======= Concurrent Zero-Fill Thread ========
+
+// The ZF thread is created when the G1 garbage collector is used
+
+int ConcurrentZFThread::_region_allocs = 0;
+int ConcurrentZFThread::_sync_zfs = 0;
+int ConcurrentZFThread::_zf_waits = 0;
+int ConcurrentZFThread::_regions_filled = 0;
+
+ConcurrentZFThread::ConcurrentZFThread() :
+  ConcurrentGCThread(),
+  _co_tracker(G1ZFGroup)
+{
+  create_and_start();
+}
+
+void ConcurrentZFThread::wait_for_ZF_completed(HeapRegion* hr) {
+  assert(ZF_mon->owned_by_self(), "Precondition.");
+  note_zf_wait();
+  while (hr->zero_fill_state() == HeapRegion::ZeroFilling) {
+    ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+void ConcurrentZFThread::processHeapRegion(HeapRegion* hr) {
+  assert(!Universe::heap()->is_gc_active(),
+         "This should not happen during GC.");
+  assert(hr != NULL, "Precondition");
+  // These are unlocked reads, but if this test is successful, then no
+  // other thread will attempt this zero filling.  Only a GC thread can
+  // modify the ZF state of a region whose state is zero-filling, and this
+  // should only happen while the ZF thread is locking out GC.
+  if (hr->zero_fill_state() == HeapRegion::ZeroFilling
+      && hr->zero_filler() == Thread::current()) {
+    assert(hr->top() == hr->bottom(), "better be empty!");
+    assert(!hr->isHumongous(), "Only free regions on unclean list.");
+    Copy::fill_to_words(hr->bottom(), hr->capacity()/HeapWordSize);
+    note_region_filled();
+  }
+}
+
+void ConcurrentZFThread::run() {
+  initialize_in_thread();
+  Thread* thr_self = Thread::current();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+  _co_tracker.enable();
+  _co_tracker.start();
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  _sts.join();
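+  // Outline of the loop below: wait on ZF_mon until the heap wants
+  // zero-filling and an unclean region can be popped, zero-fill that
+  // region with ZF_mon released, then return it to the free region list.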
+  while (!_should_terminate) {
+    _sts.leave();
+
+    {
+      MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+
+      // This local variable will hold a region being zero-filled.  This
+      // region will neither be on the unclean or zero-filled lists, and
+      // will not be available for allocation; thus, we might have an
+      // allocation fail, causing a full GC, because of this, but this is a
+      // price we will pay.  (In future, we might want to make the fact
+      // that there's a region being zero-filled apparent to the G1 heap,
+      // which could then wait for it in this extreme case...)
+      HeapRegion* to_fill;
+
+      while (!g1->should_zf()
+             || (to_fill = g1->pop_unclean_region_list_locked()) == NULL)
+        ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+      while (to_fill->zero_fill_state() == HeapRegion::ZeroFilling)
+        ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+
+      // So now to_fill is non-NULL and is not ZeroFilling.  It might be
+      // Allocated or ZeroFilled.  (The latter could happen if this thread
+      // starts the zero-filling of a region, but a GC intervenes and
+      // pushes new regions needing on the front of the filling on the
+      // front of the list.)
+
+      switch (to_fill->zero_fill_state()) {
+      case HeapRegion::Allocated:
+        to_fill = NULL;
+        break;
+
+      case HeapRegion::NotZeroFilled:
+        to_fill->set_zero_fill_in_progress(thr_self);
+
+        ZF_mon->unlock();
+        _sts.join();
+        processHeapRegion(to_fill);
+        _sts.leave();
+        ZF_mon->lock_without_safepoint_check();
+
+        if (to_fill->zero_fill_state() == HeapRegion::ZeroFilling
+            && to_fill->zero_filler() == thr_self) {
+          to_fill->set_zero_fill_complete();
+          (void)g1->put_free_region_on_list_locked(to_fill);
+        }
+        break;
+
+      case HeapRegion::ZeroFilled:
+        (void)g1->put_free_region_on_list_locked(to_fill);
+        break;
+
+      case HeapRegion::ZeroFilling:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+    _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    _sts.join();
+
+    _co_tracker.update();
+  }
+  _co_tracker.update(false);
+  _sts.leave();
+
+  assert(_should_terminate, "just checking");
+  terminate();
+}
+
+bool ConcurrentZFThread::offer_yield() {
+  if (_sts.should_yield()) {
+    _sts.yield("Concurrent ZF");
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void ConcurrentZFThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  MutexLockerEx mu(Terminator_lock);
+  _should_terminate = true;
+  while (!_has_terminated) {
+    Terminator_lock->wait();
+  }
+}
+
+void ConcurrentZFThread::print() {
+  gclog_or_tty->print("\"Concurrent ZF Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+
+double ConcurrentZFThread::_vtime_accum;
+
+void ConcurrentZFThread::print_summary_info() {
+  gclog_or_tty->print("\nConcurrent Zero-Filling:\n");
+  gclog_or_tty->print("  Filled %d regions, used %5.2fs.\n",
+                      _regions_filled,
+                      vtime_accum());
+  gclog_or_tty->print("  Of %d region allocs, %d (%5.2f%%) required sync ZF,\n",
+                      _region_allocs, _sync_zfs,
+                      (_region_allocs > 0 ?
+                       (float)_sync_zfs/(float)_region_allocs*100.0 :
+                       0.0));
+  gclog_or_tty->print("     and %d (%5.2f%%) required a ZF wait.\n",
+                      _zf_waits,
+                      (_region_allocs > 0 ?
+                       (float)_zf_waits/(float)_region_allocs*100.0 :
+                       0.0));
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The Concurrent ZF Thread.  Performs concurrent zero-filling.
+
+class ConcurrentZFThread: public ConcurrentGCThread {
+  friend class VMStructs;
+  friend class ZeroFillRegionClosure;
+
+ private:
+
+  // Zero fill the heap region.
+  void processHeapRegion(HeapRegion* r);
+
+  // Stats
+  //   Allocation (protected by heap lock).
+  static int _region_allocs;  // Number of regions allocated
+  static int _sync_zfs;       //   Synchronous zero-fills +
+  static int _zf_waits;       //   Wait for conc zero-fill completion.
+
+  // Number of regions the ZF thread fills.
+  static int _regions_filled;
+
+  COTracker _co_tracker;
+
+  double _vtime_start;  // Initial virtual time.
+
+  // These are static because the "print_summary_info" method is, and
+  // it currently assumes there is only one ZF thread.  We'll change when
+  // we need to.
+  static double _vtime_accum;  // Accumulated virtual time.
+  static double vtime_accum() { return _vtime_accum; }
+
+  // Offer yield for GC.  Returns true if yield occurred.
+  bool offer_yield();
+
+ public:
+  // Constructor
+  ConcurrentZFThread();
+
+  // Main loop.
+  virtual void run();
+
+  // Printing
+  void print();
+
+  // Waits until "r" has been zero-filled.  Requires caller to hold the
+  // ZF_mon.
+  static void wait_for_ZF_completed(HeapRegion* r);
+
+  // Get or clear the current unclean region.  Should be done
+  // while holding the ZF_needed_mon lock.
+
+  // shutdown
+  static void stop();
+
+  // Stats
+  static void note_region_alloc() {_region_allocs++; }
+  static void note_sync_zfs() { _sync_zfs++; }
+  static void note_zf_wait() { _zf_waits++; }
+  static void note_region_filled() { _regions_filled++; }
+
+  static void print_summary_info();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_dirtyCardQueue.cpp.incl"
+
+bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl,
+                                   bool consume,
+                                   size_t worker_i) {
+  bool res = true;
+  if (_buf != NULL) {
+    res = apply_closure_to_buffer(cl, _buf, _index, _sz,
+                                  consume,
+                                  (int) worker_i);
+    if (res && consume) _index = _sz;
+  }
+  return res;
+}
+
+bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl,
+                                             void** buf,
+                                             size_t index, size_t sz,
+                                             bool consume,
+                                             int worker_i) {
+  if (cl == NULL) return true;
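+  // "index" and "sz" are byte offsets into the buffer; each entry takes
+  // oopSize bytes, so the byte offset is converted to an array slot below.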
+  for (size_t i = index; i < sz; i += oopSize) {
+    int ind = byte_index_to_index((int)i);
+    jbyte* card_ptr = (jbyte*)buf[ind];
+    if (card_ptr != NULL) {
+      // Set the entry to null, so we don't do it again (via the test
+      // above) if we reconsider this buffer.
+      if (consume) buf[ind] = NULL;
+      if (!cl->do_card_ptr(card_ptr, worker_i)) return false;
+    }
+  }
+  return true;
+}
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+DirtyCardQueueSet::DirtyCardQueueSet() :
+  PtrQueueSet(true /*notify_when_complete*/),
+  _closure(NULL),
+  _shared_dirty_card_queue(this, true /*perm*/),
+  _free_ids(NULL),
+  _processed_buffers_mut(0), _processed_buffers_rs_thread(0)
+{
+  _all_active = true;
+}
+
+size_t DirtyCardQueueSet::num_par_ids() {
+  return MAX2(ParallelGCThreads, (size_t)2);
+}
+
+
+void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                   int max_completed_queue,
+                                   Mutex* lock) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+  set_buffer_size(DCQBarrierQueueBufferSize);
+  set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
+
+  _shared_dirty_card_queue.set_lock(lock);
+  _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
+  bool b = _free_ids->claim_perm_id(0);
+  guarantee(b, "Must reserve id zero for concurrent refinement thread.");
+}
+
+void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  t->dirty_card_queue().handle_zero_index();
+}
+
+void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) {
+  _closure = closure;
+}
+
+void DirtyCardQueueSet::iterate_closure_all_threads(bool consume,
+                                                    size_t worker_i) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    bool b = t->dirty_card_queue().apply_closure(_closure, consume);
+    guarantee(b, "Should not be interrupted.");
+  }
+  bool b = shared_dirty_card_queue()->apply_closure(_closure,
+                                                    consume,
+                                                    worker_i);
+  guarantee(b, "Should not be interrupted.");
+}
+
+bool DirtyCardQueueSet::mut_process_buffer(void** buf) {
+
+  // Used to determine if we had already claimed a par_id
+  // before entering this method.
+  bool already_claimed = false;
+
+  // We grab the current JavaThread.
+  JavaThread* thread = JavaThread::current();
+
+  // We get the number of any par_id that this thread
+  // might have already claimed.
+  int worker_i = thread->get_claimed_par_id();
+
+  // If worker_i is not -1 then the thread has already claimed
+  // a par_id. We make note of it using the already_claimed value
+  if (worker_i != -1) {
+    already_claimed = true;
+  } else {
+
+    // Otherwise we need to claim a par id
+    worker_i = _free_ids->claim_par_id();
+
+    // And store the par_id value in the thread
+    thread->set_claimed_par_id(worker_i);
+  }
+
+  bool b = false;
+  if (worker_i != -1) {
+    b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0,
+                                                _sz, true, worker_i);
+    if (b) Atomic::inc(&_processed_buffers_mut);
+
+    // If we had not claimed an id before entering the method
+    // then we must release the id.
+    if (!already_claimed) {
+
+      // we release the id
+      _free_ids->release_par_id(worker_i);
+
+      // and set the claimed_id in the thread to -1
+      thread->set_claimed_par_id(-1);
+    }
+  }
+  return b;
+}
+
+DirtyCardQueueSet::CompletedBufferNode*
+DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) {
+  CompletedBufferNode* nd = NULL;
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+
+  if ((int)_n_completed_buffers <= stop_at) {
+    _process_completed = false;
+    return NULL;
+  }
+
+  if (_completed_buffers_head != NULL) {
+    nd = _completed_buffers_head;
+    _completed_buffers_head = nd->next;
+    if (_completed_buffers_head == NULL)
+      _completed_buffers_tail = NULL;
+    _n_completed_buffers--;
+  }
+  debug_only(assert_completed_buffer_list_len_correct_locked());
+  return nd;
+}
+
+// We only do this in contexts where there is no concurrent enqueueing.
+DirtyCardQueueSet::CompletedBufferNode*
+DirtyCardQueueSet::get_completed_buffer_CAS() {
+  CompletedBufferNode* nd = _completed_buffers_head;
+
+  while (nd != NULL) {
+    CompletedBufferNode* next = nd->next;
+    CompletedBufferNode* result =
+      (CompletedBufferNode*)Atomic::cmpxchg_ptr(next,
+                                                &_completed_buffers_head,
+                                                nd);
+    if (result == nd) {
+      return result;
+    } else {
+      nd = _completed_buffers_head;
+    }
+  }
+  assert(_completed_buffers_head == NULL, "Loop post");
+  _completed_buffers_tail = NULL;
+  return NULL;
+}
+
+bool DirtyCardQueueSet::
+apply_closure_to_completed_buffer_helper(int worker_i,
+                                         CompletedBufferNode* nd) {
+  if (nd != NULL) {
+    bool b =
+      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf,
+                                              nd->index, _sz,
+                                              true, worker_i);
+    void** buf = nd->buf;
+    size_t index = nd->index;
+    delete nd;
+    if (b) {
+      deallocate_buffer(buf);
+      return true;  // In normal case, go on to next buffer.
+    } else {
+      enqueue_complete_buffer(buf, index, true);
+      return false;
+    }
+  } else {
+    return false;
+  }
+}
+
+bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
+                                                          int stop_at,
+                                                          bool with_CAS)
+{
+  CompletedBufferNode* nd = NULL;
+  if (with_CAS) {
+    guarantee(stop_at == 0, "Precondition");
+    nd = get_completed_buffer_CAS();
+  } else {
+    nd = get_completed_buffer_lock(stop_at);
+  }
+  bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
+  if (res) _processed_buffers_rs_thread++;
+  return res;
+}
+
+void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
+  CompletedBufferNode* nd = _completed_buffers_head;
+  while (nd != NULL) {
+    bool b =
+      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz,
+                                              false);
+    guarantee(b, "Should not stop early.");
+    nd = nd->next;
+  }
+}
+
+void DirtyCardQueueSet::abandon_logs() {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  CompletedBufferNode* buffers_to_delete = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    while (_completed_buffers_head != NULL) {
+      CompletedBufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      nd->next = buffers_to_delete;
+      buffers_to_delete = nd;
+    }
+    _n_completed_buffers = 0;
+    _completed_buffers_tail = NULL;
+    debug_only(assert_completed_buffer_list_len_correct_locked());
+  }
+  while (buffers_to_delete != NULL) {
+    CompletedBufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next;
+    deallocate_buffer(nd->buf);
+    delete nd;
+  }
+  // Since abandon is done only at safepoints, we can safely manipulate
+  // these queues.
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->dirty_card_queue().reset();
+  }
+  shared_dirty_card_queue()->reset();
+}
+
+
+void DirtyCardQueueSet::concatenate_logs() {
+  // Iterate over all the threads, if we find a partial log add it to
+  // the global list of logs.  Temporarily turn off the limit on the number
+  // of outstanding buffers.
+  int save_max_completed_queue = _max_completed_queue;
+  _max_completed_queue = max_jint;
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    DirtyCardQueue& dcq = t->dirty_card_queue();
+    if (dcq.size() != 0) {
+      void **buf = t->dirty_card_queue().get_buf();
+      // We must NULL out the unused entries, then enqueue.
+      for (size_t i = 0; i < t->dirty_card_queue().get_index(); i += oopSize) {
+        buf[PtrQueue::byte_index_to_index((int)i)] = NULL;
+      }
+      enqueue_complete_buffer(dcq.get_buf(), dcq.get_index());
+      dcq.reinitialize();
+    }
+  }
+  if (_shared_dirty_card_queue.size() != 0) {
+    enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(),
+                            _shared_dirty_card_queue.get_index());
+    _shared_dirty_card_queue.reinitialize();
+  }
+  // Restore the completed buffer queue limit.
+  _max_completed_queue = save_max_completed_queue;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class FreeIdSet;
+
+// A closure class for processing card table entries.  Note that we don't
+// require these closure objects to be stack-allocated.
+class CardTableEntryClosure: public CHeapObj {
+public:
+  // Process the card whose card table entry is "card_ptr".  If returns
+  // "false", terminate the iteration early.
+  virtual bool do_card_ptr(jbyte* card_ptr, int worker_i = 0) = 0;
+};
+
+// A PtrQueue whose elements are dirty card table entries (jbyte* card pointers).
+class DirtyCardQueue: public PtrQueue {
+public:
+  DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) :
+    PtrQueue(qset_, perm)
+  {
+    // Dirty card queues are always active.
+    _active = true;
+  }
+  // Apply the closure to all elements, and reset the index to make the
+  // buffer empty.  If a closure application returns "false", return
+  // "false" immediately, halting the iteration.  If "consume" is true,
+  // deletes processed entries from logs.
+  bool apply_closure(CardTableEntryClosure* cl,
+                     bool consume = true,
+                     size_t worker_i = 0);
+
+  // Apply the closure to all elements of "buf", down to "index"
+  // (inclusive).  If returns "false", then a closure application returned
+  // "false", and we return immediately.  If "consume" is true, entries are
+  // set to NULL as they are processed, so they will not be processed again
+  // later.
+  static bool apply_closure_to_buffer(CardTableEntryClosure* cl,
+                                      void** buf, size_t index, size_t sz,
+                                      bool consume = true,
+                                      int worker_i = 0);
+  void **get_buf() { return _buf;}
+  void set_buf(void **buf) {_buf = buf;}
+  size_t get_index() { return _index;}
+  void reinitialize() { _buf = 0; _sz = 0; _index = 0;}
+};
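+// (A rough picture of intended use, not a specification: each JavaThread
+// owns a DirtyCardQueue into which the write barrier enqueues pointers to
+// dirty card table entries; when a buffer fills up, it is passed to the
+// DirtyCardQueueSet below as a completed buffer for later processing.)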
+
+
+
+class DirtyCardQueueSet: public PtrQueueSet {
+  CardTableEntryClosure* _closure;
+
+  DirtyCardQueue _shared_dirty_card_queue;
+
+  // Override.
+  bool mut_process_buffer(void** buf);
+
+  // Protected by the _cbl_mon.
+  FreeIdSet* _free_ids;
+
+  // The number of completed buffers processed by mutator and rs thread,
+  // respectively.
+  jint _processed_buffers_mut;
+  jint _processed_buffers_rs_thread;
+
+public:
+  DirtyCardQueueSet();
+
+  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                  int max_completed_queue = 0,
+                  Mutex* lock = NULL);
+
+  // The number of parallel ids that can be claimed to allow collector or
+  // mutator threads to do card-processing work.
+  static size_t num_par_ids();
+
+  static void handle_zero_index_for_thread(JavaThread* t);
+
+  // Register "blk" as "the closure" for all queues.  Only one such closure
+  // is allowed.  The "apply_closure_to_completed_buffer" method will apply
+  // this closure to a completed buffer, and "iterate_closure_all_threads"
+  // applies it to partially-filled buffers (the latter should only be done
+  // with the world stopped).
+  void set_closure(CardTableEntryClosure* closure);
+
+  // If there is a registered closure for buffers, apply it to all entries
+  // in all currently-active buffers.  This should only be applied at a
+  // safepoint.  (Currently must not be called in parallel; this should
+  // change in the future.)  If "consume" is true, processed entries are
+  // discarded.
+  void iterate_closure_all_threads(bool consume = true,
+                                   size_t worker_i = 0);
+
+  // If there exists some completed buffer, pop it, then apply the
+  // registered closure to all its elements, nulling out those elements
+  // processed.  If all elements are processed, returns "true".  If no
+  // completed buffers exist, returns false.  If a completed buffer exists,
+  // but is only partially completed before a "yield" happens, the
+  // partially completed buffer (with its processed elements set to NULL)
+  // is returned to the completed buffer set, and this call returns false.
+  bool apply_closure_to_completed_buffer(int worker_i = 0,
+                                         int stop_at = 0,
+                                         bool with_CAS = false);
+  bool apply_closure_to_completed_buffer_helper(int worker_i,
+                                                CompletedBufferNode* nd);
+
+  CompletedBufferNode* get_completed_buffer_CAS();
+  CompletedBufferNode* get_completed_buffer_lock(int stop_at);
+  // Applies the current closure to all completed buffers,
+  // non-consumptively.
+  void apply_closure_to_all_completed_buffers();
+
+  DirtyCardQueue* shared_dirty_card_queue() {
+    return &_shared_dirty_card_queue;
+  }
+
+  // If a full collection is happening, reset partial logs, and ignore
+  // completed ones: the full collection will make them all irrelevant.
+  void abandon_logs();
+
+  // If any threads have partial logs, add them to the global list of logs.
+  void concatenate_logs();
+  void clear_n_completed_buffers() { _n_completed_buffers = 0;}
+
+  jint processed_buffers_mut() {
+    return _processed_buffers_mut;
+  }
+  jint processed_buffers_rs_thread() {
+    return _processed_buffers_rs_thread;
+  }
+
+};
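+
+// Typical use, roughly (an illustrative sketch, not a normative protocol):
+// register the refinement closure once, then have a refinement thread drain
+// completed buffers until none remain above its threshold, e.g.:
+//
+//   dcqs.set_closure(&refine_cl);
+//   while (dcqs.apply_closure_to_completed_buffer(worker_i, stop_at)) {
+//     // each "true" return means one completed buffer was fully processed
+//   }
+//
+// ("dcqs", "refine_cl", "worker_i" and "stop_at" are placeholder names.)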
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,628 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1BlockOffsetTable.cpp.incl"
+
+//////////////////////////////////////////////////////////////////////
+// G1BlockOffsetSharedArray
+//////////////////////////////////////////////////////////////////////
+
+G1BlockOffsetSharedArray::G1BlockOffsetSharedArray(MemRegion reserved,
+                                                   size_t init_word_size) :
+  _reserved(reserved), _end(NULL)
+{
+  size_t size = compute_size(reserved.word_size());
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(size));
+  if (!rs.is_reserved()) {
+    vm_exit_during_initialization("Could not reserve enough space for heap offset array");
+  }
+  if (!_vs.initialize(rs, 0)) {
+    vm_exit_during_initialization("Could not reserve enough space for heap offset array");
+  }
+  _offset_array = (u_char*)_vs.low_boundary();
+  resize(init_word_size);
+  if (TraceBlockOffsetTable) {
+    gclog_or_tty->print_cr("G1BlockOffsetSharedArray::G1BlockOffsetSharedArray: ");
+    gclog_or_tty->print_cr("  "
+                  "  rs.base(): " INTPTR_FORMAT
+                  "  rs.size(): " INTPTR_FORMAT
+                  "  rs end(): " INTPTR_FORMAT,
+                  rs.base(), rs.size(), rs.base() + rs.size());
+    gclog_or_tty->print_cr("  "
+                  "  _vs.low_boundary(): " INTPTR_FORMAT
+                  "  _vs.high_boundary(): " INTPTR_FORMAT,
+                  _vs.low_boundary(),
+                  _vs.high_boundary());
+  }
+}
+
+void G1BlockOffsetSharedArray::resize(size_t new_word_size) {
+  assert(new_word_size <= _reserved.word_size(), "Resize larger than reserved");
+  size_t new_size = compute_size(new_word_size);
+  size_t old_size = _vs.committed_size();
+  size_t delta;
+  char* high = _vs.high();
+  _end = _reserved.start() + new_word_size;
+  if (new_size > old_size) {
+    delta = ReservedSpace::page_align_size_up(new_size - old_size);
+    assert(delta > 0, "just checking");
+    if (!_vs.expand_by(delta)) {
+      // Do better than this for Merlin
+      vm_exit_out_of_memory(delta, "offset table expansion");
+    }
+    assert(_vs.high() == high + delta, "invalid expansion");
+    // Initialization of the contents is left to the
+    // G1BlockOffsetArray that uses it.
+  } else {
+    delta = ReservedSpace::page_align_size_down(old_size - new_size);
+    if (delta == 0) return;
+    _vs.shrink_by(delta);
+    assert(_vs.high() == high - delta, "invalid shrink");
+  }
+}
+
+bool G1BlockOffsetSharedArray::is_card_boundary(HeapWord* p) const {
+  assert(p >= _reserved.start(), "just checking");
+  size_t delta = pointer_delta(p, _reserved.start());
+  return (delta & right_n_bits(LogN_words)) == (size_t)NoBits;
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// G1BlockOffsetArray
+//////////////////////////////////////////////////////////////////////
+
+G1BlockOffsetArray::G1BlockOffsetArray(G1BlockOffsetSharedArray* array,
+                                       MemRegion mr, bool init_to_zero) :
+  G1BlockOffsetTable(mr.start(), mr.end()),
+  _unallocated_block(_bottom),
+  _array(array), _csp(NULL),
+  _init_to_zero(init_to_zero) {
+  assert(_bottom <= _end, "arguments out of order");
+  if (!_init_to_zero) {
+    // initialize cards to point back to mr.start()
+    set_remainder_to_point_to_start(mr.start() + N_words, mr.end());
+    _array->set_offset_array(0, 0);  // set first card to 0
+  }
+}
+
+void G1BlockOffsetArray::set_space(Space* sp) {
+  _sp = sp;
+  _csp = sp->toContiguousSpace();
+}
+
+// The arguments follow the normal convention of denoting
+// a right-open interval: [start, end)
+void
+G1BlockOffsetArray::set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) {
+
+  if (start >= end) {
+    // The start address is equal to the end address (or to
+    // the right of the end address), so there are no cards
+    // that need to be updated.
+    return;
+  }
+
+  // Write the backskip value for each region.
+  //
+  //    offset
+  //    card             2nd                       3rd
+  //     | +- 1st        |                         |
+  //     v v             v                         v
+  //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+     +-+-+-+-+-+-+-+-+-+-+-
+  //    |x|0|0|0|0|0|0|0|1|1|1|1|1|1| ... |1|1|1|1|2|2|2|2|2|2| ...
+  //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+     +-+-+-+-+-+-+-+-+-+-+-
+  //    11              19                        75
+  //      12
+  //
+  //    offset card is the card that points to the start of an object
+  //      x - offset value of offset card
+  //    1st - start of first logarithmic region
+  //      0 corresponds to logarithmic value N_words + 0 and 2**(3 * 0) = 1
+  //    2nd - start of second logarithmic region
+  //      1 corresponds to logarithmic value N_words + 1 and 2**(3 * 1) = 8
+  //    3rd - start of third logarithmic region
+  //      2 corresponds to logarithmic value N_words + 2 and 2**(3 * 2) = 64
+  //
+  //    integer below the block offset entry is an example of
+  //    the index of the entry
+  //
+  //    Given an address,
+  //      Find the index for the address
+  //      Find the block offset table entry
+  //      Convert the entry to a backskip
+  //        (e.g., with today's encoding, offset = 0x81 =>
+  //          backskip = 2**(3*(0x81 - N_words)) = 2**3 = 8)
+  //      Move back N (e.g., 8) entries and repeat with the
+  //        value of the new entry
+  //
+  size_t start_card = _array->index_for(start);
+  size_t end_card = _array->index_for(end-1);
+  assert(start == _array->address_for_index(start_card), "Precondition");
+  assert(end == _array->address_for_index(end_card) + N_words, "Precondition");
+  set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval
+}
+
+// Unlike the normal convention in this code, the argument here denotes
+// a closed, inclusive interval: [start_card, end_card], cf. set_remainder_to_point_to_start()
+// above.
+void
+G1BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) {
+  if (start_card > end_card) {
+    return;
+  }
+  assert(start_card > _array->index_for(_bottom), "Cannot be first card");
+  assert(_array->offset_array(start_card-1) <= N_words,
+    "Offset card has an unexpected value");
+  size_t start_card_for_region = start_card;
+  u_char offset = max_jubyte;
+  for (int i = 0; i < BlockOffsetArray::N_powers; i++) {
+    // -1 so that the card with the actual offset is counted.  Another -1
+    // so that the reach ends in this region and not at the start
+    // of the next.
+    size_t reach = start_card - 1 + (BlockOffsetArray::power_to_cards_back(i+1) - 1);
+    offset = N_words + i;
+    if (reach >= end_card) {
+      _array->set_offset_array(start_card_for_region, end_card, offset);
+      start_card_for_region = reach + 1;
+      break;
+    }
+    _array->set_offset_array(start_card_for_region, reach, offset);
+    start_card_for_region = reach + 1;
+  }
+  assert(start_card_for_region > end_card, "Sanity check");
+  DEBUG_ONLY(check_all_cards(start_card, end_card);)
+}
+
+// The block [blk_start, blk_end) has been allocated;
+// adjust the block offset table to represent this information;
+// right-open interval: [blk_start, blk_end)
+void
+G1BlockOffsetArray::alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
+  mark_block(blk_start, blk_end);
+  allocated(blk_start, blk_end);
+}
+
+// Adjust BOT to show that a previously whole block has been split
+// into two.
+void G1BlockOffsetArray::split_block(HeapWord* blk, size_t blk_size,
+                                     size_t left_blk_size) {
+  // Verify that the BOT shows [blk, blk + blk_size) to be one block.
+  verify_single_block(blk, blk_size);
+  // Update the BOT to indicate that [blk + left_blk_size, blk + blk_size)
+  // is one single block.
+  mark_block(blk + left_blk_size, blk + blk_size);
+}
+
+
+// Action_mark - update the BOT for the block [blk_start, blk_end).
+//               Current typical use is for splitting a block.
+// Action_single - update the BOT for an allocation.
+// Action_check - BOT verification.
+void G1BlockOffsetArray::do_block_internal(HeapWord* blk_start,
+                                           HeapWord* blk_end,
+                                           Action action) {
+  assert(Universe::heap()->is_in_reserved(blk_start),
+         "reference must be into the heap");
+  assert(Universe::heap()->is_in_reserved(blk_end-1),
+         "limit must be within the heap");
+  // This is optimized to make the test fast, assuming we only rarely
+  // cross boundaries.
+  uintptr_t end_ui = (uintptr_t)(blk_end - 1);
+  uintptr_t start_ui = (uintptr_t)blk_start;
+  // Calculate the last card boundary preceding end of blk
+  intptr_t boundary_before_end = (intptr_t)end_ui;
+  clear_bits(boundary_before_end, right_n_bits(LogN));
+  if (start_ui <= (uintptr_t)boundary_before_end) {
+    // blk starts at or crosses a boundary
+    // Calculate index of card on which blk begins
+    size_t    start_index = _array->index_for(blk_start);
+    // Index of card on which blk ends
+    size_t    end_index   = _array->index_for(blk_end - 1);
+    // Start address of card on which blk begins
+    HeapWord* boundary    = _array->address_for_index(start_index);
+    assert(boundary <= blk_start, "blk should start at or after boundary");
+    if (blk_start != boundary) {
+      // blk starts strictly after boundary
+      // adjust card boundary and start_index forward to next card
+      boundary += N_words;
+      start_index++;
+    }
+    assert(start_index <= end_index, "monotonicity of index_for()");
+    assert(boundary <= (HeapWord*)boundary_before_end, "tautology");
+    switch (action) {
+      case Action_mark: {
+        if (init_to_zero()) {
+          _array->set_offset_array(start_index, boundary, blk_start);
+          break;
+        } // Else fall through to the next case
+      }
+      case Action_single: {
+        _array->set_offset_array(start_index, boundary, blk_start);
+        // We have finished marking the "offset card". We need to now
+        // mark the subsequent cards that this blk spans.
+        if (start_index < end_index) {
+          HeapWord* rem_st = _array->address_for_index(start_index) + N_words;
+          HeapWord* rem_end = _array->address_for_index(end_index) + N_words;
+          set_remainder_to_point_to_start(rem_st, rem_end);
+        }
+        break;
+      }
+      case Action_check: {
+        _array->check_offset_array(start_index, boundary, blk_start);
+        // We have finished checking the "offset card". We need to now
+        // check the subsequent cards that this blk spans.
+        check_all_cards(start_index + 1, end_index);
+        break;
+      }
+      default:
+        ShouldNotReachHere();
+    }
+  }
+}
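+
+// To illustrate the cases above: for a block spanning cards
+// [start_index, end_index], Action_single records the offset of blk_start
+// in the entry for the first card boundary at or after blk_start and then
+// has set_remainder_to_point_to_start() fill cards start_index+1..end_index
+// with backskip entries; Action_mark touches only that first card when
+// init_to_zero() is true, and otherwise falls through to the Action_single
+// behaviour.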
+
+// The card-interval [start_card, end_card] is a closed interval; this
+// is an expensive check -- use with care and only under protection of
+// suitable flag.
+void G1BlockOffsetArray::check_all_cards(size_t start_card, size_t end_card) const {
+
+  if (end_card < start_card) {
+    return;
+  }
+  guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card");
+  for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) {
+    u_char entry = _array->offset_array(c);
+    if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) {
+      guarantee(entry > N_words, "Should be in logarithmic region");
+    }
+    size_t backskip = BlockOffsetArray::entry_to_cards_back(entry);
+    size_t landing_card = c - backskip;
+    guarantee(landing_card >= (start_card - 1), "Inv");
+    if (landing_card >= start_card) {
+      guarantee(_array->offset_array(landing_card) <= entry, "monotonicity");
+    } else {
+      guarantee(landing_card == start_card - 1, "Tautology");
+      guarantee(_array->offset_array(landing_card) <= N_words, "Offset value");
+    }
+  }
+}
+
+// The range [blk_start, blk_end) represents a single contiguous block
+// of storage; modify the block offset table to represent this
+// information; Right-open interval: [blk_start, blk_end)
+// NOTE: this method does _not_ adjust _unallocated_block.
+void
+G1BlockOffsetArray::single_block(HeapWord* blk_start, HeapWord* blk_end) {
+  do_block_internal(blk_start, blk_end, Action_single);
+}
+
+// Mark the BOT such that if [blk_start, blk_end) straddles a card
+// boundary, the card following the first such boundary is marked
+// with the appropriate offset.
+// NOTE: this method does _not_ adjust _unallocated_block or
+// any cards subsequent to the first one.
+void
+G1BlockOffsetArray::mark_block(HeapWord* blk_start, HeapWord* blk_end) {
+  do_block_internal(blk_start, blk_end, Action_mark);
+}
+
+void G1BlockOffsetArray::join_blocks(HeapWord* blk1, HeapWord* blk2) {
+  HeapWord* blk1_start = Universe::heap()->block_start(blk1);
+  HeapWord* blk2_start = Universe::heap()->block_start(blk2);
+  assert(blk1 == blk1_start && blk2 == blk2_start,
+         "Must be block starts.");
+  assert(blk1 + _sp->block_size(blk1) == blk2, "Must be contiguous.");
+  size_t blk1_start_index = _array->index_for(blk1);
+  size_t blk2_start_index = _array->index_for(blk2);
+  assert(blk1_start_index <= blk2_start_index, "sanity");
+  HeapWord* blk2_card_start = _array->address_for_index(blk2_start_index);
+  if (blk2 == blk2_card_start) {
+    // blk2 starts a card.  Does blk1 start on the previous card, or further
+    // back?
+    assert(blk1_start_index < blk2_start_index, "must be lower card.");
+    if (blk1_start_index + 1 == blk2_start_index) {
+      // previous card; new value for blk2 card is size of blk1.
+      _array->set_offset_array(blk2_start_index, (u_char) _sp->block_size(blk1));
+    } else {
+      // Earlier card; go back a card.
+      _array->set_offset_array(blk2_start_index, N_words);
+    }
+  } else {
+    // blk2 does not start a card.  Does it cross a card?  If not, nothing
+    // to do.
+    size_t blk2_end_index =
+      _array->index_for(blk2 + _sp->block_size(blk2) - 1);
+    assert(blk2_end_index >= blk2_start_index, "sanity");
+    if (blk2_end_index > blk2_start_index) {
+      // Yes, it crosses a card.  The value for the next card must change.
+      if (blk1_start_index + 1 == blk2_start_index) {
+        // previous card; new value for second blk2 card is size of blk1.
+        _array->set_offset_array(blk2_start_index + 1,
+                                 (u_char) _sp->block_size(blk1));
+      } else {
+        // Earlier card; go back a card.
+        _array->set_offset_array(blk2_start_index + 1, N_words);
+      }
+    }
+  }
+}
+
+HeapWord* G1BlockOffsetArray::block_start_unsafe(const void* addr) {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  // Must read this exactly once because it can be modified by parallel
+  // allocation.
+  HeapWord* ub = _unallocated_block;
+  if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
+    assert(ub < _end, "tautology (see above)");
+    return ub;
+  }
+  // Otherwise, find the block start using the table.
+  HeapWord* q = block_at_or_preceding(addr, false, 0);
+  return forward_to_block_containing_addr(q, addr);
+}
+
+// This duplicates a little code from the above: unavoidable.
+HeapWord*
+G1BlockOffsetArray::block_start_unsafe_const(const void* addr) const {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  // Must read this exactly once because it can be modified by parallel
+  // allocation.
+  HeapWord* ub = _unallocated_block;
+  if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
+    assert(ub < _end, "tautology (see above)");
+    return ub;
+  }
+  // Otherwise, find the block start using the table.
+  HeapWord* q = block_at_or_preceding(addr, false, 0);
+  HeapWord* n = q + _sp->block_size(q);
+  return forward_to_block_containing_addr_const(q, n, addr);
+}
+
+
+HeapWord*
+G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
+                                                          HeapWord* n,
+                                                          const void* addr) {
+  // We're not in the normal case.  We need to handle an important subcase
+  // here: LAB allocation.  An allocation previously recorded in the
+  // offset table was actually a lab allocation, and was divided into
+  // several objects subsequently.  Fix this situation as we answer the
+  // query, by updating entries as we cross them.
+
+  // If the first object's end, n, is at a card boundary, start refining
+  // with the corresponding card (the value of the entry will basically be
+  // set to 0); if the object crosses the boundary, start from the next card.
+  size_t next_index = _array->index_for(n) + !_array->is_card_boundary(n);
+  HeapWord* next_boundary = _array->address_for_index(next_index);
+  if (csp() != NULL) {
+    if (addr >= csp()->top()) return csp()->top();
+    while (next_boundary < addr) {
+      while (n <= next_boundary) {
+        q = n;
+        oop obj = oop(q);
+        if (obj->klass() == NULL) return q;
+        n += obj->size();
+      }
+      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
+      // [q, n) is the block that crosses the boundary.
+      alloc_block_work2(&next_boundary, &next_index, q, n);
+    }
+  } else {
+    while (next_boundary < addr) {
+      while (n <= next_boundary) {
+        q = n;
+        oop obj = oop(q);
+        if (obj->klass() == NULL) return q;
+        n += _sp->block_size(q);
+      }
+      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
+      // [q, n) is the block that crosses the boundary.
+      alloc_block_work2(&next_boundary, &next_index, q, n);
+    }
+  }
+  return forward_to_block_containing_addr_const(q, n, addr);
+}
+
+HeapWord* G1BlockOffsetArray::block_start_careful(const void* addr) const {
+  assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
+
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  // Must read this exactly once because it can be modified by parallel
+  // allocation.
+  HeapWord* ub = _unallocated_block;
+  if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
+    assert(ub < _end, "tautology (see above)");
+    return ub;
+  }
+
+  // Otherwise, find the block start using the table, but taking
+  // care (cf. block_start_unsafe() above) not to parse any objects/blocks
+  // on the cards themselves.
+  size_t index = _array->index_for(addr);
+  assert(_array->address_for_index(index) == addr,
+         "arg should be start of card");
+
+  HeapWord* q = (HeapWord*)addr;
+  uint offset;
+  do {
+    offset = _array->offset_array(index--);
+    q -= offset;
+  } while (offset == N_words);
+  assert(q <= addr, "block start should be to left of arg");
+  return q;
+}
+
+// Note that the committed size of the covered space may have changed,
+// so the table size might also wish to change.
+void G1BlockOffsetArray::resize(size_t new_word_size) {
+  HeapWord* new_end = _bottom + new_word_size;
+  if (_end < new_end && !init_to_zero()) {
+    // verify that the old and new boundaries are also card boundaries
+    assert(_array->is_card_boundary(_end),
+           "_end not a card boundary");
+    assert(_array->is_card_boundary(new_end),
+           "new _end would not be a card boundary");
+    // set all the newly added cards
+    _array->set_offset_array(_end, new_end, N_words);
+  }
+  _end = new_end;  // update _end
+}
+
+void G1BlockOffsetArray::set_region(MemRegion mr) {
+  _bottom = mr.start();
+  _end = mr.end();
+}
+
+//
+//              threshold_
+//              |   _index_
+//              v   v
+//      +-------+-------+-------+-------+-------+
+//      | i-1   |   i   | i+1   | i+2   | i+3   |
+//      +-------+-------+-------+-------+-------+
+//       ( ^    ]
+//         block-start
+//
+void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_,
+                                           HeapWord* blk_start, HeapWord* blk_end) {
+  // For efficiency, do copy-in/copy-out.
+  HeapWord* threshold = *threshold_;
+  size_t    index = *index_;
+
+  assert(blk_start != NULL && blk_end > blk_start,
+         "phantom block");
+  assert(blk_end > threshold, "should be past threshold");
+  assert(blk_start <= threshold, "blk_start should be at or before threshold");
+  assert(pointer_delta(threshold, blk_start) <= N_words,
+         "offset should be <= BlockOffsetSharedArray::N");
+  assert(Universe::heap()->is_in_reserved(blk_start),
+         "reference must be into the heap");
+  assert(Universe::heap()->is_in_reserved(blk_end-1),
+         "limit must be within the heap");
+  assert(threshold == _array->_reserved.start() + index*N_words,
+         "index must agree with threshold");
+
+  DEBUG_ONLY(size_t orig_index = index;)
+
+  // Mark the card that holds the offset into the block.  Note
+  // that _next_offset_index and _next_offset_threshold are not
+  // updated until the end of this method.
+  _array->set_offset_array(index, threshold, blk_start);
+
+  // We need to now mark the subsequent cards that this blk spans.
+
+  // Index of card on which blk ends.
+  size_t end_index   = _array->index_for(blk_end - 1);
+
+  // Are there more cards left to be updated?
+  if (index + 1 <= end_index) {
+    HeapWord* rem_st  = _array->address_for_index(index + 1);
+    // Calculate rem_end this way because end_index
+    // may be the last valid index in the covered region.
+    HeapWord* rem_end = _array->address_for_index(end_index) +  N_words;
+    set_remainder_to_point_to_start(rem_st, rem_end);
+  }
+
+  index = end_index + 1;
+  // Calculate threshold_ this way because end_index
+  // may be the last valid index in the covered region.
+  threshold = _array->address_for_index(end_index) + N_words;
+  assert(threshold >= blk_end, "Incorrect offset threshold");
+
+  // index_ and threshold_ updated here.
+  *threshold_ = threshold;
+  *index_ = index;
+
+#ifdef ASSERT
+  // The offset can be 0 if the block starts on a boundary.  That
+  // is checked by an assertion above.
+  size_t start_index = _array->index_for(blk_start);
+  HeapWord* boundary    = _array->address_for_index(start_index);
+  assert((_array->offset_array(orig_index) == 0 &&
+          blk_start == boundary) ||
+          (_array->offset_array(orig_index) > 0 &&
+         _array->offset_array(orig_index) <= N_words),
+         "offset array should have been set");
+  for (size_t j = orig_index + 1; j <= end_index; j++) {
+    assert(_array->offset_array(j) > 0 &&
+           _array->offset_array(j) <=
+             (u_char) (N_words+BlockOffsetArray::N_powers-1),
+           "offset array should have been set");
+  }
+#endif
+}
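+
+// Worked example (assuming, say, N_words == 64): with the old threshold at
+// T, blk_start == T - 10 and blk_end == T + 190, the card at "index" gets
+// offset 10 (back to blk_start), the following two cards get backskip
+// entries via set_remainder_to_point_to_start(), and on return
+// *threshold_ == T + 192 and *index_ == end_index + 1.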
+
+//////////////////////////////////////////////////////////////////////
+// G1BlockOffsetArrayContigSpace
+//////////////////////////////////////////////////////////////////////
+
+HeapWord*
+G1BlockOffsetArrayContigSpace::block_start_unsafe(const void* addr) {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
+  return forward_to_block_containing_addr(q, addr);
+}
+
+HeapWord*
+G1BlockOffsetArrayContigSpace::
+block_start_unsafe_const(const void* addr) const {
+  assert(_bottom <= addr && addr < _end,
+         "addr must be covered by this Array");
+  HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
+  HeapWord* n = q + _sp->block_size(q);
+  return forward_to_block_containing_addr_const(q, n, addr);
+}
+
+G1BlockOffsetArrayContigSpace::
+G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array,
+                              MemRegion mr) :
+  G1BlockOffsetArray(array, mr, true)
+{
+  _next_offset_threshold = NULL;
+  _next_offset_index = 0;
+}
+
+HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() {
+  assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
+         "just checking");
+  _next_offset_index = _array->index_for(_bottom);
+  _next_offset_index++;
+  _next_offset_threshold =
+    _array->address_for_index(_next_offset_index);
+  return _next_offset_threshold;
+}
+
+void G1BlockOffsetArrayContigSpace::zero_bottom_entry() {
+  assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
+         "just checking");
+  size_t bottom_index = _array->index_for(_bottom);
+  assert(_array->address_for_index(bottom_index) == _bottom,
+         "Precondition of call");
+  _array->set_offset_array(bottom_index, 0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,487 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The CollectedHeap type requires subtypes to implement a method
+// "block_start".  For some subtypes, notably generational
+// systems using card-table-based write barriers, the efficiency of this
+// operation may be important.  Implementations of the "BlockOffsetArray"
+// class may be useful in providing such efficient implementations.
+//
+// While generally mirroring the structure of the BOT for GenCollectedHeap,
+// the following types are tailored more towards G1's uses; these should,
+// however, be merged back into a common BOT to avoid code duplication
+// and reduce maintenance overhead.
+//
+//    G1BlockOffsetTable (abstract)
+//    -- G1BlockOffsetArray                (uses G1BlockOffsetSharedArray)
+//       -- G1BlockOffsetArrayContigSpace
+//
+// A main impediment to the consolidation of this code might be the
+// effect of making some of the block_start*() calls non-const as
+// below. Whether that might adversely affect performance optimizations
+// that compilers might normally perform in the case of non-G1
+// collectors needs to be carefully investigated prior to any such
+// consolidation.
+
+// Forward declarations
+class ContiguousSpace;
+class G1BlockOffsetSharedArray;
+
+class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC {
+  friend class VMStructs;
+protected:
+  // These members describe the region covered by the table.
+
+  // The space this table is covering.
+  HeapWord* _bottom;    // == reserved.start
+  HeapWord* _end;       // End of currently allocated region.
+
+public:
+  // Initialize the table to cover the given space.
+  // The contents of the initial table are undefined.
+  G1BlockOffsetTable(HeapWord* bottom, HeapWord* end) :
+    _bottom(bottom), _end(end)
+    {
+      assert(_bottom <= _end, "arguments out of order");
+    }
+
+  // Note that the committed size of the covered space may have changed,
+  // so the table size might also wish to change.
+  virtual void resize(size_t new_word_size) = 0;
+
+  virtual void set_bottom(HeapWord* new_bottom) {
+    assert(new_bottom <= _end, "new_bottom > _end");
+    _bottom = new_bottom;
+    resize(pointer_delta(_end, _bottom));
+  }
+
+  // Requires "addr" to be contained by a block, and returns the address of
+  // the start of that block.  (May have side effects, namely updating of
+  // shared array entries that "point" too far backwards.  This can occur,
+  // for example, when LAB allocation is used in a space covered by the
+  // table.)
+  virtual HeapWord* block_start_unsafe(const void* addr) = 0;
+  // Same as above, but does not have any of the possible side effects
+  // discussed above.
+  virtual HeapWord* block_start_unsafe_const(const void* addr) const = 0;
+
+  // Returns the address of the start of the block containing "addr", or
+  // else "null" if it is covered by no block.  (May have side effects,
+  // namely updating of shared array entries that "point" too far
+  // backwards.  This can occur, for example, when lab allocation is used
+  // in a space covered by the table.)
+  inline HeapWord* block_start(const void* addr);
+  // Same as above, but does not have any of the possible side effects
+  // discussed above.
+  inline HeapWord* block_start_const(const void* addr) const;
+};
+
+// This implementation of "G1BlockOffsetTable" divides the covered region
+// into "N"-word subregions (where "N" = 2^"LogN".  An array with an entry
+// for each such subregion indicates how far back one must go to find the
+// start of the chunk that includes the first word of the subregion.
+//
+// Each BlockOffsetArray is owned by a Space.  However, the actual array
+// may be shared by several BlockOffsetArrays; this is useful
+// when a single resizable area (such as a generation) is divided up into
+// several spaces in which contiguous allocation takes place,
+// such as, for example, in G1 or in the train generation.
+
+// Here is the shared array type.
+
+class G1BlockOffsetSharedArray: public CHeapObj {
+  friend class G1BlockOffsetArray;
+  friend class G1BlockOffsetArrayContigSpace;
+  friend class VMStructs;
+
+private:
+  // The reserved region covered by the shared array.
+  MemRegion _reserved;
+
+  // End of the current committed region.
+  HeapWord* _end;
+
+  // Array for keeping offsets for retrieving object start fast given an
+  // address.
+  VirtualSpace _vs;
+  u_char* _offset_array;          // byte array keeping backwards offsets
+
+  // Bounds checking accessors:
+  // For performance these have to devolve to array accesses in product builds.
+  u_char offset_array(size_t index) const {
+    assert(index < _vs.committed_size(), "index out of range");
+    return _offset_array[index];
+  }
+
+  void set_offset_array(size_t index, u_char offset) {
+    assert(index < _vs.committed_size(), "index out of range");
+    assert(offset <= N_words, "offset too large");
+    _offset_array[index] = offset;
+  }
+
+  void set_offset_array(size_t index, HeapWord* high, HeapWord* low) {
+    assert(index < _vs.committed_size(), "index out of range");
+    assert(high >= low, "addresses out of order");
+    assert(pointer_delta(high, low) <= N_words, "offset too large");
+    _offset_array[index] = (u_char) pointer_delta(high, low);
+  }
+
+  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
+    assert(index_for(right - 1) < _vs.committed_size(),
+           "right address out of range");
+    assert(left  < right, "Heap addresses out of order");
+    size_t num_cards = pointer_delta(right, left) >> LogN_words;
+    memset(&_offset_array[index_for(left)], offset, num_cards);
+  }
+
+  void set_offset_array(size_t left, size_t right, u_char offset) {
+    assert(right < _vs.committed_size(), "right address out of range");
+    assert(left  <= right, "indexes out of order");
+    size_t num_cards = right - left + 1;
+    memset(&_offset_array[left], offset, num_cards);
+  }
+
+  void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
+    assert(index < _vs.committed_size(), "index out of range");
+    assert(high >= low, "addresses out of order");
+    assert(pointer_delta(high, low) <= N_words, "offset too large");
+    assert(_offset_array[index] == pointer_delta(high, low),
+           "Wrong offset");
+  }
+
+  bool is_card_boundary(HeapWord* p) const;
+
+  // Return the number of slots needed for an offset array
+  // that covers mem_region_words words.
+  // We always add an extra slot because if an object
+  // ends on a card boundary we put a 0 in the next
+  // offset array slot, so we want that slot always
+  // to be reserved.
+
+  size_t compute_size(size_t mem_region_words) {
+    size_t number_of_slots = (mem_region_words / N_words) + 1;
+    return ReservedSpace::page_align_size_up(number_of_slots);
+  }
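+
+  // For example, covering 2M heap words with 64-word cards needs
+  // 2M/64 + 1 = 32769 byte-sized slots, which page_align_size_up then
+  // rounds up to a multiple of the page size.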
+
+public:
+  enum SomePublicConstants {
+    LogN = 9,
+    LogN_words = LogN - LogHeapWordSize,
+    N_bytes = 1 << LogN,
+    N_words = 1 << LogN_words
+  };
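+
+  // With LogN == 9 each card covers N_bytes == 512 bytes of heap; N_words
+  // is that span in heap words (e.g. 64 words on an LP64 VM, where
+  // LogHeapWordSize == 3).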
+
+  // Initialize the table to cover from "base" to (at least)
+  // "base + init_word_size".  In the future, the table may be expanded
+  // (see "resize" below) up to the size of "_reserved" (which must be at
+  // least "init_word_size".) The contents of the initial table are
+  // undefined; it is the responsibility of the constituent
+  // G1BlockOffsetTable(s) to initialize cards.
+  G1BlockOffsetSharedArray(MemRegion reserved, size_t init_word_size);
+
+  // Notes a change in the committed size of the region covered by the
+  // table.  The "new_word_size" may not be larger than the size of the
+  // reserved region this table covers.
+  void resize(size_t new_word_size);
+
+  void set_bottom(HeapWord* new_bottom);
+
+  // Updates all the BlockOffsetArrays sharing this shared array to
+  // reflect the current "top"'s of their spaces.
+  void update_offset_arrays();
+
+  // Return the appropriate index into "_offset_array" for "p".
+  inline size_t index_for(const void* p) const;
+
+  // Return the address indicating the start of the region corresponding to
+  // "index" in "_offset_array".
+  inline HeapWord* address_for_index(size_t index) const;
+};
+
+// And here is the G1BlockOffsetTable subtype that uses the array.
+
+class G1BlockOffsetArray: public G1BlockOffsetTable {
+  friend class G1BlockOffsetSharedArray;
+  friend class G1BlockOffsetArrayContigSpace;
+  friend class VMStructs;
+private:
+  enum SomePrivateConstants {
+    N_words = G1BlockOffsetSharedArray::N_words,
+    LogN    = G1BlockOffsetSharedArray::LogN
+  };
+
+  // The following enum values are used by do_block_internal()
+  enum Action {
+    Action_single,      // BOT records a single block (see single_block())
+    Action_mark,        // BOT marks the start of a block (see mark_block())
+    Action_check        // Check that BOT records block correctly
+                        // (see verify_single_block()).
+  };
+
+  // This is the array, which can be shared by several BlockOffsetArray's
+  // servicing different
+  G1BlockOffsetSharedArray* _array;
+
+  // The space that owns this subregion.
+  Space* _sp;
+
+  // If "_sp" is a contiguous space, the field below is the view of "_sp"
+  // as a contiguous space, else NULL.
+  ContiguousSpace* _csp;
+
+  // If true, array entries are initialized to 0; otherwise, they are
+  // initialized to point backwards to the beginning of the covered region.
+  bool _init_to_zero;
+
+  // The portion [_unallocated_block, _sp.end()) of the space that
+  // is a single block known not to contain any objects.
+  // NOTE: See BlockOffsetArrayUseUnallocatedBlock flag.
+  HeapWord* _unallocated_block;
+
+  // Sets the entries
+  // corresponding to the cards starting at "start" and ending at "end"
+  // to point back to the card before "start": the interval [start, end)
+  // is right-open.
+  void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end);
+  // Same as above, except that the args here are a card _index_ interval
+  // that is closed: [start_index, end_index]
+  void set_remainder_to_point_to_start_incl(size_t start, size_t end);
+
+  // A helper function for BOT adjustment/verification work
+  void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action);
+
+protected:
+
+  ContiguousSpace* csp() const { return _csp; }
+
+  // Returns the address of a block whose start is at most "addr".
+  // If "has_max_index" is true, "assumes "max_index" is the last valid one
+  // in the array.
+  inline HeapWord* block_at_or_preceding(const void* addr,
+                                         bool has_max_index,
+                                         size_t max_index) const;
+
+  // "q" is a block boundary that is <= "addr"; "n" is the address of the
+  // next block (or the end of the space.)  Return the address of the
+  // beginning of the block that contains "addr".  Does so without side
+  // effects (see, e.g., the spec of block_start).
+  inline HeapWord*
+  forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
+                                         const void* addr) const;
+
+  // "q" is a block boundary that is <= "addr"; return the address of the
+  // beginning of the block that contains "addr".  May have side effects
+  // on "this", by updating imprecise entries.
+  inline HeapWord* forward_to_block_containing_addr(HeapWord* q,
+                                                    const void* addr);
+
+  // "q" is a block boundary that is <= "addr"; "n" is the address of the
+  // next block (or the end of the space.)  Return the address of the
+  // beginning of the block that contains "addr".  May have side effects
+  // on "this", by updating imprecise entries.
+  HeapWord* forward_to_block_containing_addr_slow(HeapWord* q,
+                                                  HeapWord* n,
+                                                  const void* addr);
+
+  // Requires that "*threshold_" be the first array entry boundary at or
+  // above "blk_start", and that "*index_" be the corresponding array
+  // index.  If the block starts at or crosses "*threshold_", records
+  // "blk_start" as the appropriate block start for the array index
+  // starting at "*threshold_", and for any other indices crossed by the
+  // block.  Updates "*threshold_" and "*index_" to correspond to the first
+  // index after the block end.
+  void alloc_block_work2(HeapWord** threshold_, size_t* index_,
+                         HeapWord* blk_start, HeapWord* blk_end);
+
+public:
+  // The space may not have its bottom and top set yet, which is why the
+  // region is passed as a parameter.  If "init_to_zero" is true, the
+  // elements of the array are initialized to zero.  Otherwise, they are
+  // initialized to point backwards to the beginning.
+  G1BlockOffsetArray(G1BlockOffsetSharedArray* array, MemRegion mr,
+                     bool init_to_zero);
+
+  // Note: this ought to be part of the constructor, but that would require
+  // "this" to be passed as a parameter to a member constructor for
+  // the containing concrete subtype of Space.
+  // This would be legal C++, but MS VC++ doesn't allow it.
+  void set_space(Space* sp);
+
+  // Resets the covered region to the given "mr".
+  void set_region(MemRegion mr);
+
+  // Resets the covered region to one with the same _bottom as before but
+  // the "new_word_size".
+  void resize(size_t new_word_size);
+
+  // These must be guaranteed to work properly (i.e., do nothing)
+  // when "blk_start" ("blk" for second version) is "NULL".
+  virtual void alloc_block(HeapWord* blk_start, HeapWord* blk_end);
+  virtual void alloc_block(HeapWord* blk, size_t size) {
+    alloc_block(blk, blk + size);
+  }
+
+  // The following methods are useful and optimized for a
+  // general, non-contiguous space.
+
+  // The given arguments are required to be the starts of adjacent ("blk1"
+  // before "blk2") well-formed blocks covered by "this".  After this call,
+  // they should be considered to form one block.
+  virtual void join_blocks(HeapWord* blk1, HeapWord* blk2);
+
+  // Given a block [blk_start, blk_start + full_blk_size), and
+  // a left_blk_size < full_blk_size, adjust the BOT to show two
+  // blocks [blk_start, blk_start + left_blk_size) and
+  // [blk_start + left_blk_size, blk_start + full_blk_size).
+  // It is assumed (and verified in the non-product VM) that the
+  // BOT was correct for the original block.
+  void split_block(HeapWord* blk_start, size_t full_blk_size,
+                           size_t left_blk_size);
+
+  // Adjust the BOT to show that it has a single block in the
+  // range [blk_start, blk_start + size). All necessary BOT
+  // cards are adjusted, but _unallocated_block isn't.
+  void single_block(HeapWord* blk_start, HeapWord* blk_end);
+  void single_block(HeapWord* blk, size_t size) {
+    single_block(blk, blk + size);
+  }
+
+  // Adjust BOT to show that it has a block in the range
+  // [blk_start, blk_start + size). Only the first card
+  // of BOT is touched. It is assumed (and verified in the
+  // non-product VM) that the remaining cards of the block
+  // are correct.
+  void mark_block(HeapWord* blk_start, HeapWord* blk_end);
+  void mark_block(HeapWord* blk, size_t size) {
+    mark_block(blk, blk + size);
+  }
+
+  // Adjust _unallocated_block to indicate that a particular
+  // block has been newly allocated or freed. It is assumed (and
+  // verified in the non-product VM) that the BOT is correct for
+  // the given block.
+  inline void allocated(HeapWord* blk_start, HeapWord* blk_end) {
+    // Verify that the BOT shows [blk, blk + blk_size) to be one block.
+    verify_single_block(blk_start, blk_end);
+    if (BlockOffsetArrayUseUnallocatedBlock) {
+      _unallocated_block = MAX2(_unallocated_block, blk_end);
+    }
+  }
+
+  inline void allocated(HeapWord* blk, size_t size) {
+    allocated(blk, blk + size);
+  }
+
+  inline void freed(HeapWord* blk_start, HeapWord* blk_end);
+
+  inline void freed(HeapWord* blk, size_t size);
+
+  virtual HeapWord* block_start_unsafe(const void* addr);
+  virtual HeapWord* block_start_unsafe_const(const void* addr) const;
+
+  // Requires "addr" to be the start of a card and returns the
+  // start of the block that contains the given address.
+  HeapWord* block_start_careful(const void* addr) const;
+
+  // If true, initialize array slots with no allocated blocks to zero.
+  // Otherwise, make them point back to the front.
+  bool init_to_zero() { return _init_to_zero; }
+
+  // Verification & debugging - ensure that the offset table reflects the fact
+  // that the block [blk_start, blk_end) or [blk, blk + size) is a
+  // single block of storage. NOTE: can;t const this because of
+  // call to non-const do_block_internal() below.
+  inline void verify_single_block(HeapWord* blk_start, HeapWord* blk_end) {
+    if (VerifyBlockOffsetArray) {
+      do_block_internal(blk_start, blk_end, Action_check);
+    }
+  }
+
+  inline void verify_single_block(HeapWord* blk, size_t size) {
+    verify_single_block(blk, blk + size);
+  }
+
+  // Verify that the given block is before _unallocated_block
+  inline void verify_not_unallocated(HeapWord* blk_start,
+                                     HeapWord* blk_end) const {
+    if (BlockOffsetArrayUseUnallocatedBlock) {
+      assert(blk_start < blk_end, "Block inconsistency?");
+      assert(blk_end <= _unallocated_block, "_unallocated_block problem");
+    }
+  }
+
+  inline void verify_not_unallocated(HeapWord* blk, size_t size) const {
+    verify_not_unallocated(blk, blk + size);
+  }
+
+  void check_all_cards(size_t left_card, size_t right_card) const;
+};
+
+// A subtype of BlockOffsetArray that takes advantage of the fact
+// that its underlying space is a ContiguousSpace, so that its "active"
+// region can be more efficiently tracked (than for a non-contiguous space).
+class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray {
+  friend class VMStructs;
+
+  // allocation boundary at which offset array must be updated
+  HeapWord* _next_offset_threshold;
+  size_t    _next_offset_index;      // index corresponding to that boundary
+
+  // Work function to be called when allocation start crosses the next
+  // threshold in the contig space.
+  void alloc_block_work1(HeapWord* blk_start, HeapWord* blk_end) {
+    alloc_block_work2(&_next_offset_threshold, &_next_offset_index,
+                      blk_start, blk_end);
+  }
+
+
+ public:
+  G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr);
+
+  // Initialize the threshold to reflect the first boundary after the
+  // bottom of the covered region.
+  HeapWord* initialize_threshold();
+
+  // Zero out the entry for _bottom (offset will be zero).
+  void      zero_bottom_entry();
+
+  // Return the next threshold, the point at which the table should be
+  // updated.
+  HeapWord* threshold() const { return _next_offset_threshold; }
+
+  // These must be guaranteed to work properly (i.e., do nothing)
+  // when "blk_start" ("blk" for second version) is "NULL".  In this
+  // implementation, that's true because NULL is represented as 0, and thus
+  // never exceeds the "_next_offset_threshold".
+  void alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
+    if (blk_end > _next_offset_threshold)
+      alloc_block_work1(blk_start, blk_end);
+  }
+  void alloc_block(HeapWord* blk, size_t size) {
+     alloc_block(blk, blk+size);
+  }
+
+  HeapWord* block_start_unsafe(const void* addr);
+  HeapWord* block_start_unsafe_const(const void* addr) const;
+};
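+
+// Rough usage sketch for the contiguous-space case (illustrative): after
+// construction the owning space is expected to call initialize_threshold()
+// and zero_bottom_entry(); each allocation then just calls
+// alloc_block(start, end), which is a no-op until a block's end first
+// crosses threshold(), at which point alloc_block_work1() records the block
+// start and advances the threshold past the block.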
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
+  if (addr >= _bottom && addr < _end) {
+    return block_start_unsafe(addr);
+  } else {
+    return NULL;
+  }
+}
+
+inline HeapWord*
+G1BlockOffsetTable::block_start_const(const void* addr) const {
+  if (addr >= _bottom && addr < _end) {
+    return block_start_unsafe_const(addr);
+  } else {
+    return NULL;
+  }
+}
+
+inline size_t G1BlockOffsetSharedArray::index_for(const void* p) const {
+  char* pc = (char*)p;
+  assert(pc >= (char*)_reserved.start() &&
+         pc <  (char*)_reserved.end(),
+         "p not in range.");
+  size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char));
+  size_t result = delta >> LogN;
+  assert(result < _vs.committed_size(), "bad index from address");
+  return result;
+}
+
+inline HeapWord*
+G1BlockOffsetSharedArray::address_for_index(size_t index) const {
+  assert(index < _vs.committed_size(), "bad index");
+  HeapWord* result = _reserved.start() + (index << LogN_words);
+  assert(result >= _reserved.start() && result < _reserved.end(),
+         "bad address from index");
+  return result;
+}
+
+inline HeapWord*
+G1BlockOffsetArray::block_at_or_preceding(const void* addr,
+                                          bool has_max_index,
+                                          size_t max_index) const {
+  assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
+  size_t index = _array->index_for(addr);
+  // We must make sure that the offset table entry we use is valid.  If
+  // "addr" is past the end, start at the last known one and go forward.
+  if (has_max_index) {
+    index = MIN2(index, max_index);
+  }
+  HeapWord* q = _array->address_for_index(index);
+
+  uint offset = _array->offset_array(index);  // Extend u_char to uint.
+  while (offset >= N_words) {
+    // The excess of the offset from N_words indicates a power of Base
+    // to go back by.
+    size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset);
+    q -= (N_words * n_cards_back);
+    assert(q >= _sp->bottom(), "Went below bottom!");
+    index -= n_cards_back;
+    offset = _array->offset_array(index);
+  }
+  assert(offset < N_words, "offset too large");
+  q -= offset;
+  return q;
+}
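+
+// (The loop above simply inverts the encoding used by
+// G1BlockOffsetArray::set_remainder_to_point_to_start(): entries of
+// N_words or more are logarithmic card backskips, while smaller entries
+// are direct word offsets back to a block start.)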
+
+inline HeapWord*
+G1BlockOffsetArray::
+forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
+                                       const void* addr) const {
+  if (csp() != NULL) {
+    if (addr >= csp()->top()) return csp()->top();
+    while (n <= addr) {
+      q = n;
+      oop obj = oop(q);
+      if (obj->klass() == NULL) return q;
+      n += obj->size();
+    }
+  } else {
+    while (n <= addr) {
+      q = n;
+      oop obj = oop(q);
+      if (obj->klass() == NULL) return q;
+      n += _sp->block_size(q);
+    }
+  }
+  assert(q <= n, "wrong order for q and n");
+  assert(addr < n, "wrong order for addr and n");
+  return q;
+}
+
+inline HeapWord*
+G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
+                                                     const void* addr) {
+  if (oop(q)->klass() == NULL) return q;
+  HeapWord* n = q + _sp->block_size(q);
+  // In the normal case, where the query "addr" is a card boundary, and the
+  // offset table chunks are the same size as cards, the block starting at
+  // "q" will contain addr, so the test below will fail, and we'll fall
+  // through quickly.
+  if (n <= addr) {
+    q = forward_to_block_containing_addr_slow(q, n, addr);
+  }
+  assert(q <= addr, "wrong order for current and arg");
+  return q;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// G1BlockOffsetArray inlines
+//////////////////////////////////////////////////////////////////////////
+inline void G1BlockOffsetArray::freed(HeapWord* blk_start, HeapWord* blk_end) {
+  // Verify that the BOT shows [blk_start, blk_end) to be one block.
+  verify_single_block(blk_start, blk_end);
+  // adjust _unallocated_block upward or downward
+  // as appropriate
+  if (BlockOffsetArrayUseUnallocatedBlock) {
+    assert(_unallocated_block <= _end,
+           "Inconsistent value for _unallocated_block");
+    if (blk_end >= _unallocated_block && blk_start <= _unallocated_block) {
+      // CMS-specific note: a block abutting _unallocated_block to
+      // its left is being freed, a new block is being added or
+      // we are resetting following a compaction
+      _unallocated_block = blk_start;
+    }
+  }
+}
+
+inline void G1BlockOffsetArray::freed(HeapWord* blk, size_t size) {
+  freed(blk, blk + size);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,5497 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1CollectedHeap.cpp.incl"
+
+// Turn this on so that the contents of the young list (scan-only /
+// to-be-collected) are printed at "strategic" points before / during /
+// after the collection --- this is useful for debugging.
+#define SCAN_ONLY_VERBOSE 0
+
+// CURRENT STATUS
+// This file is under construction.  Search for "FIXME".
+
+// INVARIANTS/NOTES
+//
+// All allocation activity covered by the G1CollectedHeap interface is
+//   serialized by acquiring the HeapLock.  This happens in
+//   mem_allocate_work, which all such allocation functions call.
+//   (Note that this does not apply to TLAB allocation, which is not part
+//   of this interface: it is done by clients of this interface.)
+
+// Local to this file.
+
+// Finds the first HeapRegion.
+// No longer used, but might be handy someday.
+
+class FindFirstRegionClosure: public HeapRegionClosure {
+  HeapRegion* _a_region;
+public:
+  FindFirstRegionClosure() : _a_region(NULL) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _a_region = r;
+    return true;
+  }
+  HeapRegion* result() { return _a_region; }
+};
+
+
+class RefineCardTableEntryClosure: public CardTableEntryClosure {
+  SuspendibleThreadSet* _sts;
+  G1RemSet* _g1rs;
+  ConcurrentG1Refine* _cg1r;
+  bool _concurrent;
+public:
+  RefineCardTableEntryClosure(SuspendibleThreadSet* sts,
+                              G1RemSet* g1rs,
+                              ConcurrentG1Refine* cg1r) :
+    _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true)
+  {}
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    _g1rs->concurrentRefineOneCard(card_ptr, worker_i);
+    if (_concurrent && _sts->should_yield()) {
+      // Caller will actually yield.
+      return false;
+    }
+    // Otherwise, we finished successfully; return true.
+    return true;
+  }
+  void set_concurrent(bool b) { _concurrent = b; }
+};
+
+
+class ClearLoggedCardTableEntryClosure: public CardTableEntryClosure {
+  int _calls;
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _ctbs;
+  int _histo[256];
+public:
+  ClearLoggedCardTableEntryClosure() :
+    _calls(0)
+  {
+    _g1h = G1CollectedHeap::heap();
+    _ctbs = (CardTableModRefBS*)_g1h->barrier_set();
+    for (int i = 0; i < 256; i++) _histo[i] = 0;
+  }
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) {
+      _calls++;
+      unsigned char* ujb = (unsigned char*)card_ptr;
+      int ind = (int)(*ujb);
+      _histo[ind]++;
+      *card_ptr = -1;
+    }
+    return true;
+  }
+  int calls() { return _calls; }
+  void print_histo() {
+    gclog_or_tty->print_cr("Card table value histogram:");
+    for (int i = 0; i < 256; i++) {
+      if (_histo[i] != 0) {
+        gclog_or_tty->print_cr("  %d: %d", i, _histo[i]);
+      }
+    }
+  }
+};
+
+class RedirtyLoggedCardTableEntryClosure: public CardTableEntryClosure {
+  int _calls;
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _ctbs;
+public:
+  RedirtyLoggedCardTableEntryClosure() :
+    _calls(0)
+  {
+    _g1h = G1CollectedHeap::heap();
+    _ctbs = (CardTableModRefBS*)_g1h->barrier_set();
+  }
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) {
+      _calls++;
+      *card_ptr = 0;
+    }
+    return true;
+  }
+  int calls() { return _calls; }
+};
+
+YoungList::YoungList(G1CollectedHeap* g1h)
+  : _g1h(g1h), _head(NULL),
+    _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL),
+    _length(0), _scan_only_length(0),
+    _last_sampled_rs_lengths(0),
+    _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0)
+{
+  guarantee( check_list_empty(false), "just making sure..." );
+}
+
+void YoungList::push_region(HeapRegion *hr) {
+  assert(!hr->is_young(), "should not already be young");
+  assert(hr->get_next_young_region() == NULL, "cause it should!");
+
+  hr->set_next_young_region(_head);
+  _head = hr;
+
+  hr->set_young();
+  double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length);
+  ++_length;
+}
+
+void YoungList::add_survivor_region(HeapRegion* hr) {
+  assert(!hr->is_survivor(), "should not already be a survivor");
+  assert(hr->get_next_young_region() == NULL, "cause it should!");
+
+  hr->set_next_young_region(_survivor_head);
+  if (_survivor_head == NULL) {
+    _survivors_tail = hr;
+  }
+  _survivor_head = hr;
+
+  hr->set_survivor();
+  ++_survivor_length;
+}
+
+HeapRegion* YoungList::pop_region() {
+  while (_head != NULL) {
+    assert( length() > 0, "list should not be empty" );
+    HeapRegion* ret = _head;
+    _head = ret->get_next_young_region();
+    ret->set_next_young_region(NULL);
+    --_length;
+    assert(ret->is_young(), "region should be very young");
+
+    // Replace 'Survivor' region type with 'Young'. So the region will
+    // be treated as a young region and will not be 'confused' with
+    // newly created survivor regions.
+    if (ret->is_survivor()) {
+      ret->set_young();
+    }
+
+    if (!ret->is_scan_only()) {
+      return ret;
+    }
+
+    // scan-only, we'll add it to the scan-only list
+    if (_scan_only_tail == NULL) {
+      guarantee( _scan_only_head == NULL, "invariant" );
+
+      _scan_only_head = ret;
+      _curr_scan_only = ret;
+    } else {
+      guarantee( _scan_only_head != NULL, "invariant" );
+      _scan_only_tail->set_next_young_region(ret);
+    }
+    guarantee( ret->get_next_young_region() == NULL, "invariant" );
+    _scan_only_tail = ret;
+
+    // no need to be tagged as scan-only any more
+    ret->set_young();
+
+    ++_scan_only_length;
+  }
+  assert( length() == 0, "list should be empty" );
+  return NULL;
+}
+
+void YoungList::empty_list(HeapRegion* list) {
+  while (list != NULL) {
+    HeapRegion* next = list->get_next_young_region();
+    list->set_next_young_region(NULL);
+    list->uninstall_surv_rate_group();
+    list->set_not_young();
+    list = next;
+  }
+}
+
+void YoungList::empty_list() {
+  assert(check_list_well_formed(), "young list should be well formed");
+
+  empty_list(_head);
+  _head = NULL;
+  _length = 0;
+
+  empty_list(_scan_only_head);
+  _scan_only_head = NULL;
+  _scan_only_tail = NULL;
+  _scan_only_length = 0;
+  _curr_scan_only = NULL;
+
+  empty_list(_survivor_head);
+  _survivor_head = NULL;
+  _survivors_tail = NULL;
+  _survivor_length = 0;
+
+  _last_sampled_rs_lengths = 0;
+
+  assert(check_list_empty(false), "just making sure...");
+}
+
+bool YoungList::check_list_well_formed() {
+  bool ret = true;
+
+  size_t length = 0;
+  HeapRegion* curr = _head;
+  HeapRegion* last = NULL;
+  while (curr != NULL) {
+    if (!curr->is_young() || curr->is_scan_only()) {
+      gclog_or_tty->print_cr("### YOUNG REGION "PTR_FORMAT"-"PTR_FORMAT" "
+                             "incorrectly tagged (%d, %d)",
+                             curr->bottom(), curr->end(),
+                             curr->is_young(), curr->is_scan_only());
+      ret = false;
+    }
+    ++length;
+    last = curr;
+    curr = curr->get_next_young_region();
+  }
+  ret = ret && (length == _length);
+
+  if (!ret) {
+    gclog_or_tty->print_cr("### YOUNG LIST seems not well formed!");
+    gclog_or_tty->print_cr("###   list has %d entries, _length is %d",
+                           length, _length);
+  }
+
+  bool scan_only_ret = true;
+  length = 0;
+  curr = _scan_only_head;
+  last = NULL;
+  while (curr != NULL) {
+    if (!curr->is_young() || curr->is_scan_only()) {
+      gclog_or_tty->print_cr("### SCAN-ONLY REGION "PTR_FORMAT"-"PTR_FORMAT" "
+                             "incorrectly tagged (%d, %d)",
+                             curr->bottom(), curr->end(),
+                             curr->is_young(), curr->is_scan_only());
+      scan_only_ret = false;
+    }
+    ++length;
+    last = curr;
+    curr = curr->get_next_young_region();
+  }
+  scan_only_ret = scan_only_ret && (length == _scan_only_length);
+
+  if ( (last != _scan_only_tail) ||
+       (_scan_only_head == NULL && _scan_only_tail != NULL) ||
+       (_scan_only_head != NULL && _scan_only_tail == NULL) ) {
+    gclog_or_tty->print_cr("### _scan_only_tail is set incorrectly");
+    scan_only_ret = false;
+  }
+
+  if (_curr_scan_only != NULL && _curr_scan_only != _scan_only_head) {
+    gclog_or_tty->print_cr("### _curr_scan_only is set incorrectly");
+    scan_only_ret = false;
+  }
+
+  if (!scan_only_ret) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST seems not well formed!");
+    gclog_or_tty->print_cr("###   list has %d entries, _scan_only_length is %d",
+                  length, _scan_only_length);
+  }
+
+  return ret && scan_only_ret;
+}
+
+bool YoungList::check_list_empty(bool ignore_scan_only_list,
+                                 bool check_sample) {
+  bool ret = true;
+
+  if (_length != 0) {
+    gclog_or_tty->print_cr("### YOUNG LIST should have 0 length, not %d",
+                  _length);
+    ret = false;
+  }
+  if (check_sample && _last_sampled_rs_lengths != 0) {
+    gclog_or_tty->print_cr("### YOUNG LIST has non-zero last sampled RS lengths");
+    ret = false;
+  }
+  if (_head != NULL) {
+    gclog_or_tty->print_cr("### YOUNG LIST does not have a NULL head");
+    ret = false;
+  }
+  if (!ret) {
+    gclog_or_tty->print_cr("### YOUNG LIST does not seem empty");
+  }
+
+  if (ignore_scan_only_list)
+    return ret;
+
+  bool scan_only_ret = true;
+  if (_scan_only_length != 0) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST should have 0 length, not %d",
+                  _scan_only_length);
+    scan_only_ret = false;
+  }
+  if (_scan_only_head != NULL) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL head");
+    scan_only_ret = false;
+  }
+  if (_scan_only_tail != NULL) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL tail");
+    scan_only_ret = false;
+  }
+  if (!scan_only_ret) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not seem empty");
+  }
+
+  return ret && scan_only_ret;
+}
+
+void
+YoungList::rs_length_sampling_init() {
+  _sampled_rs_lengths = 0;
+  _curr               = _head;
+}
+
+bool
+YoungList::rs_length_sampling_more() {
+  return _curr != NULL;
+}
+
+void
+YoungList::rs_length_sampling_next() {
+  assert( _curr != NULL, "invariant" );
+  _sampled_rs_lengths += _curr->rem_set()->occupied();
+  _curr = _curr->get_next_young_region();
+  if (_curr == NULL) {
+    _last_sampled_rs_lengths = _sampled_rs_lengths;
+    // gclog_or_tty->print_cr("last sampled RS lengths = %d", _last_sampled_rs_lengths);
+  }
+}
+
+void
+YoungList::reset_auxilary_lists() {
+  // We could have just "moved" the scan-only list to the young list.
+  // However, the scan-only list is ordered according to the region
+  // age in descending order, so, by moving one entry at a time, we
+  // ensure that it is recreated in ascending order.
+
+  guarantee( is_empty(), "young list should be empty" );
+  assert(check_list_well_formed(), "young list should be well formed");
+
+  // Add survivor regions to SurvRateGroup.
+  _g1h->g1_policy()->note_start_adding_survivor_regions();
+  for (HeapRegion* curr = _survivor_head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    _g1h->g1_policy()->set_region_survivors(curr);
+  }
+  _g1h->g1_policy()->note_stop_adding_survivor_regions();
+
+  if (_survivor_head != NULL) {
+    _head           = _survivor_head;
+    _length         = _survivor_length + _scan_only_length;
+    _survivors_tail->set_next_young_region(_scan_only_head);
+  } else {
+    _head           = _scan_only_head;
+    _length         = _scan_only_length;
+  }
+
+  for (HeapRegion* curr = _scan_only_head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    curr->recalculate_age_in_surv_rate_group();
+  }
+  _scan_only_head   = NULL;
+  _scan_only_tail   = NULL;
+  _scan_only_length = 0;
+  _curr_scan_only   = NULL;
+
+  _survivor_head    = NULL;
+  _survivors_tail   = NULL;
+  _survivor_length  = 0;
+  _g1h->g1_policy()->finished_recalculating_age_indexes();
+
+  assert(check_list_well_formed(), "young list should be well formed");
+}
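+
+// The list fact the comment in reset_auxilary_lists() relies on, shown in
+// isolation (plain C++ sketch, no HotSpot types assumed): repeatedly popping
+// the head of one singly-linked list and pushing onto the head of another
+// reverses the order, so a descending list is recreated ascending.
+//
+//   struct Node { int age; Node* next; };
+//   Node* src = head_with_descending_ages;   // assumed input
+//   Node* dst = NULL;
+//   while (src != NULL) {
+//     Node* n = src;
+//     src     = src->next;      // pop from the head of src
+//     n->next = dst;            // push onto the head of dst
+//     dst     = n;              // dst ends up in ascending age order
+//   }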
+
+void YoungList::print() {
+  HeapRegion* lists[] = {_head,   _scan_only_head, _survivor_head};
+  const char* names[] = {"YOUNG", "SCAN-ONLY",     "SURVIVOR"};
+
+  for (unsigned int list = 0; list < ARRAY_SIZE(lists); ++list) {
+    gclog_or_tty->print_cr("%s LIST CONTENTS", names[list]);
+    HeapRegion *curr = lists[list];
+    if (curr == NULL)
+      gclog_or_tty->print_cr("  empty");
+    while (curr != NULL) {
+      gclog_or_tty->print_cr("  [%08x-%08x], t: %08x, P: %08x, N: %08x, C: %08x, "
+                             "age: %4d, y: %d, s-o: %d, surv: %d",
+                             curr->bottom(), curr->end(),
+                             curr->top(),
+                             curr->prev_top_at_mark_start(),
+                             curr->next_top_at_mark_start(),
+                             curr->top_at_conc_mark_count(),
+                             curr->age_in_surv_rate_group_cond(),
+                             curr->is_young(),
+                             curr->is_scan_only(),
+                             curr->is_survivor());
+      curr = curr->get_next_young_region();
+    }
+  }
+
+  gclog_or_tty->print_cr("");
+}
+
+void G1CollectedHeap::stop_conc_gc_threads() {
+  _cg1r->cg1rThread()->stop();
+  _czft->stop();
+  _cmThread->stop();
+}
+
+
+void G1CollectedHeap::check_ct_logs_at_safepoint() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set();
+
+  // Count the dirty cards at the start.
+  CountNonCleanMemRegionClosure count1(this);
+  ct_bs->mod_card_iterate(&count1);
+  int orig_count = count1.n();
+
+  // First clear the logged cards.
+  ClearLoggedCardTableEntryClosure clear;
+  dcqs.set_closure(&clear);
+  dcqs.apply_closure_to_all_completed_buffers();
+  dcqs.iterate_closure_all_threads(false);
+  clear.print_histo();
+
+  // Now ensure that there are no dirty cards.
+  CountNonCleanMemRegionClosure count2(this);
+  ct_bs->mod_card_iterate(&count2);
+  if (count2.n() != 0) {
+    gclog_or_tty->print_cr("Card table has %d entries; %d originally",
+                           count2.n(), orig_count);
+  }
+  guarantee(count2.n() == 0, "Card table should be clean.");
+
+  RedirtyLoggedCardTableEntryClosure redirty;
+  JavaThread::dirty_card_queue_set().set_closure(&redirty);
+  dcqs.apply_closure_to_all_completed_buffers();
+  dcqs.iterate_closure_all_threads(false);
+  gclog_or_tty->print_cr("Log entries = %d, dirty cards = %d.",
+                         clear.calls(), orig_count);
+  guarantee(redirty.calls() == clear.calls(),
+            "Or else mechanism is broken.");
+
+  CountNonCleanMemRegionClosure count3(this);
+  ct_bs->mod_card_iterate(&count3);
+  if (count3.n() != orig_count) {
+    gclog_or_tty->print_cr("Should have restored them all: orig = %d, final = %d.",
+                           orig_count, count3.n());
+    guarantee(count3.n() >= orig_count, "Should have restored them all.");
+  }
+
+  JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl);
+}
+
+// Private class members.
+
+G1CollectedHeap* G1CollectedHeap::_g1h;
+
+// Private methods.
+
+// Finds a HeapRegion that can be used to allocate a block of the given size.
+
+
+HeapRegion* G1CollectedHeap::newAllocRegion_work(size_t word_size,
+                                                 bool do_expand,
+                                                 bool zero_filled) {
+  ConcurrentZFThread::note_region_alloc();
+  HeapRegion* res = alloc_free_region_from_lists(zero_filled);
+  if (res == NULL && do_expand) {
+    expand(word_size * HeapWordSize);
+    res = alloc_free_region_from_lists(zero_filled);
+    assert(res == NULL ||
+           (!res->isHumongous() &&
+            (!zero_filled ||
+             res->zero_fill_state() == HeapRegion::Allocated)),
+           "Alloc Regions must be zero filled (and non-H)");
+  }
+  if (res != NULL && res->is_empty()) _free_regions--;
+  assert(res == NULL ||
+         (!res->isHumongous() &&
+          (!zero_filled ||
+           res->zero_fill_state() == HeapRegion::Allocated)),
+         "Non-young alloc Regions must be zero filled (and non-H)");
+
+  if (G1TraceRegions) {
+    if (res != NULL) {
+      gclog_or_tty->print_cr("new alloc region %d:["PTR_FORMAT", "PTR_FORMAT"], "
+                             "top "PTR_FORMAT,
+                             res->hrs_index(), res->bottom(), res->end(), res->top());
+    }
+  }
+
+  return res;
+}
+
+HeapRegion* G1CollectedHeap::newAllocRegionWithExpansion(int purpose,
+                                                         size_t word_size,
+                                                         bool zero_filled) {
+  HeapRegion* alloc_region = NULL;
+  if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
+    alloc_region = newAllocRegion_work(word_size, true, zero_filled);
+    if (purpose == GCAllocForSurvived && alloc_region != NULL) {
+      _young_list->add_survivor_region(alloc_region);
+    }
+    ++_gc_alloc_region_counts[purpose];
+  } else {
+    g1_policy()->note_alloc_region_limit_reached(purpose);
+  }
+  return alloc_region;
+}
+
+// If the request could fit into the free regions w/o expansion, try that.
+// Otherwise, if we can expand, do so.
+// Otherwise, if using ex regions might help, try with ex given back.
+HeapWord* G1CollectedHeap::humongousObjAllocate(size_t word_size) {
+  assert(regions_accounted_for(), "Region leakage!");
+
+  // We can't allocate H regions while cleanupComplete is running, since
+  // some of the regions we find to be empty might not yet be added to the
+  // unclean list.  (If we're already at a safepoint, this call is
+  // unnecessary, not to mention wrong.)
+  if (!SafepointSynchronize::is_at_safepoint())
+    wait_for_cleanup_complete();
+
+  size_t num_regions =
+    round_to(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords;
+
+  // Special case if < one region???
+
+  // Remember the current expansion-region count.
+  size_t x_size = expansion_regions();
+
+  HeapWord* res = NULL;
+  bool eliminated_allocated_from_lists = false;
+
+  // Can the allocation potentially fit in the free regions?
+  if (free_regions() >= num_regions) {
+    res = _hrs->obj_allocate(word_size);
+  }
+  if (res == NULL) {
+    // Try expansion.
+    size_t fs = _hrs->free_suffix();
+    if (fs + x_size >= num_regions) {
+      expand((num_regions - fs) * HeapRegion::GrainBytes);
+      res = _hrs->obj_allocate(word_size);
+      assert(res != NULL, "This should have worked.");
+    } else {
+      // Expansion won't help.  Are there enough free regions if we get rid
+      // of reservations?
+      size_t avail = free_regions();
+      if (avail >= num_regions) {
+        res = _hrs->obj_allocate(word_size);
+        if (res != NULL) {
+          remove_allocated_regions_from_lists();
+          eliminated_allocated_from_lists = true;
+        }
+      }
+    }
+  }
+  if (res != NULL) {
+    // Increment by the number of regions allocated.
+    // FIXME: Assumes regions all of size GrainBytes.
+#ifndef PRODUCT
+    mr_bs()->verify_clean_region(MemRegion(res, res + num_regions *
+                                           HeapRegion::GrainWords));
+#endif
+    if (!eliminated_allocated_from_lists)
+      remove_allocated_regions_from_lists();
+    _summary_bytes_used += word_size * HeapWordSize;
+    _free_regions -= num_regions;
+    _num_humongous_regions += (int) num_regions;
+  }
+  assert(regions_accounted_for(), "Region Leakage");
+  return res;
+}
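+
+// The region count above is a ceiling division: round_to() rounds word_size
+// up to a multiple of GrainWords, and dividing by GrainWords then yields the
+// smallest number of whole regions covering the request.  A worked case with
+// an assumed (illustrative, not flag-derived) grain of 0x20000 words:
+//
+//   size_t word_size   = 3 * 0x20000 + 1;                        // just over three regions
+//   size_t num_regions = round_to(word_size, 0x20000) / 0x20000; // == 4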
+
+HeapWord*
+G1CollectedHeap::attempt_allocation_slow(size_t word_size,
+                                         bool permit_collection_pause) {
+  HeapWord* res = NULL;
+  HeapRegion* allocated_young_region = NULL;
+
+  assert( SafepointSynchronize::is_at_safepoint() ||
+          Heap_lock->owned_by_self(), "pre condition of the call" );
+
+  if (isHumongous(word_size)) {
+    // Allocation of a humongous object can, in a sense, complete a
+    // partial region, if the previous alloc was also humongous, and
+    // caused the test below to succeed.
+    if (permit_collection_pause)
+      do_collection_pause_if_appropriate(word_size);
+    res = humongousObjAllocate(word_size);
+    assert(_cur_alloc_region == NULL
+           || !_cur_alloc_region->isHumongous(),
+           "Prevent a regression of this bug.");
+
+  } else {
+    // We may have concurrent cleanup working at the time. Wait for it
+    // to complete. In the future we would probably want to make the
+    // concurrent cleanup truly concurrent by decoupling it from the
+    // allocation.
+    if (!SafepointSynchronize::is_at_safepoint())
+      wait_for_cleanup_complete();
+    // If we do a collection pause, this will be reset to a non-NULL
+    // value.  If we don't, nulling here ensures that we allocate a new
+    // region below.
+    if (_cur_alloc_region != NULL) {
+      // We're finished with the _cur_alloc_region.
+      _summary_bytes_used += _cur_alloc_region->used();
+      _cur_alloc_region = NULL;
+    }
+    assert(_cur_alloc_region == NULL, "Invariant.");
+    // Completion of a heap region is perhaps a good point at which to do
+    // a collection pause.
+    if (permit_collection_pause)
+      do_collection_pause_if_appropriate(word_size);
+    // Make sure we have an allocation region available.
+    if (_cur_alloc_region == NULL) {
+      if (!SafepointSynchronize::is_at_safepoint())
+        wait_for_cleanup_complete();
+      bool next_is_young = should_set_young_locked();
+      // If the next region is not young, make sure it's zero-filled.
+      _cur_alloc_region = newAllocRegion(word_size, !next_is_young);
+      if (_cur_alloc_region != NULL) {
+        _summary_bytes_used -= _cur_alloc_region->used();
+        if (next_is_young) {
+          set_region_short_lived_locked(_cur_alloc_region);
+          allocated_young_region = _cur_alloc_region;
+        }
+      }
+    }
+    assert(_cur_alloc_region == NULL || !_cur_alloc_region->isHumongous(),
+           "Prevent a regression of this bug.");
+
+    // Now retry the allocation.
+    if (_cur_alloc_region != NULL) {
+      res = _cur_alloc_region->allocate(word_size);
+    }
+  }
+
+  // NOTE: fails frequently in PRT
+  assert(regions_accounted_for(), "Region leakage!");
+
+  if (res != NULL) {
+    if (!SafepointSynchronize::is_at_safepoint()) {
+      assert( permit_collection_pause, "invariant" );
+      assert( Heap_lock->owned_by_self(), "invariant" );
+      Heap_lock->unlock();
+    }
+
+    if (allocated_young_region != NULL) {
+      HeapRegion* hr = allocated_young_region;
+      HeapWord* bottom = hr->bottom();
+      HeapWord* end = hr->end();
+      MemRegion mr(bottom, end);
+      ((CardTableModRefBS*)_g1h->barrier_set())->dirty(mr);
+    }
+  }
+
+  assert( SafepointSynchronize::is_at_safepoint() ||
+          (res == NULL && Heap_lock->owned_by_self()) ||
+          (res != NULL && !Heap_lock->owned_by_self()),
+          "post condition of the call" );
+
+  return res;
+}
+
+HeapWord*
+G1CollectedHeap::mem_allocate(size_t word_size,
+                              bool   is_noref,
+                              bool   is_tlab,
+                              bool* gc_overhead_limit_was_exceeded) {
+  debug_only(check_for_valid_allocation_state());
+  assert(no_gc_in_progress(), "Allocation during gc not allowed");
+  HeapWord* result = NULL;
+
+  // Loop until the allocation is satisfied,
+  // or unsatisfied after GC.
+  for (int try_count = 1; /* return or throw */; try_count += 1) {
+    int gc_count_before;
+    {
+      Heap_lock->lock();
+      result = attempt_allocation(word_size);
+      if (result != NULL) {
+        // attempt_allocation should have unlocked the heap lock
+        assert(is_in(result), "result not in heap");
+        return result;
+      }
+      // Read the gc count while the heap lock is held.
+      gc_count_before = SharedHeap::heap()->total_collections();
+      Heap_lock->unlock();
+    }
+
+    // Create the garbage collection operation...
+    VM_G1CollectForAllocation op(word_size,
+                                 gc_count_before);
+
+    // ...and get the VM thread to execute it.
+    VMThread::execute(&op);
+    if (op.prologue_succeeded()) {
+      result = op.result();
+      assert(result == NULL || is_in(result), "result not in heap");
+      return result;
+    }
+
+    // Give a warning if we seem to be looping forever.
+    if ((QueuedAllocationWarningCount > 0) &&
+        (try_count % QueuedAllocationWarningCount == 0)) {
+      warning("G1CollectedHeap::mem_allocate_work retries %d times",
+              try_count);
+    }
+  }
+}
+
+void G1CollectedHeap::abandon_cur_alloc_region() {
+  if (_cur_alloc_region != NULL) {
+    // We're finished with the _cur_alloc_region.
+    if (_cur_alloc_region->is_empty()) {
+      _free_regions++;
+      free_region(_cur_alloc_region);
+    } else {
+      _summary_bytes_used += _cur_alloc_region->used();
+    }
+    _cur_alloc_region = NULL;
+  }
+}
+
+class PostMCRemSetClearClosure: public HeapRegionClosure {
+  ModRefBarrierSet* _mr_bs;
+public:
+  PostMCRemSetClearClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {}
+  bool doHeapRegion(HeapRegion* r) {
+    r->reset_gc_time_stamp();
+    if (r->continuesHumongous())
+      return false;
+    HeapRegionRemSet* hrrs = r->rem_set();
+    if (hrrs != NULL) hrrs->clear();
+    // You might think here that we could clear just the cards
+    // corresponding to the used region.  But no: if we leave a dirty card
+    // in a region we might allocate into, then it would prevent that card
+    // from being enqueued, and cause it to be missed.
+    // Re: the performance cost: we shouldn't be doing full GC anyway!
+    _mr_bs->clear(MemRegion(r->bottom(), r->end()));
+    return false;
+  }
+};
+
+
+class PostMCRemSetInvalidateClosure: public HeapRegionClosure {
+  ModRefBarrierSet* _mr_bs;
+public:
+  PostMCRemSetInvalidateClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    if (r->used_region().word_size() != 0) {
+      _mr_bs->invalidate(r->used_region(), true /*whole heap*/);
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
+                                    size_t word_size) {
+  ResourceMark rm;
+
+  if (full && DisableExplicitGC) {
+    gclog_or_tty->print("\n\n\nDisabling Explicit GC\n\n\n");
+    return;
+  }
+
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
+  assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
+
+  if (GC_locker::is_active()) {
+    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
+  }
+
+  {
+    IsGCActiveMark x;
+
+    // Timing
+    gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
+    TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
+    TraceTime t(full ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty);
+
+    double start = os::elapsedTime();
+    GCOverheadReporter::recordSTWStart(start);
+    g1_policy()->record_full_collection_start();
+
+    gc_prologue(true);
+    increment_total_collections();
+
+    size_t g1h_prev_used = used();
+    assert(used() == recalculate_used(), "Should be equal");
+
+    if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      prepare_for_verify();
+      gclog_or_tty->print(" VerifyBeforeGC:");
+      Universe::verify(true);
+    }
+    assert(regions_accounted_for(), "Region leakage!");
+
+    COMPILER2_PRESENT(DerivedPointerTable::clear());
+
+    // We want to discover references, but not process them yet.
+    // This mode is disabled in
+    // instanceRefKlass::process_discovered_references if the
+    // generation does some collection work, or
+    // instanceRefKlass::enqueue_discovered_references if the
+    // generation returns without doing any work.
+    ref_processor()->disable_discovery();
+    ref_processor()->abandon_partial_discovery();
+    ref_processor()->verify_no_references_recorded();
+
+    // Abandon current iterations of concurrent marking and concurrent
+    // refinement, if any are in progress.
+    concurrent_mark()->abort();
+
+    // Make sure we'll choose a new allocation region afterwards.
+    abandon_cur_alloc_region();
+    assert(_cur_alloc_region == NULL, "Invariant.");
+    g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS();
+    tear_down_region_lists();
+    set_used_regions_to_need_zero_fill();
+    if (g1_policy()->in_young_gc_mode()) {
+      empty_young_list();
+      g1_policy()->set_full_young_gcs(true);
+    }
+
+    // Temporarily make reference _discovery_ single threaded (non-MT).
+    ReferenceProcessorMTMutator rp_disc_ser(ref_processor(), false);
+
+    // Temporarily make refs discovery atomic
+    ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true);
+
+    // Temporarily clear _is_alive_non_header
+    ReferenceProcessorIsAliveMutator rp_is_alive_null(ref_processor(), NULL);
+
+    ref_processor()->enable_discovery();
+
+    // Do collection work
+    {
+      HandleMark hm;  // Discard invalid handles created during gc
+      G1MarkSweep::invoke_at_safepoint(ref_processor(), clear_all_soft_refs);
+    }
+    // Because freeing humongous regions may have added some unclean
+    // regions, it is necessary to tear down again before rebuilding.
+    tear_down_region_lists();
+    rebuild_region_lists();
+
+    _summary_bytes_used = recalculate_used();
+
+    ref_processor()->enqueue_discovered_references();
+
+    COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+
+    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      gclog_or_tty->print(" VerifyAfterGC:");
+      Universe::verify(false);
+    }
+    NOT_PRODUCT(ref_processor()->verify_no_references_recorded());
+
+    reset_gc_time_stamp();
+    // Since everything potentially moved, we will clear all remembered
+    // sets, and clear all cards.  Later we will also dirty the cards in
+    // the used portion of the heap after the resizing (which could be a
+    // shrinking).  We will also reset the GC time stamps of the regions.
+    PostMCRemSetClearClosure rs_clear(mr_bs());
+    heap_region_iterate(&rs_clear);
+
+    // Resize the heap if necessary.
+    resize_if_necessary_after_full_collection(full ? 0 : word_size);
+
+    // Since everything potentially moved, we will clear all remembered
+    // sets, but also dirty all cards corresponding to used regions.
+    PostMCRemSetInvalidateClosure rs_invalidate(mr_bs());
+    heap_region_iterate(&rs_invalidate);
+    if (_cg1r->use_cache()) {
+      _cg1r->clear_and_record_card_counts();
+      _cg1r->clear_hot_cache();
+    }
+
+    if (PrintGC) {
+      print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity());
+    }
+
+    if (true) { // FIXME
+      // Ask the permanent generation to adjust size for full collections
+      perm()->compute_new_size();
+    }
+
+    double end = os::elapsedTime();
+    GCOverheadReporter::recordSTWEnd(end);
+    g1_policy()->record_full_collection_end();
+
+    gc_epilogue(true);
+
+    // Abandon concurrent refinement.  This must happen last: in the
+    // dirty-card logging system, some cards may be dirtied by weak-ref
+    // processing, and may be enqueued.  But the whole card table is
+    // dirtied, so this should abandon those logs, and set "do_traversal"
+    // to true.
+    concurrent_g1_refine()->set_pya_restart();
+
+    assert(regions_accounted_for(), "Region leakage!");
+  }
+
+  if (g1_policy()->in_young_gc_mode()) {
+    _young_list->reset_sampled_info();
+    assert( check_young_list_empty(false, false),
+            "young list should be empty at this point");
+  }
+}
+
+void G1CollectedHeap::do_full_collection(bool clear_all_soft_refs) {
+  do_collection(true, clear_all_soft_refs, 0);
+}
+
+// This code is mostly copied from TenuredGeneration.
+void
+G1CollectedHeap::
+resize_if_necessary_after_full_collection(size_t word_size) {
+  assert(MinHeapFreeRatio <= MaxHeapFreeRatio, "sanity check");
+
+  // Include the current allocation, if any, and bytes that will be
+  // pre-allocated to support collections, as "used".
+  const size_t used_after_gc = used();
+  const size_t capacity_after_gc = capacity();
+  const size_t free_after_gc = capacity_after_gc - used_after_gc;
+
+  // We don't have floating point command-line arguments
+  const double minimum_free_percentage = (double) MinHeapFreeRatio / 100;
+  const double maximum_used_percentage = 1.0 - minimum_free_percentage;
+  const double maximum_free_percentage = (double) MaxHeapFreeRatio / 100;
+  const double minimum_used_percentage = 1.0 - maximum_free_percentage;
+
+  size_t minimum_desired_capacity = (size_t) (used_after_gc / maximum_used_percentage);
+  size_t maximum_desired_capacity = (size_t) (used_after_gc / minimum_used_percentage);
+
+  // Don't shrink less than the initial size.
+  minimum_desired_capacity =
+    MAX2(minimum_desired_capacity,
+         collector_policy()->initial_heap_byte_size());
+  maximum_desired_capacity =
+    MAX2(maximum_desired_capacity,
+         collector_policy()->initial_heap_byte_size());
+
+  // We are failing here because minimum_desired_capacity is
+  assert(used_after_gc <= minimum_desired_capacity, "sanity check");
+  assert(minimum_desired_capacity <= maximum_desired_capacity, "sanity check");
+
+  if (PrintGC && Verbose) {
+    const double free_percentage = ((double)free_after_gc) / capacity();
+    gclog_or_tty->print_cr("Computing new size after full GC ");
+    gclog_or_tty->print_cr("  "
+                           "  minimum_free_percentage: %6.2f",
+                           minimum_free_percentage);
+    gclog_or_tty->print_cr("  "
+                           "  maximum_free_percentage: %6.2f",
+                           maximum_free_percentage);
+    gclog_or_tty->print_cr("  "
+                           "  capacity: %6.1fK"
+                           "  minimum_desired_capacity: %6.1fK"
+                           "  maximum_desired_capacity: %6.1fK",
+                           capacity() / (double) K,
+                           minimum_desired_capacity / (double) K,
+                           maximum_desired_capacity / (double) K);
+    gclog_or_tty->print_cr("  "
+                           "   free_after_gc   : %6.1fK"
+                           "   used_after_gc   : %6.1fK",
+                           free_after_gc / (double) K,
+                           used_after_gc / (double) K);
+    gclog_or_tty->print_cr("  "
+                           "   free_percentage: %6.2f",
+                           free_percentage);
+  }
+  if (capacity() < minimum_desired_capacity) {
+    // Don't expand unless it's significant
+    size_t expand_bytes = minimum_desired_capacity - capacity_after_gc;
+    expand(expand_bytes);
+    if (PrintGC && Verbose) {
+      gclog_or_tty->print_cr("    expanding:"
+                             "  minimum_desired_capacity: %6.1fK"
+                             "  expand_bytes: %6.1fK",
+                             minimum_desired_capacity / (double) K,
+                             expand_bytes / (double) K);
+    }
+
+    // No expansion, now see if we want to shrink
+  } else if (capacity() > maximum_desired_capacity) {
+    // Capacity too large, compute shrinking size
+    size_t shrink_bytes = capacity_after_gc - maximum_desired_capacity;
+    shrink(shrink_bytes);
+    if (PrintGC && Verbose) {
+      gclog_or_tty->print_cr("  "
+                             "  shrinking:"
+                             "  initSize: %.1fK"
+                             "  maximum_desired_capacity: %.1fK",
+                             collector_policy()->initial_heap_byte_size() / (double) K,
+                             maximum_desired_capacity / (double) K);
+      gclog_or_tty->print_cr("  "
+                             "  shrink_bytes: %.1fK",
+                             shrink_bytes / (double) K);
+    }
+  }
+}
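+
+// A worked instance of the sizing arithmetic above, using assumed flag
+// values (MinHeapFreeRatio = 40, MaxHeapFreeRatio = 70) and an assumed
+// used_after_gc of 300M -- numbers chosen only to illustrate the formulas:
+//
+//   maximum_used_percentage = 1.0 - 40/100.0 = 0.60
+//   minimum_used_percentage = 1.0 - 70/100.0 = 0.30
+//   minimum_desired_capacity = 300M / 0.60 =  500M   // expand if capacity <  500M
+//   maximum_desired_capacity = 300M / 0.30 = 1000M   // shrink if capacity > 1000M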
+
+
+HeapWord*
+G1CollectedHeap::satisfy_failed_allocation(size_t word_size) {
+  HeapWord* result = NULL;
+
+  // In a G1 heap, we're supposed to keep allocation from failing by
+  // incremental pauses.  Therefore, at least for now, we'll favor
+  // expansion over collection.  (This might change in the future if we can
+  // do something smarter than full collection to satisfy a failed alloc.)
+
+  result = expand_and_allocate(word_size);
+  if (result != NULL) {
+    assert(is_in(result), "result not in heap");
+    return result;
+  }
+
+  // OK, I guess we have to try collection.
+
+  do_collection(false, false, word_size);
+
+  result = attempt_allocation(word_size, /*permit_collection_pause*/false);
+
+  if (result != NULL) {
+    assert(is_in(result), "result not in heap");
+    return result;
+  }
+
+  // Try collecting soft references.
+  do_collection(false, true, word_size);
+  result = attempt_allocation(word_size, /*permit_collection_pause*/false);
+  if (result != NULL) {
+    assert(is_in(result), "result not in heap");
+    return result;
+  }
+
+  // What else?  We might try synchronous finalization later.  If the total
+  // space available is large enough for the allocation, then a more
+  // complete compaction phase than we've tried so far might be
+  // appropriate.
+  return NULL;
+}
+
+// Attempts to expand the heap sufficiently to support an allocation of the
+// given "word_size".  If successful, performs the allocation and returns the
+// address of the allocated block; otherwise returns NULL.
+
+HeapWord* G1CollectedHeap::expand_and_allocate(size_t word_size) {
+  size_t expand_bytes = word_size * HeapWordSize;
+  if (expand_bytes < MinHeapDeltaBytes) {
+    expand_bytes = MinHeapDeltaBytes;
+  }
+  expand(expand_bytes);
+  assert(regions_accounted_for(), "Region leakage!");
+  HeapWord* result = attempt_allocation(word_size, false /* permit_collection_pause */);
+  return result;
+}
+
+size_t G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr) {
+  size_t pre_used = 0;
+  size_t cleared_h_regions = 0;
+  size_t freed_regions = 0;
+  UncleanRegionList local_list;
+  free_region_if_totally_empty_work(hr, pre_used, cleared_h_regions,
+                                    freed_regions, &local_list);
+
+  finish_free_region_work(pre_used, cleared_h_regions, freed_regions,
+                          &local_list);
+  return pre_used;
+}
+
+void
+G1CollectedHeap::free_region_if_totally_empty_work(HeapRegion* hr,
+                                                   size_t& pre_used,
+                                                   size_t& cleared_h,
+                                                   size_t& freed_regions,
+                                                   UncleanRegionList* list,
+                                                   bool par) {
+  assert(!hr->continuesHumongous(), "should have filtered these out");
+  size_t res = 0;
+  if (!hr->popular() && hr->used() > 0 && hr->garbage_bytes() == hr->used()) {
+    if (!hr->is_young()) {
+      if (G1PolicyVerbose > 0)
+        gclog_or_tty->print_cr("Freeing empty region "PTR_FORMAT "(" SIZE_FORMAT " bytes)"
+                               " during cleanup", hr, hr->used());
+      free_region_work(hr, pre_used, cleared_h, freed_regions, list, par);
+    }
+  }
+}
+
+// FIXME: both this and shrink could probably be more efficient by
+// doing one "VirtualSpace::expand_by" call rather than several.
+void G1CollectedHeap::expand(size_t expand_bytes) {
+  size_t old_mem_size = _g1_storage.committed_size();
+  // We expand by a minimum of 1K.
+  expand_bytes = MAX2(expand_bytes, (size_t)K);
+  size_t aligned_expand_bytes =
+    ReservedSpace::page_align_size_up(expand_bytes);
+  aligned_expand_bytes = align_size_up(aligned_expand_bytes,
+                                       HeapRegion::GrainBytes);
+  expand_bytes = aligned_expand_bytes;
+  while (expand_bytes > 0) {
+    HeapWord* base = (HeapWord*)_g1_storage.high();
+    // Commit more storage.
+    bool successful = _g1_storage.expand_by(HeapRegion::GrainBytes);
+    if (!successful) {
+        expand_bytes = 0;
+    } else {
+      expand_bytes -= HeapRegion::GrainBytes;
+      // Expand the committed region.
+      HeapWord* high = (HeapWord*) _g1_storage.high();
+      _g1_committed.set_end(high);
+      // Create a new HeapRegion.
+      MemRegion mr(base, high);
+      bool is_zeroed = !_g1_max_committed.contains(base);
+      HeapRegion* hr = new HeapRegion(_bot_shared, mr, is_zeroed);
+
+      // Now update max_committed if necessary.
+      _g1_max_committed.set_end(MAX2(_g1_max_committed.end(), high));
+
+      // Add it to the HeapRegionSeq.
+      _hrs->insert(hr);
+      // Set the zero-fill state, according to whether it's already
+      // zeroed.
+      {
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        if (is_zeroed) {
+          hr->set_zero_fill_complete();
+          put_free_region_on_list_locked(hr);
+        } else {
+          hr->set_zero_fill_needed();
+          put_region_on_unclean_list_locked(hr);
+        }
+      }
+      _free_regions++;
+      // And we used up an expansion region to create it.
+      _expansion_regions--;
+      // Tell the cardtable about it.
+      Universe::heap()->barrier_set()->resize_covered_region(_g1_committed);
+      // And the offset table as well.
+      _bot_shared->resize(_g1_committed.word_size());
+    }
+  }
+  if (Verbose && PrintGC) {
+    size_t new_mem_size = _g1_storage.committed_size();
+    gclog_or_tty->print_cr("Expanding garbage-first heap from %ldK by %ldK to %ldK",
+                           old_mem_size/K, aligned_expand_bytes/K,
+                           new_mem_size/K);
+  }
+}
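+
+// Alignment in expand(), with assumed (illustrative) sizes of a 4K page and
+// a 1M region grain: an 800K request is already page aligned and is then
+// rounded up to a single full region,
+//
+//   align_size_up(800K, 1M) == 1M,
+//
+// so the loop above commits exactly one GrainBytes chunk for it.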
+
+void G1CollectedHeap::shrink_helper(size_t shrink_bytes)
+{
+  size_t old_mem_size = _g1_storage.committed_size();
+  size_t aligned_shrink_bytes =
+    ReservedSpace::page_align_size_down(shrink_bytes);
+  aligned_shrink_bytes = align_size_down(aligned_shrink_bytes,
+                                         HeapRegion::GrainBytes);
+  size_t num_regions_deleted = 0;
+  MemRegion mr = _hrs->shrink_by(aligned_shrink_bytes, num_regions_deleted);
+
+  assert(mr.end() == (HeapWord*)_g1_storage.high(), "Bad shrink!");
+  if (mr.byte_size() > 0)
+    _g1_storage.shrink_by(mr.byte_size());
+  assert(mr.start() == (HeapWord*)_g1_storage.high(), "Bad shrink!");
+
+  _g1_committed.set_end(mr.start());
+  _free_regions -= num_regions_deleted;
+  _expansion_regions += num_regions_deleted;
+
+  // Tell the cardtable about it.
+  Universe::heap()->barrier_set()->resize_covered_region(_g1_committed);
+
+  // And the offset table as well.
+  _bot_shared->resize(_g1_committed.word_size());
+
+  HeapRegionRemSet::shrink_heap(n_regions());
+
+  if (Verbose && PrintGC) {
+    size_t new_mem_size = _g1_storage.committed_size();
+    gclog_or_tty->print_cr("Shrinking garbage-first heap from %ldK by %ldK to %ldK",
+                           old_mem_size/K, aligned_shrink_bytes/K,
+                           new_mem_size/K);
+  }
+}
+
+void G1CollectedHeap::shrink(size_t shrink_bytes) {
+  release_gc_alloc_regions();
+  tear_down_region_lists();  // We will rebuild them in a moment.
+  shrink_helper(shrink_bytes);
+  rebuild_region_lists();
+}
+
+// Public methods.
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
+  SharedHeap(policy_),
+  _g1_policy(policy_),
+  _ref_processor(NULL),
+  _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
+  _bot_shared(NULL),
+  _par_alloc_during_gc_lock(Mutex::leaf, "par alloc during GC lock"),
+  _objs_with_preserved_marks(NULL), _preserved_marks_of_objs(NULL),
+  _evac_failure_scan_stack(NULL),
+  _mark_in_progress(false),
+  _cg1r(NULL), _czft(NULL), _summary_bytes_used(0),
+  _cur_alloc_region(NULL),
+  _refine_cte_cl(NULL),
+  _free_region_list(NULL), _free_region_list_size(0),
+  _free_regions(0),
+  _popular_object_boundary(NULL),
+  _cur_pop_hr_index(0),
+  _popular_regions_to_be_evacuated(NULL),
+  _pop_obj_rc_at_copy(),
+  _full_collection(false),
+  _unclean_region_list(),
+  _unclean_regions_coming(false),
+  _young_list(new YoungList(this)),
+  _gc_time_stamp(0),
+  _surviving_young_words(NULL)
+{
+  _g1h = this; // To catch bugs.
+  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
+    vm_exit_during_initialization("Failed necessary allocation.");
+  }
+  int n_queues = MAX2((int)ParallelGCThreads, 1);
+  _task_queues = new RefToScanQueueSet(n_queues);
+
+  int n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
+  assert(n_rem_sets > 0, "Invariant.");
+
+  HeapRegionRemSetIterator** iter_arr =
+    NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues);
+  for (int i = 0; i < n_queues; i++) {
+    iter_arr[i] = new HeapRegionRemSetIterator();
+  }
+  _rem_set_iterator = iter_arr;
+
+  for (int i = 0; i < n_queues; i++) {
+    RefToScanQueue* q = new RefToScanQueue();
+    q->initialize();
+    _task_queues->register_queue(i, q);
+  }
+
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    _gc_alloc_regions[ap]       = NULL;
+    _gc_alloc_region_counts[ap] = 0;
+  }
+  guarantee(_task_queues != NULL, "task_queues allocation failure.");
+}
+
+jint G1CollectedHeap::initialize() {
+  os::enable_vtime();
+
+  // Necessary to satisfy locking discipline assertions.
+
+  MutexLocker x(Heap_lock);
+
+  // While there are no constraints in the GC code that HeapWordSize
+  // be any particular value, there are multiple other areas in the
+  // system which believe this to be true (e.g. oop->object_size in some
+  // cases incorrectly returns the size in wordSize units rather than
+  // HeapWordSize).
+  guarantee(HeapWordSize == wordSize, "HeapWordSize must equal wordSize");
+
+  size_t init_byte_size = collector_policy()->initial_heap_byte_size();
+  size_t max_byte_size = collector_policy()->max_heap_byte_size();
+
+  // Ensure that the sizes are properly aligned.
+  Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
+  Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
+
+  // We allocate this in any case, but it does no work if the command-line
+  // param is off.
+  _cg1r = new ConcurrentG1Refine();
+
+  // Reserve the maximum.
+  PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
+  // Includes the perm-gen.
+  ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
+                        HeapRegion::GrainBytes,
+                        false /*ism*/);
+
+  if (!heap_rs.is_reserved()) {
+    vm_exit_during_initialization("Could not reserve enough space for object heap");
+    return JNI_ENOMEM;
+  }
+
+  // It is important to do this in a way such that concurrent readers can't
+  // temporarily think something is in the heap.  (I've actually seen this
+  // happen in asserts: DLD.)
+  _reserved.set_word_size(0);
+  _reserved.set_start((HeapWord*)heap_rs.base());
+  _reserved.set_end((HeapWord*)(heap_rs.base() + heap_rs.size()));
+
+  _expansion_regions = max_byte_size/HeapRegion::GrainBytes;
+
+  _num_humongous_regions = 0;
+
+  // Create the gen rem set (and barrier set) for the entire reserved region.
+  _rem_set = collector_policy()->create_rem_set(_reserved, 2);
+  set_barrier_set(rem_set()->bs());
+  if (barrier_set()->is_a(BarrierSet::ModRef)) {
+    _mr_bs = (ModRefBarrierSet*)_barrier_set;
+  } else {
+    vm_exit_during_initialization("G1 requires a mod ref bs.");
+    return JNI_ENOMEM;
+  }
+
+  // Also create a G1 rem set.
+  if (G1UseHRIntoRS) {
+    if (mr_bs()->is_a(BarrierSet::CardTableModRef)) {
+      _g1_rem_set = new HRInto_G1RemSet(this, (CardTableModRefBS*)mr_bs());
+    } else {
+      vm_exit_during_initialization("G1 requires a cardtable mod ref bs.");
+      return JNI_ENOMEM;
+    }
+  } else {
+    _g1_rem_set = new StupidG1RemSet(this);
+  }
+
+  // Carve out the G1 part of the heap.
+
+  ReservedSpace g1_rs   = heap_rs.first_part(max_byte_size);
+  _g1_reserved = MemRegion((HeapWord*)g1_rs.base(),
+                           g1_rs.size()/HeapWordSize);
+  ReservedSpace perm_gen_rs = heap_rs.last_part(max_byte_size);
+
+  _perm_gen = pgs->init(perm_gen_rs, pgs->init_size(), rem_set());
+
+  _g1_storage.initialize(g1_rs, 0);
+  _g1_committed = MemRegion((HeapWord*)_g1_storage.low(), (size_t) 0);
+  _g1_max_committed = _g1_committed;
+  _hrs = new HeapRegionSeq(_expansion_regions);
+  guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq");
+  guarantee(_cur_alloc_region == NULL, "from constructor");
+
+  _bot_shared = new G1BlockOffsetSharedArray(_reserved,
+                                             heap_word_size(init_byte_size));
+
+  _g1h = this;
+
+  // Create the ConcurrentMark data structure and thread.
+  // (Must do this late, so that "max_regions" is defined.)
+  _cm       = new ConcurrentMark(heap_rs, (int) max_regions());
+  _cmThread = _cm->cmThread();
+
+  // ...and the concurrent zero-fill thread, if necessary.
+  if (G1ConcZeroFill) {
+    _czft = new ConcurrentZFThread();
+  }
+
+  // Allocate the popular regions; take them off free lists.
+  size_t pop_byte_size = G1NumPopularRegions * HeapRegion::GrainBytes;
+  expand(pop_byte_size);
+  _popular_object_boundary =
+    _g1_reserved.start() + (G1NumPopularRegions * HeapRegion::GrainWords);
+  for (int i = 0; i < G1NumPopularRegions; i++) {
+    HeapRegion* hr = newAllocRegion(HeapRegion::GrainWords);
+    //    assert(hr != NULL && hr->bottom() < _popular_object_boundary,
+    //     "Should be enough, and all should be below boundary.");
+    hr->set_popular(true);
+  }
+  assert(_cur_pop_hr_index == 0, "Start allocating at the first region.");
+
+  // Initialize the from_card cache structure of HeapRegionRemSet.
+  HeapRegionRemSet::init_heap(max_regions());
+
+  // Now expand into the rest of the initial heap size.
+  expand(init_byte_size - pop_byte_size);
+
+  // Perform any initialization actions delegated to the policy.
+  g1_policy()->init();
+
+  g1_policy()->note_start_of_mark_thread();
+
+  _refine_cte_cl =
+    new RefineCardTableEntryClosure(ConcurrentG1RefineThread::sts(),
+                                    g1_rem_set(),
+                                    concurrent_g1_refine());
+  JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl);
+
+  JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
+                                               SATB_Q_FL_lock,
+                                               0,
+                                               Shared_SATB_Q_lock);
+  if (G1RSBarrierUseQueue) {
+    JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                                  DirtyCardQ_FL_lock,
+                                                  G1DirtyCardQueueMax,
+                                                  Shared_DirtyCardQ_lock);
+  }
+  // In case we're keeping closure specialization stats, initialize those
+  // counts and that mechanism.
+  SpecializationStats::clear();
+
+  _gc_alloc_region_list = NULL;
+
+  // Do later initialization work for concurrent refinement.
+  _cg1r->init();
+
+  const char* group_names[] = { "CR", "ZF", "CM", "CL" };
+  GCOverheadReporter::initGCOverheadReporter(4, group_names);
+
+  return JNI_OK;
+}
+
+void G1CollectedHeap::ref_processing_init() {
+  SharedHeap::ref_processing_init();
+  MemRegion mr = reserved_region();
+  _ref_processor = ReferenceProcessor::create_ref_processor(
+                                         mr,    // span
+                                         false, // Reference discovery is not atomic
+                                                // (though it shouldn't matter here.)
+                                         true,  // mt_discovery
+                                         NULL,  // is alive closure: need to fill this in for efficiency
+                                         ParallelGCThreads,
+                                         ParallelRefProcEnabled,
+                                         true); // Setting next fields of discovered
+                                                // lists requires a barrier.
+}
+
+size_t G1CollectedHeap::capacity() const {
+  return _g1_committed.byte_size();
+}
+
+void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
+                                                 int worker_i) {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  int n_completed_buffers = 0;
+  while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) {
+    n_completed_buffers++;
+  }
+  g1_policy()->record_update_rs_processed_buffers(worker_i,
+                                                  (double) n_completed_buffers);
+  dcqs.clear_n_completed_buffers();
+  // Finish up the queue...
+  if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i,
+                                                            g1_rem_set());
+  assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
+}
+
+
+// Computes the sum of the storage used by the various regions.
+
+size_t G1CollectedHeap::used() const {
+  assert(Heap_lock->owner() != NULL,
+         "Should be owned on this thread's behalf.");
+  size_t result = _summary_bytes_used;
+  if (_cur_alloc_region != NULL)
+    result += _cur_alloc_region->used();
+  return result;
+}
+
+class SumUsedClosure: public HeapRegionClosure {
+  size_t _used;
+public:
+  SumUsedClosure() : _used(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      _used += r->used();
+    }
+    return false;
+  }
+  size_t result() { return _used; }
+};
+
+size_t G1CollectedHeap::recalculate_used() const {
+  SumUsedClosure blk;
+  _hrs->iterate(&blk);
+  return blk.result();
+}
+
+#ifndef PRODUCT
+class SumUsedRegionsClosure: public HeapRegionClosure {
+  size_t _num;
+public:
+  // _num starts at G1NumPopularRegions to account for the popular regions.
+  SumUsedRegionsClosure() : _num(G1NumPopularRegions) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous() || r->used() > 0 || r->is_gc_alloc_region()) {
+      _num += 1;
+    }
+    return false;
+  }
+  size_t result() { return _num; }
+};
+
+size_t G1CollectedHeap::recalculate_used_regions() const {
+  SumUsedRegionsClosure blk;
+  _hrs->iterate(&blk);
+  return blk.result();
+}
+#endif // PRODUCT
+
+size_t G1CollectedHeap::unsafe_max_alloc() {
+  if (_free_regions > 0) return HeapRegion::GrainBytes;
+  // otherwise, is there space in the current allocation region?
+
+  // We need to store the current allocation region in a local variable
+  // here. The problem is that this method doesn't take any locks and
+  // there may be other threads which overwrite the current allocation
+  // region field. attempt_allocation(), for example, sets it to NULL
+  // and this can happen *after* the NULL check here but before the call
+  // to free(), resulting in a SIGSEGV. Note that this doesn't appear
+  // to be a problem in the optimized build, since the two loads of the
+  // current allocation region field are optimized away.
+  HeapRegion* car = _cur_alloc_region;
+
+  // FIXME: should iterate over all regions?
+  if (car == NULL) {
+    return 0;
+  }
+  return car->free();
+}
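+
+// The idiom the comment above describes, reduced to its essentials
+// (illustrative only): read the racy field exactly once into a local and
+// make every subsequent test and dereference through that local.
+//
+//   HeapRegion* car = _cur_alloc_region;   // single read of the shared field
+//   if (car == NULL) return 0;             // all later uses go through 'car'
+//   return car->free();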
+
+void G1CollectedHeap::collect(GCCause::Cause cause) {
+  // The caller doesn't have the Heap_lock
+  assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock");
+  MutexLocker ml(Heap_lock);
+  collect_locked(cause);
+}
+
+void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) {
+  assert(Thread::current()->is_VM_thread(), "Precondition#1");
+  assert(Heap_lock->is_locked(), "Precondition#2");
+  GCCauseSetter gcs(this, cause);
+  switch (cause) {
+    case GCCause::_heap_inspection:
+    case GCCause::_heap_dump: {
+      HandleMark hm;
+      do_full_collection(false);         // don't clear all soft refs
+      break;
+    }
+    default: // XXX FIX ME
+      ShouldNotReachHere(); // Unexpected use of this function
+  }
+}
+
+
+void G1CollectedHeap::collect_locked(GCCause::Cause cause) {
+  // Don't want to do a GC until cleanup is completed.
+  wait_for_cleanup_complete();
+
+  // Read the GC count while holding the Heap_lock
+  int gc_count_before = SharedHeap::heap()->total_collections();
+  {
+    MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+    VM_G1CollectFull op(gc_count_before, cause);
+    VMThread::execute(&op);
+  }
+}
+
+bool G1CollectedHeap::is_in(const void* p) const {
+  if (_g1_committed.contains(p)) {
+    HeapRegion* hr = _hrs->addr_to_region(p);
+    return hr->is_in(p);
+  } else {
+    return _perm_gen->as_gen()->is_in(p);
+  }
+}
+
+// Iteration functions.
+
+// Iterates an OopClosure over all ref-containing fields of objects
+// within a HeapRegion.
+
+class IterateOopClosureRegionClosure: public HeapRegionClosure {
+  MemRegion _mr;
+  OopClosure* _cl;
+public:
+  IterateOopClosureRegionClosure(MemRegion mr, OopClosure* cl)
+    : _mr(mr), _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (! r->continuesHumongous()) {
+      r->oop_iterate(_cl);
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::oop_iterate(OopClosure* cl) {
+  IterateOopClosureRegionClosure blk(_g1_committed, cl);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::oop_iterate(MemRegion mr, OopClosure* cl) {
+  IterateOopClosureRegionClosure blk(mr, cl);
+  _hrs->iterate(&blk);
+}
+
+// Iterates an ObjectClosure over all objects within a HeapRegion.
+
+class IterateObjectClosureRegionClosure: public HeapRegionClosure {
+  ObjectClosure* _cl;
+public:
+  IterateObjectClosureRegionClosure(ObjectClosure* cl) : _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (! r->continuesHumongous()) {
+      r->object_iterate(_cl);
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::object_iterate(ObjectClosure* cl) {
+  IterateObjectClosureRegionClosure blk(cl);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) {
+  // FIXME: is this right?
+  guarantee(false, "object_iterate_since_last_GC not supported by G1 heap");
+}
+
+// Calls a SpaceClosure on a HeapRegion.
+
+class SpaceClosureRegionClosure: public HeapRegionClosure {
+  SpaceClosure* _cl;
+public:
+  SpaceClosureRegionClosure(SpaceClosure* cl) : _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _cl->do_space(r);
+    return false;
+  }
+};
+
+void G1CollectedHeap::space_iterate(SpaceClosure* cl) {
+  SpaceClosureRegionClosure blk(cl);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::heap_region_iterate(HeapRegionClosure* cl) {
+  _hrs->iterate(cl);
+}
+
+void G1CollectedHeap::heap_region_iterate_from(HeapRegion* r,
+                                               HeapRegionClosure* cl) {
+  _hrs->iterate_from(r, cl);
+}
+
+void
+G1CollectedHeap::heap_region_iterate_from(int idx, HeapRegionClosure* cl) {
+  _hrs->iterate_from(idx, cl);
+}
+
+HeapRegion* G1CollectedHeap::region_at(size_t idx) { return _hrs->at(idx); }
+
+void
+G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
+                                                 int worker,
+                                                 jint claim_value) {
+  const size_t regions = n_regions();
+  const size_t worker_num = (ParallelGCThreads > 0 ? ParallelGCThreads : 1);
+  // try to spread out the starting points of the workers
+  const size_t start_index = regions / worker_num * (size_t) worker;
+
+  // each worker will actually look at all regions
+  for (size_t count = 0; count < regions; ++count) {
+    const size_t index = (start_index + count) % regions;
+    assert(0 <= index && index < regions, "sanity");
+    HeapRegion* r = region_at(index);
+    // we'll ignore "continues humongous" regions (we'll process them
+    // when we come across their corresponding "starts humongous"
+    // region) and regions already claimed
+    if (r->claim_value() == claim_value || r->continuesHumongous()) {
+      continue;
+    }
+    // OK, try to claim it
+    if (r->claimHeapRegion(claim_value)) {
+      // success!
+      assert(!r->continuesHumongous(), "sanity");
+      if (r->startsHumongous()) {
+        // If the region is "starts humongous" we'll iterate over its
+        // "continues humongous" regions first. The order is important: if
+        // we called the closure on the "starts humongous" region first, it
+        // might de-allocate and clear all its "continues humongous" regions
+        // and, as a result, we might end up processing them twice. So we do
+        // them first (notice: most closures will ignore them anyway) and
+        // then we do the "starts humongous" region.
+        for (size_t ch_index = index + 1; ch_index < regions; ++ch_index) {
+          HeapRegion* chr = region_at(ch_index);
+
+          // if the region has already been claimed or it's not
+          // "continues humongous" we're done
+          if (chr->claim_value() == claim_value ||
+              !chr->continuesHumongous()) {
+            break;
+          }
+
+          // No one should have claimed it directly, given that we
+          // claimed its "starts humongous" region.
+          assert(chr->claim_value() != claim_value, "sanity");
+          assert(chr->humongous_start_region() == r, "sanity");
+
+          if (chr->claimHeapRegion(claim_value)) {
+            // we should always be able to claim it; no one else should
+            // be trying to claim this region
+
+            bool res2 = cl->doHeapRegion(chr);
+            assert(!res2, "Should not abort");
+
+            // Right now, this holds (i.e., no closure that actually
+            // does something with "continues humongous" regions
+            // clears them). We might have to weaken it in the future,
+            // but let's leave these two asserts here for extra safety.
+            assert(chr->continuesHumongous(), "should still be the case");
+            assert(chr->humongous_start_region() == r, "sanity");
+          } else {
+            guarantee(false, "we should not reach here");
+          }
+        }
+      }
+
+      assert(!r->continuesHumongous(), "sanity");
+      bool res = cl->doHeapRegion(r);
+      assert(!res, "Should not abort");
+    }
+  }
+}
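+
+// A small sketch of the start-index spreading used above, with hypothetical
+// counts; illustrative only.
+#if 0
+static size_t example_start_index(size_t regions, size_t workers, size_t worker) {
+  // With regions == 10 and workers == 4, workers 0..3 start at indices
+  // 0, 2, 4 and 6; each still visits all 10 regions via
+  // (start_index + count) % regions, so coverage is complete but the
+  // workers rarely contend on the same region at the same time.
+  return regions / workers * worker;
+}
+#endif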
+
+class ResetClaimValuesClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    r->set_claim_value(HeapRegion::InitialClaimValue);
+    return false;
+  }
+};
+
+void
+G1CollectedHeap::reset_heap_region_claim_values() {
+  ResetClaimValuesClosure blk;
+  heap_region_iterate(&blk);
+}
+
+#ifdef ASSERT
+// This checks whether all regions in the heap have the correct claim
+// value. It also piggy-backs a check that the humongous_start_region()
+// information on "continues humongous" regions is correct.
+
+class CheckClaimValuesClosure : public HeapRegionClosure {
+private:
+  jint _claim_value;
+  size_t _failures;
+  HeapRegion* _sh_region;
+public:
+  CheckClaimValuesClosure(jint claim_value) :
+    _claim_value(claim_value), _failures(0), _sh_region(NULL) { }
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->claim_value() != _claim_value) {
+      gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
+                             "claim value = %d, should be %d",
+                             r->bottom(), r->end(), r->claim_value(),
+                             _claim_value);
+      ++_failures;
+    }
+    if (!r->isHumongous()) {
+      _sh_region = NULL;
+    } else if (r->startsHumongous()) {
+      _sh_region = r;
+    } else if (r->continuesHumongous()) {
+      if (r->humongous_start_region() != _sh_region) {
+        gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
+                               "HS = "PTR_FORMAT", should be "PTR_FORMAT,
+                               r->bottom(), r->end(),
+                               r->humongous_start_region(),
+                               _sh_region);
+        ++_failures;
+      }
+    }
+    return false;
+  }
+  size_t failures() {
+    return _failures;
+  }
+};
+
+bool G1CollectedHeap::check_heap_region_claim_values(jint claim_value) {
+  CheckClaimValuesClosure cl(claim_value);
+  heap_region_iterate(&cl);
+  return cl.failures() == 0;
+}
+#endif // ASSERT
+
+void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
+  HeapRegion* r = g1_policy()->collection_set();
+  while (r != NULL) {
+    HeapRegion* next = r->next_in_collection_set();
+    if (cl->doHeapRegion(r)) {
+      cl->incomplete();
+      return;
+    }
+    r = next;
+  }
+}
+
+void G1CollectedHeap::collection_set_iterate_from(HeapRegion* r,
+                                                  HeapRegionClosure *cl) {
+  assert(r->in_collection_set(),
+         "Start region must be a member of the collection set.");
+  HeapRegion* cur = r;
+  while (cur != NULL) {
+    HeapRegion* next = cur->next_in_collection_set();
+    if (cl->doHeapRegion(cur) && false) {
+      cl->incomplete();
+      return;
+    }
+    cur = next;
+  }
+  cur = g1_policy()->collection_set();
+  while (cur != r) {
+    HeapRegion* next = cur->next_in_collection_set();
+    if (cl->doHeapRegion(cur) && false) {
+      cl->incomplete();
+      return;
+    }
+    cur = next;
+  }
+}
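+
+// A minimal sketch of the wrap-around traversal above on a generic singly
+// linked list; the node type and visit callback are hypothetical,
+// illustrative only.
+#if 0
+struct ExampleNode { ExampleNode* next; };
+static void example_visit_from(ExampleNode* head, ExampleNode* start,
+                               void (*visit)(ExampleNode*)) {
+  // First walk from 'start' to the end of the list, then from the head up
+  // to (but not including) 'start': every node is visited exactly once.
+  for (ExampleNode* cur = start; cur != NULL; cur = cur->next) visit(cur);
+  for (ExampleNode* cur = head; cur != start; cur = cur->next) visit(cur);
+}
+#endif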
+
+CompactibleSpace* G1CollectedHeap::first_compactible_space() {
+  return _hrs->length() > 0 ? _hrs->at(0) : NULL;
+}
+
+
+Space* G1CollectedHeap::space_containing(const void* addr) const {
+  Space* res = heap_region_containing(addr);
+  if (res == NULL)
+    res = perm_gen()->space_containing(addr);
+  return res;
+}
+
+HeapWord* G1CollectedHeap::block_start(const void* addr) const {
+  Space* sp = space_containing(addr);
+  if (sp != NULL) {
+    return sp->block_start(addr);
+  }
+  return NULL;
+}
+
+size_t G1CollectedHeap::block_size(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  assert(sp != NULL, "block_size of address outside of heap");
+  return sp->block_size(addr);
+}
+
+bool G1CollectedHeap::block_is_obj(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  return sp->block_is_obj(addr);
+}
+
+bool G1CollectedHeap::supports_tlab_allocation() const {
+  return true;
+}
+
+size_t G1CollectedHeap::tlab_capacity(Thread* ignored) const {
+  return HeapRegion::GrainBytes;
+}
+
+size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
+  // Return the remaining space in the cur alloc region, but not less than
+  // the min TLAB size.
+  // Also, no more than half the region size, since we can't allow tlabs to
+  // grow big enough to accommodate humongous objects.
+
+  // We need to store it locally, since it might change between when we
+  // test for NULL and when we use it later.
+  ContiguousSpace* cur_alloc_space = _cur_alloc_region;
+  if (cur_alloc_space == NULL) {
+    return HeapRegion::GrainBytes/2;
+  } else {
+    return MAX2(MIN2(cur_alloc_space->free(),
+                     (size_t)(HeapRegion::GrainBytes/2)),
+                (size_t)MinTLABSize);
+  }
+}
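+
+// A worked example of the clamping above with made-up sizes (the real
+// values of GrainBytes and MinTLABSize are platform- and flag-dependent);
+// illustrative only.
+#if 0
+static size_t example_tlab_limit(size_t free, size_t grain, size_t min_tlab) {
+  // free = 3K, grain = 1M, min_tlab = 2K:
+  //   MIN2(3K, 512K) = 3K, MAX2(3K, 2K) = 3K  -> the region's free space
+  // free = 1K:
+  //   MIN2(1K, 512K) = 1K, MAX2(1K, 2K) = 2K  -> the TLAB size floor
+  return MAX2(MIN2(free, grain / 2), min_tlab);
+}
+#endif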
+
+HeapWord* G1CollectedHeap::allocate_new_tlab(size_t size) {
+  bool dummy;
+  return G1CollectedHeap::mem_allocate(size, false, true, &dummy);
+}
+
+bool G1CollectedHeap::allocs_are_zero_filled() {
+  return false;
+}
+
+size_t G1CollectedHeap::large_typearray_limit() {
+  // FIXME
+  return HeapRegion::GrainBytes/HeapWordSize;
+}
+
+size_t G1CollectedHeap::max_capacity() const {
+  return _g1_committed.byte_size();
+}
+
+jlong G1CollectedHeap::millis_since_last_gc() {
+  // assert(false, "NYI");
+  return 0;
+}
+
+
+void G1CollectedHeap::prepare_for_verify() {
+  if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
+    ensure_parsability(false);
+  }
+  g1_rem_set()->prepare_for_verify();
+}
+
+class VerifyLivenessOopClosure: public OopClosure {
+  G1CollectedHeap* g1h;
+public:
+  VerifyLivenessOopClosure(G1CollectedHeap* _g1h) {
+    g1h = _g1h;
+  }
+  void do_oop(narrowOop *p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop *p) {
+    oop obj = *p;
+    assert(obj == NULL || !g1h->is_obj_dead(obj),
+           "Dead object referenced by a not dead object");
+  }
+};
+
+class VerifyObjsInRegionClosure: public ObjectClosure {
+  G1CollectedHeap* _g1h;
+  size_t _live_bytes;
+  HeapRegion *_hr;
+public:
+  VerifyObjsInRegionClosure(HeapRegion *hr) : _live_bytes(0), _hr(hr) {
+    _g1h = G1CollectedHeap::heap();
+  }
+  void do_object(oop o) {
+    VerifyLivenessOopClosure isLive(_g1h);
+    assert(o != NULL, "Huh?");
+    if (!_g1h->is_obj_dead(o)) {
+      o->oop_iterate(&isLive);
+      if (!_hr->obj_allocated_since_prev_marking(o))
+        _live_bytes += (o->size() * HeapWordSize);
+    }
+  }
+  size_t live_bytes() { return _live_bytes; }
+};
+
+class PrintObjsInRegionClosure : public ObjectClosure {
+  HeapRegion *_hr;
+  G1CollectedHeap *_g1;
+public:
+  PrintObjsInRegionClosure(HeapRegion *hr) : _hr(hr) {
+    _g1 = G1CollectedHeap::heap();
+  };
+
+  void do_object(oop o) {
+    if (o != NULL) {
+      HeapWord *start = (HeapWord *) o;
+      size_t word_sz = o->size();
+      gclog_or_tty->print("\nPrinting obj "PTR_FORMAT" of size " SIZE_FORMAT
+                          " isMarkedPrev %d isMarkedNext %d isAllocSince %d\n",
+                          (void*) o, word_sz,
+                          _g1->isMarkedPrev(o),
+                          _g1->isMarkedNext(o),
+                          _hr->obj_allocated_since_prev_marking(o));
+      HeapWord *end = start + word_sz;
+      HeapWord *cur;
+      int *val;
+      for (cur = start; cur < end; cur++) {
+        val = (int *) cur;
+        gclog_or_tty->print("\t "PTR_FORMAT":"PTR_FORMAT"\n", val, *val);
+      }
+    }
+  }
+};
+
+class VerifyRegionClosure: public HeapRegionClosure {
+public:
+  bool _allow_dirty;
+  bool _par;
+  VerifyRegionClosure(bool allow_dirty, bool par = false)
+    : _allow_dirty(allow_dirty), _par(par) {}
+  bool doHeapRegion(HeapRegion* r) {
+    guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
+              "Should be unclaimed at verify points.");
+    if (r->isHumongous()) {
+      if (r->startsHumongous()) {
+        // Verify the single H object.
+        oop(r->bottom())->verify();
+        size_t word_sz = oop(r->bottom())->size();
+        guarantee(r->top() == r->bottom() + word_sz,
+                  "Only one object in a humongous region");
+      }
+    } else {
+      VerifyObjsInRegionClosure not_dead_yet_cl(r);
+      r->verify(_allow_dirty);
+      r->object_iterate(&not_dead_yet_cl);
+      guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(),
+                "More live objects than counted in last complete marking.");
+    }
+    return false;
+  }
+};
+
+class VerifyRootsClosure: public OopsInGenClosure {
+private:
+  G1CollectedHeap* _g1h;
+  bool             _failures;
+
+public:
+  VerifyRootsClosure() :
+    _g1h(G1CollectedHeap::heap()), _failures(false) { }
+
+  bool failures() { return _failures; }
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    oop obj = *p;
+    if (obj != NULL) {
+      if (_g1h->is_obj_dead(obj)) {
+        gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
+                               "points to dead obj "PTR_FORMAT, p, (void*) obj);
+        obj->print_on(gclog_or_tty);
+        _failures = true;
+      }
+    }
+  }
+};
+
+// This is the task used for parallel heap verification.
+
+class G1ParVerifyTask: public AbstractGangTask {
+private:
+  G1CollectedHeap* _g1h;
+  bool _allow_dirty;
+
+public:
+  G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty) :
+    AbstractGangTask("Parallel verify task"),
+    _g1h(g1h), _allow_dirty(allow_dirty) { }
+
+  void work(int worker_i) {
+    VerifyRegionClosure blk(_allow_dirty, true);
+    _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+                                          HeapRegion::ParVerifyClaimValue);
+  }
+};
+
+void G1CollectedHeap::verify(bool allow_dirty, bool silent) {
+  if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
+    if (!silent) { gclog_or_tty->print("roots "); }
+    VerifyRootsClosure rootsCl;
+    process_strong_roots(false,
+                         SharedHeap::SO_AllClasses,
+                         &rootsCl,
+                         &rootsCl);
+    rem_set()->invalidate(perm_gen()->used_region(), false);
+    if (!silent) { gclog_or_tty->print("heapRegions "); }
+    if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
+      assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+             "sanity check");
+
+      G1ParVerifyTask task(this, allow_dirty);
+      int n_workers = workers()->total_workers();
+      set_par_threads(n_workers);
+      workers()->run_task(&task);
+      set_par_threads(0);
+
+      assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
+             "sanity check");
+
+      reset_heap_region_claim_values();
+
+      assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+             "sanity check");
+    } else {
+      VerifyRegionClosure blk(allow_dirty);
+      _hrs->iterate(&blk);
+    }
+    if (!silent) gclog_or_tty->print("remset ");
+    rem_set()->verify();
+    guarantee(!rootsCl.failures(), "should not have had failures");
+  } else {
+    if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) ");
+  }
+}
+
+class PrintRegionClosure: public HeapRegionClosure {
+  outputStream* _st;
+public:
+  PrintRegionClosure(outputStream* st) : _st(st) {}
+  bool doHeapRegion(HeapRegion* r) {
+    r->print_on(_st);
+    return false;
+  }
+};
+
+void G1CollectedHeap::print() const { print_on(gclog_or_tty); }
+
+void G1CollectedHeap::print_on(outputStream* st) const {
+  PrintRegionClosure blk(st);
+  _hrs->iterate(&blk);
+}
+
+void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
+  if (ParallelGCThreads > 0) {
+    workers()->print_worker_threads();
+  }
+  st->print("\"G1 concurrent mark GC Thread\" ");
+  _cmThread->print();
+  st->cr();
+  st->print("\"G1 concurrent refinement GC Thread\" ");
+  _cg1r->cg1rThread()->print_on(st);
+  st->cr();
+  st->print("\"G1 zero-fill GC Thread\" ");
+  _czft->print_on(st);
+  st->cr();
+}
+
+void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
+  if (ParallelGCThreads > 0) {
+    workers()->threads_do(tc);
+  }
+  tc->do_thread(_cmThread);
+  tc->do_thread(_cg1r->cg1rThread());
+  tc->do_thread(_czft);
+}
+
+void G1CollectedHeap::print_tracing_info() const {
+  concurrent_g1_refine()->print_final_card_counts();
+
+  // We'll overload this to mean "trace GC pause statistics."
+  if (TraceGen0Time || TraceGen1Time) {
+    // The "G1CollectorPolicy" is keeping track of these stats, so delegate
+    // to that.
+    g1_policy()->print_tracing_info();
+  }
+  if (SummarizeG1RSStats) {
+    g1_rem_set()->print_summary_info();
+  }
+  if (SummarizeG1ConcMark) {
+    concurrent_mark()->print_summary_info();
+  }
+  if (SummarizeG1ZFStats) {
+    ConcurrentZFThread::print_summary_info();
+  }
+  if (G1SummarizePopularity) {
+    print_popularity_summary_info();
+  }
+  g1_policy()->print_yg_surv_rate_info();
+
+  GCOverheadReporter::printGCOverhead();
+
+  SpecializationStats::print();
+}
+
+
+int G1CollectedHeap::addr_to_arena_id(void* addr) const {
+  HeapRegion* hr = heap_region_containing(addr);
+  if (hr == NULL) {
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+G1CollectedHeap* G1CollectedHeap::heap() {
+  assert(_sh->kind() == CollectedHeap::G1CollectedHeap,
+         "not a garbage-first heap");
+  return _g1h;
+}
+
+void G1CollectedHeap::gc_prologue(bool full /* Ignored */) {
+  if (PrintHeapAtGC) {
+    gclog_or_tty->print_cr(" {Heap before GC collections=%d:", total_collections());
+    Universe::print();
+  }
+  assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer");
+  // Call allocation profiler
+  AllocationProfiler::iterate_since_last_gc();
+  // Fill TLAB's and such
+  ensure_parsability(true);
+}
+
+void G1CollectedHeap::gc_epilogue(bool full /* Ignored */) {
+  // FIXME: what is this about?
+  // I'm ignoring the "fill_newgen()" call if "alloc_event_enabled"
+  // is set.
+  COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(),
+                        "derived pointer present"));
+
+  if (PrintHeapAtGC) {
+    gclog_or_tty->print_cr(" Heap after GC collections=%d:", total_collections());
+    Universe::print();
+    gclog_or_tty->print("} ");
+  }
+}
+
+void G1CollectedHeap::do_collection_pause() {
+  // Read the GC count while holding the Heap_lock
+  // we need to do this _before_ wait_for_cleanup_complete(), to
+  // ensure that we do not give up the heap lock and potentially
+  // pick up the wrong count
+  int gc_count_before = SharedHeap::heap()->total_collections();
+
+  // Don't want to do a GC pause while cleanup is being completed!
+  wait_for_cleanup_complete();
+
+  g1_policy()->record_stop_world_start();
+  {
+    MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+    VM_G1IncCollectionPause op(gc_count_before);
+    VMThread::execute(&op);
+  }
+}
+
+void
+G1CollectedHeap::doConcurrentMark() {
+  if (G1ConcMark) {
+    MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+    if (!_cmThread->in_progress()) {
+      _cmThread->set_started();
+      CGC_lock->notify();
+    }
+  }
+}
+
+class VerifyMarkedObjsClosure: public ObjectClosure {
+  G1CollectedHeap* _g1h;
+public:
+  VerifyMarkedObjsClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
+  void do_object(oop obj) {
+    assert(obj->mark()->is_marked() ? !_g1h->is_obj_dead(obj) : true,
+           "mark-and-sweep mark should agree with concurrent deadness");
+  }
+};
+
+void
+G1CollectedHeap::checkConcurrentMark() {
+  VerifyMarkedObjsClosure verifycl(this);
+  doConcurrentMark();
+  //    MutexLockerEx x(getMarkBitMapLock(),
+  //              Mutex::_no_safepoint_check_flag);
+  object_iterate(&verifycl);
+}
+
+void G1CollectedHeap::do_sync_mark() {
+  _cm->checkpointRootsInitial();
+  _cm->markFromRoots();
+  _cm->checkpointRootsFinal(false);
+}
+
+// <NEW PREDICTION>
+
+double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr,
+                                                       bool young) {
+  return _g1_policy->predict_region_elapsed_time_ms(hr, young);
+}
+
+void G1CollectedHeap::check_if_region_is_too_expensive(double
+                                                           predicted_time_ms) {
+  _g1_policy->check_if_region_is_too_expensive(predicted_time_ms);
+}
+
+size_t G1CollectedHeap::pending_card_num() {
+  size_t extra_cards = 0;
+  JavaThread *curr = Threads::first();
+  while (curr != NULL) {
+    DirtyCardQueue& dcq = curr->dirty_card_queue();
+    extra_cards += dcq.size();
+    curr = curr->next();
+  }
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  size_t buffer_size = dcqs.buffer_size();
+  size_t buffer_num = dcqs.completed_buffers_num();
+  return buffer_size * buffer_num + extra_cards;
+}
+
+size_t G1CollectedHeap::max_pending_card_num() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  size_t buffer_size = dcqs.buffer_size();
+  size_t buffer_num  = dcqs.completed_buffers_num();
+  int thread_num  = Threads::number_of_threads();
+  return (buffer_num + thread_num) * buffer_size;
+}
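+
+// A worked example of the card-count arithmetic above, using hypothetical
+// numbers; illustrative only.
+#if 0
+static void example_pending_cards() {
+  size_t buffer_size = 256;          // cards per completed buffer
+  size_t buffer_num  = 4;            // completed buffers in the set
+  size_t extra_cards = 10 + 20;      // partially filled per-thread queues
+  int    thread_num  = 2;            // Java threads
+  size_t pending     = buffer_size * buffer_num + extra_cards;      // 1054
+  size_t max_pending = (buffer_num + thread_num) * buffer_size;     // 1536
+}
+#endif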
+
+size_t G1CollectedHeap::cards_scanned() {
+  HRInto_G1RemSet* g1_rset = (HRInto_G1RemSet*) g1_rem_set();
+  return g1_rset->cardsScanned();
+}
+
+void
+G1CollectedHeap::setup_surviving_young_words() {
+  guarantee( _surviving_young_words == NULL, "pre-condition" );
+  size_t array_length = g1_policy()->young_cset_length();
+  _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length);
+  if (_surviving_young_words == NULL) {
+    vm_exit_out_of_memory(sizeof(size_t) * array_length,
+                          "Not enough space for young surv words summary.");
+  }
+  memset(_surviving_young_words, 0, array_length * sizeof(size_t));
+  for (size_t i = 0;  i < array_length; ++i) {
+    guarantee( _surviving_young_words[i] == 0, "invariant" );
+  }
+}
+
+void
+G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  size_t array_length = g1_policy()->young_cset_length();
+  for (size_t i = 0; i < array_length; ++i)
+    _surviving_young_words[i] += surv_young_words[i];
+}
+
+void
+G1CollectedHeap::cleanup_surviving_young_words() {
+  guarantee( _surviving_young_words != NULL, "pre-condition" );
+  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words);
+  _surviving_young_words = NULL;
+}
+
+// </NEW PREDICTION>
+
+void
+G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) {
+  char verbose_str[128];
+  sprintf(verbose_str, "GC pause ");
+  if (popular_region != NULL)
+    strcat(verbose_str, "(popular)");
+  else if (g1_policy()->in_young_gc_mode()) {
+    if (g1_policy()->full_young_gcs())
+      strcat(verbose_str, "(young)");
+    else
+      strcat(verbose_str, "(partial)");
+  }
+  bool reset_should_initiate_conc_mark = false;
+  if (popular_region != NULL && g1_policy()->should_initiate_conc_mark()) {
+    // we currently do not allow an initial mark phase to be piggy-backed
+    // on a popular pause
+    reset_should_initiate_conc_mark = true;
+    g1_policy()->unset_should_initiate_conc_mark();
+  }
+  if (g1_policy()->should_initiate_conc_mark())
+    strcat(verbose_str, " (initial-mark)");
+
+  GCCauseSetter x(this, (popular_region == NULL ?
+                         GCCause::_g1_inc_collection_pause :
+                         GCCause::_g1_pop_region_collection_pause));
+
+  // if PrintGCDetails is on, we'll print long statistics information
+  // in the collector policy code, so let's not print this as the output
+  // is messy if we do.
+  gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
+  TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
+  TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
+
+  ResourceMark rm;
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
+  assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
+  guarantee(!is_gc_active(), "collection is not reentrant");
+  assert(regions_accounted_for(), "Region leakage!");
+
+  increment_gc_time_stamp();
+
+  if (g1_policy()->in_young_gc_mode()) {
+    assert(check_young_list_well_formed(),
+                "young list should be well formed");
+  }
+
+  if (GC_locker::is_active()) {
+    return; // GC is disabled (e.g. JNI GetXXXCritical operation)
+  }
+
+  bool abandoned = false;
+  { // Call to jvmpi::post_class_unload_events must occur outside of active GC
+    IsGCActiveMark x;
+
+    gc_prologue(false);
+    increment_total_collections();
+
+#if G1_REM_SET_LOGGING
+    gclog_or_tty->print_cr("\nJust chose CS, heap:");
+    print();
+#endif
+
+    if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      prepare_for_verify();
+      gclog_or_tty->print(" VerifyBeforeGC:");
+      Universe::verify(false);
+    }
+
+    COMPILER2_PRESENT(DerivedPointerTable::clear());
+
+    // We want to turn off ref discovery, if necessary, and turn it back
+    // on again later if we do.
+    bool was_enabled = ref_processor()->discovery_enabled();
+    if (was_enabled) ref_processor()->disable_discovery();
+
+    // Forget the current alloc region (we might even choose it to be part
+    // of the collection set!).
+    abandon_cur_alloc_region();
+
+    // The start time is recorded below so that the elapsed time we measure
+    // deliberately excludes the possible verification above.
+    double start_time_sec = os::elapsedTime();
+    GCOverheadReporter::recordSTWStart(start_time_sec);
+    size_t start_used_bytes = used();
+    if (!G1ConcMark) {
+      do_sync_mark();
+    }
+
+    g1_policy()->record_collection_pause_start(start_time_sec,
+                                               start_used_bytes);
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    if (g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPre();
+    }
+    save_marks();
+
+    // We must do this before any possible evacuation that should propagate
+    // marks, including evacuation of popular objects in a popular pause.
+    if (mark_in_progress()) {
+      double start_time_sec = os::elapsedTime();
+
+      _cm->drainAllSATBBuffers();
+      double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
+      g1_policy()->record_satb_drain_time(finish_mark_ms);
+
+    }
+    // Record the number of elements currently on the mark stack, so we
+    // only iterate over these.  (Since evacuation may add to the mark
+    // stack, doing more exposes race conditions.)  If no mark is in
+    // progress, this will be zero.
+    _cm->set_oops_do_bound();
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    if (mark_in_progress())
+      concurrent_mark()->newCSet();
+
+    // Now choose the CS.
+    if (popular_region == NULL) {
+      g1_policy()->choose_collection_set();
+    } else {
+      // We may be evacuating a single region (for popularity).
+      g1_policy()->record_popular_pause_preamble_start();
+      popularity_pause_preamble(popular_region);
+      g1_policy()->record_popular_pause_preamble_end();
+      abandoned = (g1_policy()->collection_set() == NULL);
+      // Now we allow more regions to be added (we have to collect
+      // all popular regions).
+      if (!abandoned) {
+        g1_policy()->choose_collection_set(popular_region);
+      }
+    }
+    // We may abandon a pause if we find no region that will fit in the MMU
+    // pause.
+    abandoned = (g1_policy()->collection_set() == NULL);
+
+    // Nothing to do if we were unable to choose a collection set.
+    if (!abandoned) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("\nAfter pause, heap:");
+      print();
+#endif
+
+      setup_surviving_young_words();
+
+      // Set up the gc allocation regions.
+      get_gc_alloc_regions();
+
+      // Actually do the work...
+      evacuate_collection_set();
+      free_collection_set(g1_policy()->collection_set());
+      g1_policy()->clear_collection_set();
+
+      if (popular_region != NULL) {
+        // We have to wait until now, because we don't want the region to
+        // be rescheduled for pop-evac during RS update.
+        popular_region->set_popular_pending(false);
+      }
+
+      release_gc_alloc_regions();
+
+      cleanup_surviving_young_words();
+
+      if (g1_policy()->in_young_gc_mode()) {
+        _young_list->reset_sampled_info();
+        assert(check_young_list_empty(true),
+               "young list should be empty");
+
+#if SCAN_ONLY_VERBOSE
+        _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+        _young_list->reset_auxilary_lists();
+      }
+    } else {
+      COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+    }
+
+    if (evacuation_failed()) {
+      _summary_bytes_used = recalculate_used();
+    } else {
+      // The "used" of the the collection set have already been subtracted
+      // when they were freed.  Add in the bytes evacuated.
+      _summary_bytes_used += g1_policy()->bytes_in_to_space();
+    }
+
+    if (g1_policy()->in_young_gc_mode() &&
+        g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPost();
+      set_marking_started();
+      doConcurrentMark();
+    }
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    double end_time_sec = os::elapsedTime();
+    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    GCOverheadReporter::recordSTWEnd(end_time_sec);
+    g1_policy()->record_collection_pause_end(popular_region != NULL,
+                                             abandoned);
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      gclog_or_tty->print(" VerifyAfterGC:");
+      Universe::verify(false);
+    }
+
+    if (was_enabled) ref_processor()->enable_discovery();
+
+    {
+      size_t expand_bytes = g1_policy()->expansion_amount();
+      if (expand_bytes > 0) {
+        size_t bytes_before = capacity();
+        expand(expand_bytes);
+      }
+    }
+
+    if (mark_in_progress())
+      concurrent_mark()->update_g1_committed();
+
+    gc_epilogue(false);
+  }
+
+  assert(verify_region_lists(), "Bad region lists.");
+
+  if (reset_should_initiate_conc_mark)
+    g1_policy()->set_should_initiate_conc_mark();
+
+  if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
+    gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
+    print_tracing_info();
+    vm_exit(-1);
+  }
+}
+
+void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
+  assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
+  HeapWord* original_top = NULL;
+  if (r != NULL)
+    original_top = r->top();
+
+  // We will want to record the used space in r as being there before gc.
+  // Once we install it as a GC alloc region it's eligible for allocation.
+  // So record it now and use it later.
+  size_t r_used = 0;
+  if (r != NULL) {
+    r_used = r->used();
+
+    if (ParallelGCThreads > 0) {
+      // need to take the lock to guard against two threads calling
+      // get_gc_alloc_region concurrently (very unlikely but...)
+      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      r->save_marks();
+    }
+  }
+  HeapRegion* old_alloc_region = _gc_alloc_regions[purpose];
+  _gc_alloc_regions[purpose] = r;
+  if (old_alloc_region != NULL) {
+    // Replace aliases too.
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      if (_gc_alloc_regions[ap] == old_alloc_region) {
+        _gc_alloc_regions[ap] = r;
+      }
+    }
+  }
+  if (r != NULL) {
+    push_gc_alloc_region(r);
+    if (mark_in_progress() && original_top != r->next_top_at_mark_start()) {
+      // We are using a region as a GC alloc region after it has been used
+      // as a mutator allocation region during the current marking cycle.
+      // The mutator-allocated objects are currently implicitly marked, but
+      // when we move hr->next_top_at_mark_start() forward at the end
+      // of the GC pause, they won't be.  We therefore mark all objects in
+      // the "gap".  We do this object-by-object, since marking densely
+      // does not currently work right with marking bitmap iteration.  This
+      // means we rely on TLAB filling at the start of pauses, and no
+      // "resuscitation" of filled TLAB's.  If we want to do this, we need
+      // to fix the marking bitmap iteration.
+      HeapWord* curhw = r->next_top_at_mark_start();
+      HeapWord* t = original_top;
+
+      while (curhw < t) {
+        oop cur = (oop)curhw;
+        // We'll assume parallel for generality.  This is rare code.
+        concurrent_mark()->markAndGrayObjectIfNecessary(cur); // can't we just mark them?
+        curhw = curhw + cur->size();
+      }
+      assert(curhw == t, "Should have parsed correctly.");
+    }
+    if (G1PolicyVerbose > 1) {
+      gclog_or_tty->print("New alloc region ["PTR_FORMAT", "PTR_FORMAT", " PTR_FORMAT") "
+                          "for survivors:", r->bottom(), original_top, r->end());
+      r->print();
+    }
+    g1_policy()->record_before_bytes(r_used);
+  }
+}
+
+void G1CollectedHeap::push_gc_alloc_region(HeapRegion* hr) {
+  assert(Thread::current()->is_VM_thread() ||
+         par_alloc_during_gc_lock()->owned_by_self(), "Precondition");
+  assert(!hr->is_gc_alloc_region() && !hr->in_collection_set(),
+         "Precondition.");
+  hr->set_is_gc_alloc_region(true);
+  hr->set_next_gc_alloc_region(_gc_alloc_region_list);
+  _gc_alloc_region_list = hr;
+}
+
+#ifdef G1_DEBUG
+class FindGCAllocRegion: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->is_gc_alloc_region()) {
+      gclog_or_tty->print_cr("Region %d ["PTR_FORMAT"...] is still a gc_alloc_region.",
+                             r->hrs_index(), r->bottom());
+    }
+    return false;
+  }
+};
+#endif // G1_DEBUG
+
+void G1CollectedHeap::forget_alloc_region_list() {
+  assert(Thread::current()->is_VM_thread(), "Precondition");
+  while (_gc_alloc_region_list != NULL) {
+    HeapRegion* r = _gc_alloc_region_list;
+    assert(r->is_gc_alloc_region(), "Invariant.");
+    _gc_alloc_region_list = r->next_gc_alloc_region();
+    r->set_next_gc_alloc_region(NULL);
+    r->set_is_gc_alloc_region(false);
+    if (r->is_empty()) {
+      ++_free_regions;
+    }
+  }
+#ifdef G1_DEBUG
+  FindGCAllocRegion fa;
+  heap_region_iterate(&fa);
+#endif // G1_DEBUG
+}
+
+
+bool G1CollectedHeap::check_gc_alloc_regions() {
+  // TODO: allocation regions check
+  return true;
+}
+
+void G1CollectedHeap::get_gc_alloc_regions() {
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    // Create new GC alloc regions.
+    HeapRegion* alloc_region = _gc_alloc_regions[ap];
+    // Clear this alloc region, so that in case it turns out to be
+    // unacceptable, we end up with no allocation region, rather than a bad
+    // one.
+    _gc_alloc_regions[ap] = NULL;
+    if (alloc_region == NULL || alloc_region->in_collection_set()) {
+      // Can't re-use old one.  Allocate a new one.
+      alloc_region = newAllocRegionWithExpansion(ap, 0);
+    }
+    if (alloc_region != NULL) {
+      set_gc_alloc_region(ap, alloc_region);
+    }
+  }
+  // Set alternative regions for allocation purposes that have reached
+  // their limit.
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap);
+    if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) {
+      _gc_alloc_regions[ap] = _gc_alloc_regions[alt_purpose];
+    }
+  }
+  assert(check_gc_alloc_regions(), "alloc regions messed up");
+}
+
+void G1CollectedHeap::release_gc_alloc_regions() {
+  // We keep a separate list of all regions that have been alloc regions in
+  // the current collection pause.  Forget that now.
+  forget_alloc_region_list();
+
+  // The current alloc regions contain objs that have survived
+  // collection. Make them no longer GC alloc regions.
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    if (r != NULL && r->is_empty()) {
+      {
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        r->set_zero_fill_complete();
+        put_free_region_on_list_locked(r);
+      }
+    }
+    // set_gc_alloc_region will also NULLify all aliases to the region
+    set_gc_alloc_region(ap, NULL);
+    _gc_alloc_region_counts[ap] = 0;
+  }
+}
+
+void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
+  _drain_in_progress = false;
+  set_evac_failure_closure(cl);
+  _evac_failure_scan_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
+}
+
+void G1CollectedHeap::finalize_for_evac_failure() {
+  assert(_evac_failure_scan_stack != NULL &&
+         _evac_failure_scan_stack->length() == 0,
+         "Postcondition");
+  assert(!_drain_in_progress, "Postcondition");
+  // Don't have to delete, since the scan stack is a resource object.
+  _evac_failure_scan_stack = NULL;
+}
+
+
+
+// *** Sequential G1 Evacuation
+
+HeapWord* G1CollectedHeap::allocate_during_gc(GCAllocPurpose purpose, size_t word_size) {
+  HeapRegion* alloc_region = _gc_alloc_regions[purpose];
+  // let the caller handle alloc failure
+  if (alloc_region == NULL) return NULL;
+  assert(isHumongous(word_size) || !alloc_region->isHumongous(),
+         "Either the object is humongous or the region isn't");
+  HeapWord* block = alloc_region->allocate(word_size);
+  if (block == NULL) {
+    block = allocate_during_gc_slow(purpose, alloc_region, false, word_size);
+  }
+  return block;
+}
+
+class G1IsAliveClosure: public BoolObjectClosure {
+  G1CollectedHeap* _g1;
+public:
+  G1IsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
+  void do_object(oop p) { assert(false, "Do not call."); }
+  bool do_object_b(oop p) {
+    // It is reachable if it is outside the collection set, or is inside
+    // and forwarded.
+
+#ifdef G1_DEBUG
+    gclog_or_tty->print_cr("is alive "PTR_FORMAT" in CS %d forwarded %d overall %d",
+                           (void*) p, _g1->obj_in_cs(p), p->is_forwarded(),
+                           !_g1->obj_in_cs(p) || p->is_forwarded());
+#endif // G1_DEBUG
+
+    return !_g1->obj_in_cs(p) || p->is_forwarded();
+  }
+};
+
+class G1KeepAliveClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+public:
+  G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+#ifdef G1_DEBUG
+    if (PrintGC && Verbose) {
+      gclog_or_tty->print_cr("keep alive *"PTR_FORMAT" = "PTR_FORMAT" "PTR_FORMAT,
+                             p, (void*) obj, (void*) *p);
+    }
+#endif // G1_DEBUG
+
+    if (_g1->obj_in_cs(obj)) {
+      assert( obj->is_forwarded(), "invariant" );
+      *p = obj->forwardee();
+
+#ifdef G1_DEBUG
+      gclog_or_tty->print_cr("     in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT,
+                             (void*) obj, (void*) *p);
+#endif // G1_DEBUG
+    }
+  }
+};
+
+class RecreateRSetEntriesClosure: public OopClosure {
+private:
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem_set;
+  HeapRegion* _from;
+public:
+  RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) :
+    _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from)
+  {}
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (*p != NULL) {
+      _g1_rem_set->write_ref(_from, p);
+    }
+  }
+};
+
+class RemoveSelfPointerClosure: public ObjectClosure {
+private:
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  HeapRegion* _hr;
+  size_t _prev_marked_bytes;
+  size_t _next_marked_bytes;
+public:
+  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) :
+    _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr),
+    _prev_marked_bytes(0), _next_marked_bytes(0)
+  {}
+
+  size_t prev_marked_bytes() { return _prev_marked_bytes; }
+  size_t next_marked_bytes() { return _next_marked_bytes; }
+
+  // The original idea here was to coalesce evacuated and dead objects.
+  // However that caused complications with the block offset table (BOT).
+  // In particular if there were two TLABs, one of them partially refined.
+  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
+  // The BOT entries of the unrefined part of TLAB_2 point to the start
+  // of TLAB_2. If the last object of the TLAB_1 and the first object
+  // of TLAB_2 are coalesced, then the cards of the unrefined part
+  // would point into middle of the filler object.
+  //
+  // The current approach is to not coalesce and leave the BOT contents intact.
+  void do_object(oop obj) {
+    if (obj->is_forwarded() && obj->forwardee() == obj) {
+      // The object failed to move.
+      assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
+      _cm->markPrev(obj);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+      _prev_marked_bytes += (obj->size() * HeapWordSize);
+      if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
+        _cm->markAndGrayObjectIfNecessary(obj);
+      }
+      obj->set_mark(markOopDesc::prototype());
+      // While we were processing RSet buffers during the
+      // collection, we actually didn't scan any cards on the
+      // collection set, since we didn't want to update remembered
+      // sets with entries that point into the collection set, given
+      // that live objects from the collection set are about to move
+      // and such entries will be stale very soon. This change also
+      // dealt with a reliability issue which involved scanning a
+      // card in the collection set and coming across an array that
+      // was being chunked and looking malformed. The problem is
+      // that, if evacuation fails, we might have remembered set
+      // entries missing given that we skipped cards on the
+      // collection set. So, we'll recreate such entries now.
+      RecreateRSetEntriesClosure cl(_g1, _hr);
+      obj->oop_iterate(&cl);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+    } else {
+      // The object has been either evacuated or is dead. Fill it with a
+      // dummy object.
+      MemRegion mr((HeapWord*)obj, obj->size());
+      SharedHeap::fill_region_with_object(mr);
+      _cm->clearRangeBothMaps(mr);
+    }
+  }
+};
+
+void G1CollectedHeap::remove_self_forwarding_pointers() {
+  HeapRegion* cur = g1_policy()->collection_set();
+
+  while (cur != NULL) {
+    assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+    if (cur->evacuation_failed()) {
+      RemoveSelfPointerClosure rspc(_g1h, cur);
+      assert(cur->in_collection_set(), "bad CS");
+      cur->object_iterate(&rspc);
+
+      // A number of manipulations to make the TAMS be the current top,
+      // and the marked bytes be the ones observed in the iteration.
+      if (_g1h->concurrent_mark()->at_least_one_mark_complete()) {
+        // The comments below are the postconditions achieved by the
+        // calls.  Note especially the last such condition, which says that
+        // the count of marked bytes has been properly restored.
+        cur->note_start_of_marking(false);
+        // _next_top_at_mark_start == top, _next_marked_bytes == 0
+        cur->add_to_marked_bytes(rspc.prev_marked_bytes());
+        // _next_marked_bytes == prev_marked_bytes.
+        cur->note_end_of_marking();
+        // _prev_top_at_mark_start == top(),
+        // _prev_marked_bytes == prev_marked_bytes
+      }
+      // If there is no mark in progress, we modified the _next variables
+      // above needlessly, but harmlessly.
+      if (_g1h->mark_in_progress()) {
+        cur->note_start_of_marking(false);
+        // _next_top_at_mark_start == top, _next_marked_bytes == 0
+        // _next_marked_bytes == next_marked_bytes.
+      }
+
+      // Now make sure the region has the right index in the sorted array.
+      g1_policy()->note_change_in_marked_bytes(cur);
+    }
+    cur = cur->next_in_collection_set();
+  }
+  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+  // Now restore saved marks, if any.
+  if (_objs_with_preserved_marks != NULL) {
+    assert(_preserved_marks_of_objs != NULL, "Both or none.");
+    assert(_objs_with_preserved_marks->length() ==
+           _preserved_marks_of_objs->length(), "Both or none.");
+    guarantee(_objs_with_preserved_marks->length() ==
+              _preserved_marks_of_objs->length(), "Both or none.");
+    for (int i = 0; i < _objs_with_preserved_marks->length(); i++) {
+      oop obj   = _objs_with_preserved_marks->at(i);
+      markOop m = _preserved_marks_of_objs->at(i);
+      obj->set_mark(m);
+    }
+    // Delete the preserved marks growable arrays (allocated on the C heap).
+    delete _objs_with_preserved_marks;
+    delete _preserved_marks_of_objs;
+    _objs_with_preserved_marks = NULL;
+    _preserved_marks_of_objs = NULL;
+  }
+}
+
+void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) {
+  _evac_failure_scan_stack->push(obj);
+}
+
+void G1CollectedHeap::drain_evac_failure_scan_stack() {
+  assert(_evac_failure_scan_stack != NULL, "precondition");
+
+  while (_evac_failure_scan_stack->length() > 0) {
+     oop obj = _evac_failure_scan_stack->pop();
+     _evac_failure_closure->set_region(heap_region_containing(obj));
+     obj->oop_iterate_backwards(_evac_failure_closure);
+  }
+}
+
+void G1CollectedHeap::handle_evacuation_failure(oop old) {
+  markOop m = old->mark();
+  // forward to self
+  assert(!old->is_forwarded(), "precondition");
+
+  old->forward_to(old);
+  handle_evacuation_failure_common(old, m);
+}
+
+oop
+G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
+                                               oop old) {
+  markOop m = old->mark();
+  oop forward_ptr = old->forward_to_atomic(old);
+  if (forward_ptr == NULL) {
+    // Forward-to-self succeeded.
+    if (_evac_failure_closure != cl) {
+      MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
+      assert(!_drain_in_progress,
+             "Should only be true while someone holds the lock.");
+      // Set the global evac-failure closure to the current thread's.
+      assert(_evac_failure_closure == NULL, "Or locking has failed.");
+      set_evac_failure_closure(cl);
+      // Now do the common part.
+      handle_evacuation_failure_common(old, m);
+      // Reset to NULL.
+      set_evac_failure_closure(NULL);
+    } else {
+      // The lock is already held, and this is recursive.
+      assert(_drain_in_progress, "This should only be the recursive case.");
+      handle_evacuation_failure_common(old, m);
+    }
+    return old;
+  } else {
+    // Someone else had a place to copy it.
+    return forward_ptr;
+  }
+}
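+
+// The parallel path above relies on a claim-by-CAS: the first thread to
+// install a forwarding pointer wins, and losers use the winner's value.
+// A generic sketch of that idea with a hypothetical slot type; illustrative
+// only (forward_to_atomic() is the mechanism actually used here).
+#if 0
+static oop example_claim_or_use(oop volatile* slot, oop mine) {
+  oop prev = (oop)Atomic::cmpxchg_ptr((void*)mine, (volatile void*)slot, NULL);
+  // prev == NULL  -> we won the race and 'mine' is now installed;
+  // prev != NULL  -> another thread got there first, so use its value.
+  return (prev == NULL) ? mine : prev;
+}
+#endif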
+
+void G1CollectedHeap::handle_evacuation_failure_common(oop old, markOop m) {
+  set_evacuation_failed(true);
+
+  preserve_mark_if_necessary(old, m);
+
+  HeapRegion* r = heap_region_containing(old);
+  if (!r->evacuation_failed()) {
+    r->set_evacuation_failed(true);
+    if (G1TraceRegions) {
+      gclog_or_tty->print("evacuation failed in heap region "PTR_FORMAT" "
+                          "["PTR_FORMAT","PTR_FORMAT")\n",
+                          r, r->bottom(), r->end());
+    }
+  }
+
+  push_on_evac_failure_scan_stack(old);
+
+  if (!_drain_in_progress) {
+    // prevent recursion in copy_to_survivor_space()
+    _drain_in_progress = true;
+    drain_evac_failure_scan_stack();
+    _drain_in_progress = false;
+  }
+}
+
+void G1CollectedHeap::preserve_mark_if_necessary(oop obj, markOop m) {
+  if (m != markOopDesc::prototype()) {
+    if (_objs_with_preserved_marks == NULL) {
+      assert(_preserved_marks_of_objs == NULL, "Both or none.");
+      _objs_with_preserved_marks =
+        new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
+      _preserved_marks_of_objs =
+        new (ResourceObj::C_HEAP) GrowableArray<markOop>(40, true);
+    }
+    _objs_with_preserved_marks->push(obj);
+    _preserved_marks_of_objs->push(m);
+  }
+}
+
+// *** Parallel G1 Evacuation
+
+HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose,
+                                                  size_t word_size) {
+  HeapRegion* alloc_region = _gc_alloc_regions[purpose];
+  // let the caller handle alloc failure
+  if (alloc_region == NULL) return NULL;
+
+  HeapWord* block = alloc_region->par_allocate(word_size);
+  if (block == NULL) {
+    MutexLockerEx x(par_alloc_during_gc_lock(),
+                    Mutex::_no_safepoint_check_flag);
+    block = allocate_during_gc_slow(purpose, alloc_region, true, word_size);
+  }
+  return block;
+}
+
+HeapWord*
+G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose,
+                                         HeapRegion*    alloc_region,
+                                         bool           par,
+                                         size_t         word_size) {
+  HeapWord* block = NULL;
+  // In the parallel case, a thread that previously obtained the lock may have
+  // already assigned a new gc_alloc_region.
+  if (alloc_region != _gc_alloc_regions[purpose]) {
+    assert(par, "But should only happen in parallel case.");
+    alloc_region = _gc_alloc_regions[purpose];
+    if (alloc_region == NULL) return NULL;
+    block = alloc_region->par_allocate(word_size);
+    if (block != NULL) return block;
+    // Otherwise, continue; this new region is empty, too.
+  }
+  assert(alloc_region != NULL, "We better have an allocation region");
+  // Another thread might have obtained alloc_region for the given
+  // purpose, and might be attempting to allocate in it, and might
+  // succeed.  Therefore, we can't do the "finalization" stuff on the
+  // region below until we're sure the last allocation has happened.
+  // We ensure this by allocating the remaining space with a garbage
+  // object.
+  if (par) par_allocate_remaining_space(alloc_region);
+  // Now we can do the post-GC stuff on the region.
+  alloc_region->note_end_of_copying();
+  g1_policy()->record_after_bytes(alloc_region->used());
+
+  if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) {
+    // Cannot allocate more regions for the given purpose.
+    GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(purpose);
+    // Is there an alternative?
+    if (purpose != alt_purpose) {
+      HeapRegion* alt_region = _gc_alloc_regions[alt_purpose];
+      // Has the alternative region not been aliased?
+      if (alloc_region != alt_region) {
+        // Try to allocate in the alternative region.
+        if (par) {
+          block = alt_region->par_allocate(word_size);
+        } else {
+          block = alt_region->allocate(word_size);
+        }
+        // Make an alias.
+        _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose];
+      }
+      if (block != NULL) {
+        return block;
+      }
+      // Both the allocation region and the alternative one are full
+      // and aliased, replace them with a new allocation region.
+      purpose = alt_purpose;
+    } else {
+      set_gc_alloc_region(purpose, NULL);
+      return NULL;
+    }
+  }
+
+  // Now allocate a new region for allocation.
+  alloc_region = newAllocRegionWithExpansion(purpose, word_size, false /*zero_filled*/);
+
+  // let the caller handle alloc failure
+  if (alloc_region != NULL) {
+
+    assert(check_gc_alloc_regions(), "alloc regions messed up");
+    assert(alloc_region->saved_mark_at_top(),
+           "Mark should have been saved already.");
+    // We used to assert that the region was zero-filled here, but no
+    // longer.
+
+    // This must be done last: once it's installed, other regions may
+    // allocate in it (without holding the lock.)
+    set_gc_alloc_region(purpose, alloc_region);
+
+    if (par) {
+      block = alloc_region->par_allocate(word_size);
+    } else {
+      block = alloc_region->allocate(word_size);
+    }
+    // Caller handles alloc failure.
+  } else {
+    // This also sets any other aliases of the old alloc region to NULL.
+    set_gc_alloc_region(purpose, NULL);
+  }
+  return block;  // May be NULL.
+}
+
+void G1CollectedHeap::par_allocate_remaining_space(HeapRegion* r) {
+  HeapWord* block = NULL;
+  size_t free_words;
+  do {
+    free_words = r->free()/HeapWordSize;
+    // If there's too little space, no one can allocate, so we're done.
+    if (free_words < (size_t)oopDesc::header_size()) return;
+    // Otherwise, try to claim it.
+    block = r->par_allocate(free_words);
+  } while (block == NULL);
+  SharedHeap::fill_region_with_object(MemRegion(block, free_words));
+}
+
+#define use_local_bitmaps         1
+#define verify_local_bitmaps      0
+
+#ifndef PRODUCT
+
+class GCLabBitMap;
+class GCLabBitMapClosure: public BitMapClosure {
+private:
+  ConcurrentMark* _cm;
+  GCLabBitMap*    _bitmap;
+
+public:
+  GCLabBitMapClosure(ConcurrentMark* cm,
+                     GCLabBitMap* bitmap) {
+    _cm     = cm;
+    _bitmap = bitmap;
+  }
+
+  virtual bool do_bit(size_t offset);
+};
+
+#endif // PRODUCT
+
+#define oop_buffer_length 256
+
+class GCLabBitMap: public BitMap {
+private:
+  ConcurrentMark* _cm;
+
+  int       _shifter;
+  size_t    _bitmap_word_covers_words;
+
+  // beginning of the heap
+  HeapWord* _heap_start;
+
+  // this is the actual start of the GCLab
+  HeapWord* _real_start_word;
+
+  // this is the actual end of the GCLab
+  HeapWord* _real_end_word;
+
+  // this is the first word, possibly located before the actual start
+  // of the GCLab, that corresponds to the first bit of the bitmap
+  HeapWord* _start_word;
+
+  // size of a GCLab in words
+  size_t _gclab_word_size;
+
+  static int shifter() {
+    return MinObjAlignment - 1;
+  }
+
+  // how many heap words does a single bitmap word correspond to?
+  static size_t bitmap_word_covers_words() {
+    return BitsPerWord << shifter();
+  }
+
+  static size_t gclab_word_size() {
+    return ParallelGCG1AllocBufferSize / HeapWordSize;
+  }
+
+  static size_t bitmap_size_in_bits() {
+    size_t bits_in_bitmap = gclab_word_size() >> shifter();
+    // We are going to ensure that the beginning of a word in this
+    // bitmap also corresponds to the beginning of a word in the
+    // global marking bitmap. To handle the case where a GCLab
+    // starts from the middle of the bitmap, we need to add enough
+    // space (i.e. up to a bitmap word) to ensure that we have
+    // enough bits in the bitmap.
+    return bits_in_bitmap + BitsPerWord - 1;
+  }
+public:
+  GCLabBitMap(HeapWord* heap_start)
+    : BitMap(bitmap_size_in_bits()),
+      _cm(G1CollectedHeap::heap()->concurrent_mark()),
+      _shifter(shifter()),
+      _bitmap_word_covers_words(bitmap_word_covers_words()),
+      _heap_start(heap_start),
+      _gclab_word_size(gclab_word_size()),
+      _real_start_word(NULL),
+      _real_end_word(NULL),
+      _start_word(NULL)
+  {
+    guarantee( size_in_words() >= bitmap_size_in_words(),
+               "just making sure");
+  }
+
+  inline unsigned heapWordToOffset(HeapWord* addr) {
+    unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter;
+    assert(offset < size(), "offset should be within bounds");
+    return offset;
+  }
+
+  inline HeapWord* offsetToHeapWord(size_t offset) {
+    HeapWord* addr =  _start_word + (offset << _shifter);
+    assert(_real_start_word <= addr && addr < _real_end_word, "invariant");
+    return addr;
+  }
+
+  bool fields_well_formed() {
+    bool ret1 = (_real_start_word == NULL) &&
+                (_real_end_word == NULL) &&
+                (_start_word == NULL);
+    if (ret1)
+      return true;
+
+    bool ret2 = _real_start_word >= _start_word &&
+      _start_word < _real_end_word &&
+      (_real_start_word + _gclab_word_size) == _real_end_word &&
+      (_start_word + _gclab_word_size + _bitmap_word_covers_words)
+                                                              > _real_end_word;
+    return ret2;
+  }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (addr >= _real_start_word && addr < _real_end_word) {
+      assert(!isMarked(addr), "should not have already been marked");
+
+      // first mark it on the bitmap
+      at_put(heapWordToOffset(addr), true);
+
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  inline bool isMarked(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    return at(heapWordToOffset(addr));
+  }
+
+  void set_buffer(HeapWord* start) {
+    guarantee(use_local_bitmaps, "invariant");
+    clear();
+
+    assert(start != NULL, "invariant");
+    _real_start_word = start;
+    _real_end_word   = start + _gclab_word_size;
+
+    size_t diff =
+      pointer_delta(start, _heap_start) % _bitmap_word_covers_words;
+    _start_word = start - diff;
+
+    assert(fields_well_formed(), "invariant");
+  }
+
+#ifndef PRODUCT
+  void verify() {
+    // verify that the marks have been propagated
+    GCLabBitMapClosure cl(_cm, this);
+    iterate(&cl);
+  }
+#endif // PRODUCT
+
+  void retire() {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (_start_word != NULL) {
+      CMBitMap*       mark_bitmap = _cm->nextMarkBitMap();
+
+      // this means that the bitmap was set up for the GCLab
+      assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
+
+      mark_bitmap->mostly_disjoint_range_union(this,
+                                0, // always start from the start of the bitmap
+                                _start_word,
+                                size_in_words());
+      _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word));
+
+#ifndef PRODUCT
+      if (use_local_bitmaps && verify_local_bitmaps)
+        verify();
+#endif // PRODUCT
+    } else {
+      assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
+    }
+  }
+
+  static size_t bitmap_size_in_words() {
+    return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord;
+  }
+};
+
+#ifndef PRODUCT
+
+bool GCLabBitMapClosure::do_bit(size_t offset) {
+  HeapWord* addr = _bitmap->offsetToHeapWord(offset);
+  guarantee(_cm->isMarked(oop(addr)), "it should be!");
+  return true;
+}
+
+#endif // PRODUCT
+
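+// A ParGCAllocBuffer that, while marking is in progress, also carries a
+// GCLabBitMap so that objects copied into it can be marked locally and the
+// marks flushed in bulk when the buffer is retired.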
+class G1ParGCAllocBuffer: public ParGCAllocBuffer {
+private:
+  bool        _retired;
+  bool        _during_marking;
+  GCLabBitMap _bitmap;
+
+public:
+  G1ParGCAllocBuffer() :
+    ParGCAllocBuffer(ParallelGCG1AllocBufferSize / HeapWordSize),
+    _during_marking(G1CollectedHeap::heap()->mark_in_progress()),
+    _bitmap(G1CollectedHeap::heap()->reserved_region().start()),
+    _retired(false)
+  { }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(_during_marking, "invariant");
+    return _bitmap.mark(addr);
+  }
+
+  inline void set_buf(HeapWord* buf) {
+    if (use_local_bitmaps && _during_marking)
+      _bitmap.set_buffer(buf);
+    ParGCAllocBuffer::set_buf(buf);
+    _retired = false;
+  }
+
+  inline void retire(bool end_of_gc, bool retain) {
+    if (_retired)
+      return;
+    if (use_local_bitmaps && _during_marking) {
+      _bitmap.retire();
+    }
+    ParGCAllocBuffer::retire(end_of_gc, retain);
+    _retired = true;
+  }
+};
+
+
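+// Per-worker state for an evacuation pause: the worker's task queue and
+// overflow queue, one allocation buffer per GCAllocPurpose, the closures
+// used while scanning, timing and waste statistics, and the
+// surviving-young-words counts that are folded into the policy at the end
+// of the pause.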
+class G1ParScanThreadState : public StackObj {
+protected:
+  G1CollectedHeap* _g1h;
+  RefToScanQueue*  _refs;
+
+  typedef GrowableArray<oop*> OverflowQueue;
+  OverflowQueue* _overflowed_refs;
+
+  G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+
+  size_t           _alloc_buffer_waste;
+  size_t           _undo_waste;
+
+  OopsInHeapRegionClosure*      _evac_failure_cl;
+  G1ParScanHeapEvacClosure*     _evac_cl;
+  G1ParScanPartialArrayClosure* _partial_scan_cl;
+
+  int _hash_seed;
+  int _queue_num;
+
+  int _term_attempts;
+#if G1_DETAILED_STATS
+  int _pushes, _pops, _steals, _steal_attempts;
+  int _overflow_pushes;
+#endif
+
+  double _start;
+  double _start_strong_roots;
+  double _strong_roots_time;
+  double _start_term;
+  double _term_time;
+
+  // Map from young-age-index (0 == not young, 1 is youngest) to
+  // surviving words. The base pointer is what we get back from the malloc call.
+  size_t* _surviving_young_words_base;
+  // this points into the array, as we use the first few entries for padding
+  size_t* _surviving_young_words;
+
+#define PADDING_ELEM_NUM (64 / sizeof(size_t))
+
+  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
+
+  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
+
+public:
+  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
+    : _g1h(g1h),
+      _refs(g1h->task_queue(queue_num)),
+      _hash_seed(17), _queue_num(queue_num),
+      _term_attempts(0),
+#if G1_DETAILED_STATS
+      _pushes(0), _pops(0), _steals(0),
+      _steal_attempts(0),  _overflow_pushes(0),
+#endif
+      _strong_roots_time(0), _term_time(0),
+      _alloc_buffer_waste(0), _undo_waste(0)
+  {
+    // We allocate one entry per region in the young collection set, plus
+    // one extra: entry 0 is "sacrificed" to keep track of surviving bytes
+    // for non-young regions (where the age is -1).
+    // We also add a few elements at the beginning and at the end in
+    // an attempt to eliminate cache contention.
+    size_t real_length = 1 + _g1h->g1_policy()->young_cset_length();
+    size_t array_length = PADDING_ELEM_NUM +
+                          real_length +
+                          PADDING_ELEM_NUM;
+    _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length);
+    if (_surviving_young_words_base == NULL)
+      vm_exit_out_of_memory(array_length * sizeof(size_t),
+                            "Not enough space for young surv histo.");
+    _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
+    memset(_surviving_young_words, 0, real_length * sizeof(size_t));
+
+    _overflowed_refs = new OverflowQueue(10);
+
+    _start = os::elapsedTime();
+  }
+
+  ~G1ParScanThreadState() {
+    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
+  }
+
+  RefToScanQueue*   refs()            { return _refs;             }
+  OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
+
+  inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+    return &_alloc_buffers[purpose];
+  }
+
+  size_t alloc_buffer_waste()                    { return _alloc_buffer_waste; }
+  size_t undo_waste()                            { return _undo_waste; }
+
+  void push_on_queue(oop* ref) {
+    if (!refs()->push(ref)) {
+      overflowed_refs()->push(ref);
+      IF_G1_DETAILED_STATS(note_overflow_push());
+    } else {
+      IF_G1_DETAILED_STATS(note_push());
+    }
+  }
+
+  void pop_from_queue(oop*& ref) {
+    if (!refs()->pop_local(ref)) {
+      ref = NULL;
+    } else {
+      IF_G1_DETAILED_STATS(note_pop());
+    }
+  }
+
+  void pop_from_overflow_queue(oop*& ref) {
+    ref = overflowed_refs()->pop();
+  }
+
+  int refs_to_scan()                             { return refs()->size();                 }
+  int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
+
+  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
+
+    HeapWord* obj = NULL;
+    if (word_sz * 100 <
+        (size_t)(ParallelGCG1AllocBufferSize / HeapWordSize) *
+                                                  ParallelGCBufferWastePct) {
+      G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
+      add_to_alloc_buffer_waste(alloc_buf->words_remaining());
+      alloc_buf->retire(false, false);
+
+      HeapWord* buf =
+        _g1h->par_allocate_during_gc(purpose, ParallelGCG1AllocBufferSize / HeapWordSize);
+      if (buf == NULL) return NULL; // Let caller handle allocation failure.
+      // Otherwise.
+      alloc_buf->set_buf(buf);
+
+      obj = alloc_buf->allocate(word_sz);
+      assert(obj != NULL, "buffer was definitely big enough...");
+    }
+    else {
+      obj = _g1h->par_allocate_during_gc(purpose, word_sz);
+    }
+    return obj;
+  }
+
+  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) {
+    HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
+    if (obj != NULL) return obj;
+    return allocate_slow(purpose, word_sz);
+  }
+
+  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) {
+    if (alloc_buffer(purpose)->contains(obj)) {
+      guarantee(alloc_buffer(purpose)->contains(obj + word_sz - 1),
+                "should contain whole object");
+      alloc_buffer(purpose)->undo_allocation(obj, word_sz);
+    }
+    else {
+      SharedHeap::fill_region_with_object(MemRegion(obj, word_sz));
+      add_to_undo_waste(word_sz);
+    }
+  }
+
+  void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
+    _evac_failure_cl = evac_failure_cl;
+  }
+  OopsInHeapRegionClosure* evac_failure_closure() {
+    return _evac_failure_cl;
+  }
+
+  void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) {
+    _evac_cl = evac_cl;
+  }
+
+  void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) {
+    _partial_scan_cl = partial_scan_cl;
+  }
+
+  int* hash_seed() { return &_hash_seed; }
+  int  queue_num() { return _queue_num; }
+
+  int term_attempts()   { return _term_attempts; }
+  void note_term_attempt()  { _term_attempts++; }
+
+#if G1_DETAILED_STATS
+  int pushes()          { return _pushes; }
+  int pops()            { return _pops; }
+  int steals()          { return _steals; }
+  int steal_attempts()  { return _steal_attempts; }
+  int overflow_pushes() { return _overflow_pushes; }
+
+  void note_push()          { _pushes++; }
+  void note_pop()           { _pops++; }
+  void note_steal()         { _steals++; }
+  void note_steal_attempt() { _steal_attempts++; }
+  void note_overflow_push() { _overflow_pushes++; }
+#endif
+
+  void start_strong_roots() {
+    _start_strong_roots = os::elapsedTime();
+  }
+  void end_strong_roots() {
+    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
+  }
+  double strong_roots_time() { return _strong_roots_time; }
+
+  void start_term_time() {
+    note_term_attempt();
+    _start_term = os::elapsedTime();
+  }
+  void end_term_time() {
+    _term_time += (os::elapsedTime() - _start_term);
+  }
+  double term_time() { return _term_time; }
+
+  double elapsed() {
+    return os::elapsedTime() - _start;
+  }
+
+  size_t* surviving_young_words() {
+    // The caller adds one to hide entry 0, which accumulates surviving
+    // words for age -1 regions (i.e. non-young ones).
+    return _surviving_young_words;
+  }
+
+  void retire_alloc_buffers() {
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      size_t waste = _alloc_buffers[ap].words_remaining();
+      add_to_alloc_buffer_waste(waste);
+      _alloc_buffers[ap].retire(true, false);
+    }
+  }
+
+  void trim_queue() {
+    while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
+      oop *ref_to_scan = NULL;
+      if (overflowed_refs_to_scan() == 0) {
+        pop_from_queue(ref_to_scan);
+      } else {
+        pop_from_overflow_queue(ref_to_scan);
+      }
+      if (ref_to_scan != NULL) {
+        if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) {
+          _partial_scan_cl->do_oop_nv(ref_to_scan);
+        } else {
+          // Note: we can use "raw" versions of "region_containing" because
+          // "obj_to_scan" is definitely in the heap, and is not in a
+          // humongous region.
+          HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+          _evac_cl->set_region(r);
+          _evac_cl->do_oop_nv(ref_to_scan);
+        }
+      }
+    }
+  }
+};
+
+
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+  _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
+  _par_scan_state(par_scan_state) { }
+
+// This closure is applied to the fields of the objects that have just been copied.
+// Should probably be made inline and moved into g1OopClosures.inline.hpp.
+void G1ParScanClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL) {
+    if (_g1->obj_in_cs(obj)) {
+      if (obj->is_forwarded()) {
+        *p = obj->forwardee();
+      } else {
+        _par_scan_state->push_on_queue(p);
+        return;
+      }
+    }
+    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+  }
+}
+
+void G1ParCopyHelper::mark_forwardee(oop* p) {
+  // This is called _after_ do_oop_work has been called, hence after
+  // the object has been relocated to its new location and *p points
+  // to its new location.
+
+  oop thisOop = *p;
+  if (thisOop != NULL) {
+    assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(thisOop)),
+           "shouldn't still be in the CSet if evacuation didn't fail.");
+    HeapWord* addr = (HeapWord*)thisOop;
+    if (_g1->is_in_g1_reserved(addr))
+      _cm->grayRoot(oop(addr));
+  }
+}
+
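+// Evacuates "old" out of the collection set: allocate space in the
+// destination chosen by the policy, try to install a forwarding pointer
+// atomically and, if we win that race, copy the object, restore its mark
+// word (incrementing its age if tracked), propagate "next" marking
+// information when marking is in progress, record surviving-young words,
+// and either push a large object array back onto the queue in chunks or
+// scan the copy's fields immediately. If we lose the race, the allocation
+// is undone and the winner's forwardee is returned.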
+oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
+  size_t    word_sz = old->size();
+  HeapRegion* from_region = _g1->heap_region_containing_raw(old);
+  // +1 to make the -1 indexes valid...
+  int       young_index = from_region->young_index_in_cset()+1;
+  assert( (from_region->is_young() && young_index > 0) ||
+          (!from_region->is_young() && young_index == 0), "invariant" );
+  G1CollectorPolicy* g1p = _g1->g1_policy();
+  markOop m = old->mark();
+  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(),
+                                                             word_sz);
+  HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
+  oop       obj     = oop(obj_ptr);
+
+  if (obj_ptr == NULL) {
+    // This will either forward-to-self, or detect that someone else has
+    // installed a forwarding pointer.
+    OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
+    return _g1->handle_evacuation_failure_par(cl, old);
+  }
+
+  oop forward_ptr = old->forward_to_atomic(obj);
+  if (forward_ptr == NULL) {
+    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
+    obj->set_mark(m);
+    if (g1p->track_object_age(alloc_purpose)) {
+      obj->incr_age();
+    }
+    // preserve "next" mark bit
+    if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
+      if (!use_local_bitmaps ||
+          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
+        // if we couldn't mark it on the local bitmap (this happens when
+        // the object was not allocated in the GCLab), we have to bite
+        // the bullet and do the standard parallel mark
+        _cm->markAndGrayObjectIfNecessary(obj);
+      }
+#if 1
+      if (_g1->isMarkedNext(old)) {
+        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
+      }
+#endif
+    }
+
+    size_t* surv_young_words = _par_scan_state->surviving_young_words();
+    surv_young_words[young_index] += word_sz;
+
+    if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
+      arrayOop(old)->set_length(0);
+      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+    } else {
+      _scanner->set_region(_g1->heap_region_containing(obj));
+      obj->oop_iterate_backwards(_scanner);
+    }
+  } else {
+    _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
+    obj = forward_ptr;
+  }
+  return obj;
+}
+
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+  oop obj = *p;
+  assert(barrier != G1BarrierRS || obj != NULL,
+         "Precondition: G1BarrierRS implies obj is nonNull");
+
+  if (obj != NULL) {
+    if (_g1->obj_in_cs(obj)) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
+                             p, (void*) obj);
+#endif
+      if (obj->is_forwarded()) {
+        *p = obj->forwardee();
+      } else {
+        *p = copy_to_survivor_space(obj);
+      }
+      // When scanning the RS, we only care about objs in CS.
+      if (barrier == G1BarrierRS) {
+        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+      }
+    }
+    // When scanning moved objs, must look at all oops.
+    if (barrier == G1BarrierEvac) {
+      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+    }
+
+    if (do_gen_barrier) {
+      par_do_barrier(p);
+    }
+  }
+}
+
+template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
+
+template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+  oop obj, int start, int end) {
+  // process our set of indices (include header in first chunk)
+  assert(start < end, "invariant");
+  T* const base      = (T*)objArrayOop(obj)->base();
+  T* const start_addr = base + start;
+  T* const end_addr   = base + end;
+  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
+  _scanner.set_region(_g1->heap_region_containing(obj));
+  obj->oop_iterate(&_scanner, mr);
+}
+
+void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
+  assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
+  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(old->is_objArray(), "must be obj array");
+  assert(old->is_forwarded(), "must be forwarded");
+  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
+
+  objArrayOop obj = objArrayOop(old->forwardee());
+  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
+  // Process ParGCArrayScanChunk elements now
+  // and push the remainder back onto queue
+  int start     = arrayOop(old)->length();
+  int end       = obj->length();
+  int remainder = end - start;
+  assert(start <= end, "just checking");
+  if (remainder > 2 * ParGCArrayScanChunk) {
+    // Test above combines last partial chunk with a full chunk
+    end = start + ParGCArrayScanChunk;
+    arrayOop(old)->set_length(end);
+    // Push remainder.
+    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+  } else {
+    // Restore length so that the heap remains parsable in
+    // case of evacuation failure.
+    arrayOop(old)->set_length(end);
+  }
+
+  // process our set of indices (include header in first chunk)
+  process_array_chunk<oop>(obj, start, end);
+  oop* start_addr = start == 0 ? (oop*)obj : obj->obj_at_addr<oop>(start);
+  oop* end_addr   = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length
+  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
+  _scanner.set_region(_g1->heap_region_containing(obj));
+  obj->oop_iterate(&_scanner, mr);
+}
+
+int G1ScanAndBalanceClosure::_nq = 0;
+
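+// Repeatedly drains the worker's own queues and then tries to steal work
+// from other workers, offering termination to the ParallelTaskTerminator
+// when both come up empty; the worker's alloc buffers are retired once
+// termination succeeds.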
+class G1ParEvacuateFollowersClosure : public VoidClosure {
+protected:
+  G1CollectedHeap*              _g1h;
+  G1ParScanThreadState*         _par_scan_state;
+  RefToScanQueueSet*            _queues;
+  ParallelTaskTerminator*       _terminator;
+
+  G1ParScanThreadState*   par_scan_state() { return _par_scan_state; }
+  RefToScanQueueSet*      queues()         { return _queues; }
+  ParallelTaskTerminator* terminator()     { return _terminator; }
+
+public:
+  G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h,
+                                G1ParScanThreadState* par_scan_state,
+                                RefToScanQueueSet* queues,
+                                ParallelTaskTerminator* terminator)
+    : _g1h(g1h), _par_scan_state(par_scan_state),
+      _queues(queues), _terminator(terminator) {}
+
+  void do_void() {
+    G1ParScanThreadState* pss = par_scan_state();
+    while (true) {
+      oop* ref_to_scan;
+      pss->trim_queue();
+      IF_G1_DETAILED_STATS(pss->note_steal_attempt());
+      if (queues()->steal(pss->queue_num(),
+                          pss->hash_seed(),
+                          ref_to_scan)) {
+        IF_G1_DETAILED_STATS(pss->note_steal());
+        pss->push_on_queue(ref_to_scan);
+        continue;
+      }
+      pss->start_term_time();
+      if (terminator()->offer_termination()) break;
+      pss->end_term_time();
+    }
+    pss->end_term_time();
+    pss->retire_alloc_buffers();
+  }
+};
+
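+// The gang task run by the GC worker threads during an evacuation pause:
+// each worker sets up its G1ParScanThreadState and closures, scans its
+// share of the strong roots, evacuates followers until termination, and
+// reports its timing and waste statistics.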
+class G1ParTask : public AbstractGangTask {
+protected:
+  G1CollectedHeap*       _g1h;
+  RefToScanQueueSet      *_queues;
+  ParallelTaskTerminator _terminator;
+
+  Mutex _stats_lock;
+  Mutex* stats_lock() { return &_stats_lock; }
+
+  size_t getNCards() {
+    return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
+      / G1BlockOffsetSharedArray::N_bytes;
+  }
+
+public:
+  G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues)
+    : AbstractGangTask("G1 collection"),
+      _g1h(g1h),
+      _queues(task_queues),
+      _terminator(workers, _queues),
+      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
+  {}
+
+  RefToScanQueueSet* queues() { return _queues; }
+
+  RefToScanQueue *work_queue(int i) {
+    return queues()->queue(i);
+  }
+
+  void work(int i) {
+    ResourceMark rm;
+    HandleMark   hm;
+
+    G1ParScanThreadState pss(_g1h, i);
+    G1ParScanHeapEvacClosure     scan_evac_cl(_g1h, &pss);
+    G1ParScanHeapEvacClosure     evac_failure_cl(_g1h, &pss);
+    G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss);
+
+    pss.set_evac_closure(&scan_evac_cl);
+    pss.set_evac_failure_closure(&evac_failure_cl);
+    pss.set_partial_scan_closure(&partial_scan_cl);
+
+    G1ParScanExtRootClosure         only_scan_root_cl(_g1h, &pss);
+    G1ParScanPermClosure            only_scan_perm_cl(_g1h, &pss);
+    G1ParScanHeapRSClosure          only_scan_heap_rs_cl(_g1h, &pss);
+    G1ParScanAndMarkExtRootClosure  scan_mark_root_cl(_g1h, &pss);
+    G1ParScanAndMarkPermClosure     scan_mark_perm_cl(_g1h, &pss);
+    G1ParScanAndMarkHeapRSClosure   scan_mark_heap_rs_cl(_g1h, &pss);
+
+    OopsInHeapRegionClosure        *scan_root_cl;
+    OopsInHeapRegionClosure        *scan_perm_cl;
+    OopsInHeapRegionClosure        *scan_so_cl;
+
+    if (_g1h->g1_policy()->should_initiate_conc_mark()) {
+      scan_root_cl = &scan_mark_root_cl;
+      scan_perm_cl = &scan_mark_perm_cl;
+      scan_so_cl   = &scan_mark_heap_rs_cl;
+    } else {
+      scan_root_cl = &only_scan_root_cl;
+      scan_perm_cl = &only_scan_perm_cl;
+      scan_so_cl   = &only_scan_heap_rs_cl;
+    }
+
+    pss.start_strong_roots();
+    _g1h->g1_process_strong_roots(/* not collecting perm */ false,
+                                  SharedHeap::SO_AllClasses,
+                                  scan_root_cl,
+                                  &only_scan_heap_rs_cl,
+                                  scan_so_cl,
+                                  scan_perm_cl,
+                                  i);
+    pss.end_strong_roots();
+    {
+      double start = os::elapsedTime();
+      G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
+      evac.do_void();
+      double elapsed_ms = (os::elapsedTime()-start)*1000.0;
+      double term_ms = pss.term_time()*1000.0;
+      _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
+      _g1h->g1_policy()->record_termination_time(i, term_ms);
+    }
+    _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
+
+    // Clean up any par-expanded rem sets.
+    HeapRegionRemSet::par_cleanup();
+
+    MutexLocker x(stats_lock());
+    if (ParallelGCVerbose) {
+      gclog_or_tty->print("Thread %d complete:\n", i);
+#if G1_DETAILED_STATS
+      gclog_or_tty->print("  Pushes: %7d    Pops: %7d   Overflows: %7d   Steals %7d (in %d attempts)\n",
+                          pss.pushes(),
+                          pss.pops(),
+                          pss.overflow_pushes(),
+                          pss.steals(),
+                          pss.steal_attempts());
+#endif
+      double elapsed      = pss.elapsed();
+      double strong_roots = pss.strong_roots_time();
+      double term         = pss.term_time();
+      gclog_or_tty->print("  Elapsed: %7.2f ms.\n"
+                          "    Strong roots: %7.2f ms (%6.2f%%)\n"
+                          "    Termination:  %7.2f ms (%6.2f%%) (in %d entries)\n",
+                          elapsed * 1000.0,
+                          strong_roots * 1000.0, (strong_roots*100.0/elapsed),
+                          term * 1000.0, (term*100.0/elapsed),
+                          pss.term_attempts());
+      size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste();
+      gclog_or_tty->print("  Waste: %8dK\n"
+                          "    Alloc Buffer: %8dK\n"
+                          "    Undo: %8dK\n",
+                          (total_waste * HeapWordSize) / K,
+                          (pss.alloc_buffer_waste() * HeapWordSize) / K,
+                          (pss.undo_waste() * HeapWordSize) / K);
+    }
+
+    assert(pss.refs_to_scan() == 0, "Task queue should be empty");
+    assert(pss.overflowed_refs_to_scan() == 0, "Overflow queue should be empty");
+  }
+};
+
+// *** Common G1 Evacuation Stuff
+
+class G1CountClosure: public OopsInHeapRegionClosure {
+public:
+  int n;
+  G1CountClosure() : n(0) {}
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+    assert(obj != NULL && G1CollectedHeap::heap()->obj_in_cs(obj),
+           "Rem set closure called on non-rem-set pointer.");
+    n++;
+  }
+};
+
+static G1CountClosure count_closure;
+
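+// Processes the strong roots for one worker: external roots and the perm
+// gen (through buffering closures, so that closure-application time can be
+// measured separately), the concurrent-marking mark stack, the scan-only
+// young regions, the remembered sets into the collection set, and finally
+// the reference-processor roots, recording per-phase times in the policy.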
+void
+G1CollectedHeap::
+g1_process_strong_roots(bool collecting_perm_gen,
+                        SharedHeap::ScanningOption so,
+                        OopClosure* scan_non_heap_roots,
+                        OopsInHeapRegionClosure* scan_rs,
+                        OopsInHeapRegionClosure* scan_so,
+                        OopsInGenClosure* scan_perm,
+                        int worker_i) {
+  // First scan the strong roots, including the perm gen.
+  double ext_roots_start = os::elapsedTime();
+  double closure_app_time_sec = 0.0;
+
+  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
+  BufferingOopsInGenClosure buf_scan_perm(scan_perm);
+  buf_scan_perm.set_generation(perm_gen());
+
+  process_strong_roots(collecting_perm_gen, so,
+                       &buf_scan_non_heap_roots,
+                       &buf_scan_perm);
+  // Finish up any enqueued closure apps.
+  buf_scan_non_heap_roots.done();
+  buf_scan_perm.done();
+  double ext_roots_end = os::elapsedTime();
+  g1_policy()->reset_obj_copy_time(worker_i);
+  double obj_copy_time_sec =
+    buf_scan_non_heap_roots.closure_app_seconds() +
+    buf_scan_perm.closure_app_seconds();
+  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
+  double ext_root_time_ms =
+    ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
+  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
+
+  // Scan strong roots in mark stack.
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
+    concurrent_mark()->oops_do(scan_non_heap_roots);
+  }
+  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+
+  // XXX What should this be doing in the parallel case?
+  g1_policy()->record_collection_pause_end_CH_strong_roots();
+  if (G1VerifyRemSet) {
+    // :::: FIXME ::::
+    // The stupid remembered set doesn't know how to filter out dead
+    // objects, which the smart one does, and so when it is created
+    // and then compared, the number of entries in each differs and
+    // the verification code fails.
+    guarantee(false, "verification code is broken, see note");
+
+    // Let's make sure that the current rem set agrees with the stupidest
+    // one possible!
+    bool refs_enabled = ref_processor()->discovery_enabled();
+    if (refs_enabled) ref_processor()->disable_discovery();
+    StupidG1RemSet stupid(this);
+    count_closure.n = 0;
+    stupid.oops_into_collection_set_do(&count_closure, worker_i);
+    int stupid_n = count_closure.n;
+    count_closure.n = 0;
+    g1_rem_set()->oops_into_collection_set_do(&count_closure, worker_i);
+    guarantee(count_closure.n == stupid_n, "Old and new rem sets differ.");
+    gclog_or_tty->print_cr("\nFound %d pointers in heap RS.", count_closure.n);
+    if (refs_enabled) ref_processor()->enable_discovery();
+  }
+  if (scan_so != NULL) {
+    scan_scan_only_set(scan_so, worker_i);
+  }
+  // Now scan the complement of the collection set.
+  if (scan_rs != NULL) {
+    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
+  }
+  // Finish with the ref_processor roots.
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
+    ref_processor()->oops_do(scan_non_heap_roots);
+  }
+  g1_policy()->record_collection_pause_end_G1_strong_roots();
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void
+G1CollectedHeap::scan_scan_only_region(HeapRegion* r,
+                                       OopsInHeapRegionClosure* oc,
+                                       int worker_i) {
+  oc->set_region(r);
+
+  HeapWord* p = r->bottom();
+  HeapWord* t = r->top();
+  guarantee( p == r->next_top_at_mark_start(), "invariant" );
+  while (p < t) {
+    oop obj = oop(p);
+    p += obj->oop_iterate(oc);
+  }
+}
+
+void
+G1CollectedHeap::scan_scan_only_set(OopsInHeapRegionClosure* oc,
+                                    int worker_i) {
+  double start = os::elapsedTime();
+
+  BufferingOopsInHeapRegionClosure boc(oc);
+
+  FilterInHeapRegionAndIntoCSClosure scan_only(this, &boc);
+  FilterAndMarkInHeapRegionAndIntoCSClosure scan_and_mark(this, &boc, concurrent_mark());
+
+  OopsInHeapRegionClosure *foc;
+  if (g1_policy()->should_initiate_conc_mark())
+    foc = &scan_and_mark;
+  else
+    foc = &scan_only;
+
+  HeapRegion* hr;
+  int n = 0;
+  while ((hr = _young_list->par_get_next_scan_only_region()) != NULL) {
+    scan_scan_only_region(hr, foc, worker_i);
+    ++n;
+  }
+  boc.done();
+
+  double closure_app_s = boc.closure_app_seconds();
+  g1_policy()->record_obj_copy_time(worker_i, closure_app_s * 1000.0);
+  double ms = (os::elapsedTime() - start - closure_app_s)*1000.0;
+  g1_policy()->record_scan_only_time(worker_i, ms, n);
+}
+
+void
+G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure,
+                                       OopClosure* non_root_closure) {
+  SharedHeap::process_weak_roots(root_closure, non_root_closure);
+}
+
+
+class SaveMarksClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    r->save_marks();
+    return false;
+  }
+};
+
+void G1CollectedHeap::save_marks() {
+  if (ParallelGCThreads == 0) {
+    SaveMarksClosure sm;
+    heap_region_iterate(&sm);
+  }
+  // We do this even in the parallel case
+  perm_gen()->save_marks();
+}
+
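+// The copying phase of an evacuation pause: prepare the remembered set,
+// disable the refinement cache, run G1ParTask on the worker gang (or
+// inline when ParallelGCThreads == 0), process JNI weak oops, and then
+// finish concurrent-marking bookkeeping and, if any evacuation failed,
+// remove the self-forwarding pointers that were installed.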
+void G1CollectedHeap::evacuate_collection_set() {
+  set_evacuation_failed(false);
+
+  g1_rem_set()->prepare_for_oops_into_collection_set_do();
+  concurrent_g1_refine()->set_use_cache(false);
+  int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
+
+  set_par_threads(n_workers);
+  G1ParTask g1_par_task(this, n_workers, _task_queues);
+
+  init_for_evac_failure(NULL);
+
+  change_strong_roots_parity();  // In preparation for parallel strong roots.
+  rem_set()->prepare_for_younger_refs_iterate(true);
+  double start_par = os::elapsedTime();
+
+  if (ParallelGCThreads > 0) {
+    // The individual threads will set their evac-failure closures.
+    workers()->run_task(&g1_par_task);
+  } else {
+    g1_par_task.work(0);
+  }
+
+  double par_time = (os::elapsedTime() - start_par) * 1000.0;
+  g1_policy()->record_par_time(par_time);
+  set_par_threads(0);
+  // Is this the right thing to do here?  We don't save marks
+  // on individual heap regions when we allocate from
+  // them in parallel, so this seems like the correct place for this.
+  all_alloc_regions_note_end_of_copying();
+  {
+    G1IsAliveClosure is_alive(this);
+    G1KeepAliveClosure keep_alive(this);
+    JNIHandles::weak_oops_do(&is_alive, &keep_alive);
+  }
+
+  g1_rem_set()->cleanup_after_oops_into_collection_set_do();
+  concurrent_g1_refine()->set_use_cache(true);
+
+  finalize_for_evac_failure();
+
+  // Must do this before removing self-forwarding pointers, which clears
+  // the per-region evac-failure flags.
+  concurrent_mark()->complete_marking_in_collection_set();
+
+  if (evacuation_failed()) {
+    remove_self_forwarding_pointers();
+
+    if (PrintGCDetails) {
+      gclog_or_tty->print(" (evacuation failed)");
+    } else if (PrintGC) {
+      gclog_or_tty->print("--");
+    }
+  }
+
+  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+}
+
+void G1CollectedHeap::free_region(HeapRegion* hr) {
+  size_t pre_used = 0;
+  size_t cleared_h_regions = 0;
+  size_t freed_regions = 0;
+  UncleanRegionList local_list;
+
+  HeapWord* start = hr->bottom();
+  HeapWord* end   = hr->prev_top_at_mark_start();
+  size_t used_bytes = hr->used();
+  size_t live_bytes = hr->max_live_bytes();
+  if (used_bytes > 0) {
+    guarantee( live_bytes <= used_bytes, "invariant" );
+  } else {
+    guarantee( live_bytes == 0, "invariant" );
+  }
+
+  size_t garbage_bytes = used_bytes - live_bytes;
+  if (garbage_bytes > 0)
+    g1_policy()->decrease_known_garbage_bytes(garbage_bytes);
+
+  free_region_work(hr, pre_used, cleared_h_regions, freed_regions,
+                   &local_list);
+  finish_free_region_work(pre_used, cleared_h_regions, freed_regions,
+                          &local_list);
+}
+
+void
+G1CollectedHeap::free_region_work(HeapRegion* hr,
+                                  size_t& pre_used,
+                                  size_t& cleared_h_regions,
+                                  size_t& freed_regions,
+                                  UncleanRegionList* list,
+                                  bool par) {
+  assert(!hr->popular(), "should not free popular regions");
+  pre_used += hr->used();
+  if (hr->isHumongous()) {
+    assert(hr->startsHumongous(),
+           "Only the start of a humongous region should be freed.");
+    int ind = _hrs->find(hr);
+    assert(ind != -1, "Should have an index.");
+    // Clear the start region.
+    hr->hr_clear(par, true /*clear_space*/);
+    list->insert_before_head(hr);
+    cleared_h_regions++;
+    freed_regions++;
+    // Clear any continued regions.
+    ind++;
+    while ((size_t)ind < n_regions()) {
+      HeapRegion* hrc = _hrs->at(ind);
+      if (!hrc->continuesHumongous()) break;
+      // Otherwise, does continue the H region.
+      assert(hrc->humongous_start_region() == hr, "Huh?");
+      hrc->hr_clear(par, true /*clear_space*/);
+      cleared_h_regions++;
+      freed_regions++;
+      list->insert_before_head(hrc);
+      ind++;
+    }
+  } else {
+    hr->hr_clear(par, true /*clear_space*/);
+    list->insert_before_head(hr);
+    freed_regions++;
+    // If we're using clear2, this should not be enabled.
+    // assert(!hr->in_cohort(), "Can't be both free and in a cohort.");
+  }
+}
+
+void G1CollectedHeap::finish_free_region_work(size_t pre_used,
+                                              size_t cleared_h_regions,
+                                              size_t freed_regions,
+                                              UncleanRegionList* list) {
+  if (list != NULL && list->sz() > 0) {
+    prepend_region_list_on_unclean_list(list);
+  }
+  // Acquire a lock, if we're parallel, to update possibly-shared
+  // variables.
+  Mutex* lock = (n_par_threads() > 0) ? ParGCRareEvent_lock : NULL;
+  {
+    MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
+    _summary_bytes_used -= pre_used;
+    _num_humongous_regions -= (int) cleared_h_regions;
+    _free_regions += freed_regions;
+  }
+}
+
+
+void G1CollectedHeap::dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list) {
+  while (list != NULL) {
+    guarantee( list->is_young(), "invariant" );
+
+    HeapWord* bottom = list->bottom();
+    HeapWord* end = list->end();
+    MemRegion mr(bottom, end);
+    ct_bs->dirty(mr);
+
+    list = list->get_next_young_region();
+  }
+}
+
+void G1CollectedHeap::cleanUpCardTable() {
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
+  double start = os::elapsedTime();
+
+  ct_bs->clear(_g1_committed);
+
+  // now, redirty the cards of the scan-only and survivor regions
+  // (it seemed faster to do it this way, instead of iterating over
+  // all regions and then clearing / dirtying as appropriate)
+  dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region());
+  dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region());
+
+  double elapsed = os::elapsedTime() - start;
+  g1_policy()->record_clear_ct_time( elapsed * 1000.0);
+}
+
+
+void G1CollectedHeap::do_collection_pause_if_appropriate(size_t word_size) {
+  // First do any popular regions.
+  HeapRegion* hr;
+  while ((hr = popular_region_to_evac()) != NULL) {
+    evac_popular_region(hr);
+  }
+  // Now do heuristic pauses.
+  if (g1_policy()->should_do_collection_pause(word_size)) {
+    do_collection_pause();
+  }
+}
+
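+// Walks the collection set list after evacuation, separating the time
+// spent on young and non-young regions, accumulating remembered-set sizes,
+// recording surviving words for young regions, and either freeing each
+// region or, if its evacuation failed, resetting it to a plain non-young
+// region.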
+void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) {
+  double young_time_ms     = 0.0;
+  double non_young_time_ms = 0.0;
+
+  G1CollectorPolicy* policy = g1_policy();
+
+  double start_sec = os::elapsedTime();
+  bool non_young = true;
+
+  HeapRegion* cur = cs_head;
+  int age_bound = -1;
+  size_t rs_lengths = 0;
+
+  while (cur != NULL) {
+    if (non_young) {
+      if (cur->is_young()) {
+        double end_sec = os::elapsedTime();
+        double elapsed_ms = (end_sec - start_sec) * 1000.0;
+        non_young_time_ms += elapsed_ms;
+
+        start_sec = os::elapsedTime();
+        non_young = false;
+      }
+    } else {
+      if (!cur->is_on_free_list()) {
+        double end_sec = os::elapsedTime();
+        double elapsed_ms = (end_sec - start_sec) * 1000.0;
+        young_time_ms += elapsed_ms;
+
+        start_sec = os::elapsedTime();
+        non_young = true;
+      }
+    }
+
+    rs_lengths += cur->rem_set()->occupied();
+
+    HeapRegion* next = cur->next_in_collection_set();
+    assert(cur->in_collection_set(), "bad CS");
+    cur->set_next_in_collection_set(NULL);
+    cur->set_in_collection_set(false);
+
+    if (cur->is_young()) {
+      int index = cur->young_index_in_cset();
+      guarantee( index != -1, "invariant" );
+      guarantee( (size_t)index < policy->young_cset_length(), "invariant" );
+      size_t words_survived = _surviving_young_words[index];
+      cur->record_surv_words_in_group(words_survived);
+    } else {
+      int index = cur->young_index_in_cset();
+      guarantee( index == -1, "invariant" );
+    }
+
+    assert( (cur->is_young() && cur->young_index_in_cset() > -1) ||
+            (!cur->is_young() && cur->young_index_in_cset() == -1),
+            "invariant" );
+
+    if (!cur->evacuation_failed()) {
+      // A region in the collection set should not be empty.
+      assert(!cur->is_empty(),
+             "Should not have empty regions in a CS.");
+      free_region(cur);
+    } else {
+      guarantee( !cur->is_scan_only(), "should not be scan only" );
+      cur->uninstall_surv_rate_group();
+      if (cur->is_young())
+        cur->set_young_index_in_cset(-1);
+      cur->set_not_young();
+      cur->set_evacuation_failed(false);
+    }
+    cur = next;
+  }
+
+  policy->record_max_rs_lengths(rs_lengths);
+  policy->cset_regions_freed();
+
+  double end_sec = os::elapsedTime();
+  double elapsed_ms = (end_sec - start_sec) * 1000.0;
+  if (non_young)
+    non_young_time_ms += elapsed_ms;
+  else
+    young_time_ms += elapsed_ms;
+
+  policy->record_young_free_cset_time_ms(young_time_ms);
+  policy->record_non_young_free_cset_time_ms(non_young_time_ms);
+}
+
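+// Region-list management. Free regions live on _free_region_list and are
+// always zero filled; regions awaiting zero filling live on
+// _unclean_region_list and are processed by the concurrent zero-fill (ZF)
+// thread. Both lists are protected by ZF_mon.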
+HeapRegion*
+G1CollectedHeap::alloc_region_from_unclean_list_locked(bool zero_filled) {
+  assert(ZF_mon->owned_by_self(), "Precondition");
+  HeapRegion* res = pop_unclean_region_list_locked();
+  if (res != NULL) {
+    assert(!res->continuesHumongous() &&
+           res->zero_fill_state() != HeapRegion::Allocated,
+           "Only free regions on unclean list.");
+    if (zero_filled) {
+      res->ensure_zero_filled_locked();
+      res->set_zero_fill_allocated();
+    }
+  }
+  return res;
+}
+
+HeapRegion* G1CollectedHeap::alloc_region_from_unclean_list(bool zero_filled) {
+  MutexLockerEx zx(ZF_mon, Mutex::_no_safepoint_check_flag);
+  return alloc_region_from_unclean_list_locked(zero_filled);
+}
+
+void G1CollectedHeap::put_region_on_unclean_list(HeapRegion* r) {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  put_region_on_unclean_list_locked(r);
+  if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread.
+}
+
+void G1CollectedHeap::set_unclean_regions_coming(bool b) {
+  MutexLockerEx x(Cleanup_mon);
+  set_unclean_regions_coming_locked(b);
+}
+
+void G1CollectedHeap::set_unclean_regions_coming_locked(bool b) {
+  assert(Cleanup_mon->owned_by_self(), "Precondition");
+  _unclean_regions_coming = b;
+  // Wake up mutator threads that might be waiting for completeCleanup to
+  // finish.
+  if (!b) Cleanup_mon->notify_all();
+}
+
+void G1CollectedHeap::wait_for_cleanup_complete() {
+  MutexLockerEx x(Cleanup_mon);
+  wait_for_cleanup_complete_locked();
+}
+
+void G1CollectedHeap::wait_for_cleanup_complete_locked() {
+  assert(Cleanup_mon->owned_by_self(), "precondition");
+  while (_unclean_regions_coming) {
+    Cleanup_mon->wait();
+  }
+}
+
+void
+G1CollectedHeap::put_region_on_unclean_list_locked(HeapRegion* r) {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  _unclean_region_list.insert_before_head(r);
+}
+
+void
+G1CollectedHeap::prepend_region_list_on_unclean_list(UncleanRegionList* list) {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  prepend_region_list_on_unclean_list_locked(list);
+  if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread.
+}
+
+void
+G1CollectedHeap::
+prepend_region_list_on_unclean_list_locked(UncleanRegionList* list) {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  _unclean_region_list.prepend_list(list);
+}
+
+HeapRegion* G1CollectedHeap::pop_unclean_region_list_locked() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  HeapRegion* res = _unclean_region_list.pop();
+  if (res != NULL) {
+    // Inform ZF thread that there's a new unclean head.
+    if (_unclean_region_list.hd() != NULL && should_zf())
+      ZF_mon->notify_all();
+  }
+  return res;
+}
+
+HeapRegion* G1CollectedHeap::peek_unclean_region_list_locked() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  return _unclean_region_list.hd();
+}
+
+
+bool G1CollectedHeap::move_cleaned_region_to_free_list_locked() {
+  assert(ZF_mon->owned_by_self(), "Precondition");
+  HeapRegion* r = peek_unclean_region_list_locked();
+  if (r != NULL && r->zero_fill_state() == HeapRegion::ZeroFilled) {
+    // Result of below must be equal to "r", since we hold the lock.
+    (void)pop_unclean_region_list_locked();
+    put_free_region_on_list_locked(r);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool G1CollectedHeap::move_cleaned_region_to_free_list() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  return move_cleaned_region_to_free_list_locked();
+}
+
+
+void G1CollectedHeap::put_free_region_on_list_locked(HeapRegion* r) {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  assert(_free_region_list_size == free_region_list_length(), "Inv");
+  assert(r->zero_fill_state() == HeapRegion::ZeroFilled,
+        "Regions on free list must be zero filled");
+  assert(!r->isHumongous(), "Must not be humongous.");
+  assert(r->is_empty(), "Better be empty");
+  assert(!r->is_on_free_list(),
+         "Better not already be on free list");
+  assert(!r->is_on_unclean_list(),
+         "Better not already be on unclean list");
+  r->set_on_free_list(true);
+  r->set_next_on_free_list(_free_region_list);
+  _free_region_list = r;
+  _free_region_list_size++;
+  assert(_free_region_list_size == free_region_list_length(), "Inv");
+}
+
+void G1CollectedHeap::put_free_region_on_list(HeapRegion* r) {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  put_free_region_on_list_locked(r);
+}
+
+HeapRegion* G1CollectedHeap::pop_free_region_list_locked() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  assert(_free_region_list_size == free_region_list_length(), "Inv");
+  HeapRegion* res = _free_region_list;
+  if (res != NULL) {
+    _free_region_list = res->next_from_free_list();
+    _free_region_list_size--;
+    res->set_on_free_list(false);
+    res->set_next_on_free_list(NULL);
+    assert(_free_region_list_size == free_region_list_length(), "Inv");
+  }
+  return res;
+}
+
+
+HeapRegion* G1CollectedHeap::alloc_free_region_from_lists(bool zero_filled) {
+  // By self, or on behalf of self.
+  assert(Heap_lock->is_locked(), "Precondition");
+  HeapRegion* res = NULL;
+  bool first = true;
+  while (res == NULL) {
+    if (zero_filled || !first) {
+      MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+      res = pop_free_region_list_locked();
+      if (res != NULL) {
+        assert(!res->zero_fill_is_allocated(),
+               "No allocated regions on free list.");
+        res->set_zero_fill_allocated();
+      } else if (!first) {
+        break;  // We tried both, time to return NULL.
+      }
+    }
+
+    if (res == NULL) {
+      res = alloc_region_from_unclean_list(zero_filled);
+    }
+    assert(res == NULL ||
+           !zero_filled ||
+           res->zero_fill_is_allocated(),
+           "We must have allocated the region we're returning");
+    first = false;
+  }
+  return res;
+}
+
+void G1CollectedHeap::remove_allocated_regions_from_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  {
+    HeapRegion* prev = NULL;
+    HeapRegion* cur = _unclean_region_list.hd();
+    while (cur != NULL) {
+      HeapRegion* next = cur->next_from_unclean_list();
+      if (cur->zero_fill_is_allocated()) {
+        // Remove from the list.
+        if (prev == NULL) {
+          (void)_unclean_region_list.pop();
+        } else {
+          _unclean_region_list.delete_after(prev);
+        }
+        cur->set_on_unclean_list(false);
+        cur->set_next_on_unclean_list(NULL);
+      } else {
+        prev = cur;
+      }
+      cur = next;
+    }
+    assert(_unclean_region_list.sz() == unclean_region_list_length(),
+           "Inv");
+  }
+
+  {
+    HeapRegion* prev = NULL;
+    HeapRegion* cur = _free_region_list;
+    while (cur != NULL) {
+      HeapRegion* next = cur->next_from_free_list();
+      if (cur->zero_fill_is_allocated()) {
+        // Remove from the list.
+        if (prev == NULL) {
+          _free_region_list = cur->next_from_free_list();
+        } else {
+          prev->set_next_on_free_list(cur->next_from_free_list());
+        }
+        cur->set_on_free_list(false);
+        cur->set_next_on_free_list(NULL);
+        _free_region_list_size--;
+      } else {
+        prev = cur;
+      }
+      cur = next;
+    }
+    assert(_free_region_list_size == free_region_list_length(), "Inv");
+  }
+}
+
+bool G1CollectedHeap::verify_region_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  return verify_region_lists_locked();
+}
+
+bool G1CollectedHeap::verify_region_lists_locked() {
+  HeapRegion* unclean = _unclean_region_list.hd();
+  while (unclean != NULL) {
+    guarantee(unclean->is_on_unclean_list(), "Well, it is!");
+    guarantee(!unclean->is_on_free_list(), "Well, it shouldn't be!");
+    guarantee(unclean->zero_fill_state() != HeapRegion::Allocated,
+              "Everything else is possible.");
+    unclean = unclean->next_from_unclean_list();
+  }
+  guarantee(_unclean_region_list.sz() == unclean_region_list_length(), "Inv");
+
+  HeapRegion* free_r = _free_region_list;
+  while (free_r != NULL) {
+    assert(free_r->is_on_free_list(), "Well, it is!");
+    assert(!free_r->is_on_unclean_list(), "Well, it shouldn't be!");
+    switch (free_r->zero_fill_state()) {
+    case HeapRegion::NotZeroFilled:
+    case HeapRegion::ZeroFilling:
+      guarantee(false, "Should not be on free list.");
+      break;
+    default:
+      // Everything else is possible.
+      break;
+    }
+    free_r = free_r->next_from_free_list();
+  }
+  guarantee(_free_region_list_size == free_region_list_length(), "Inv");
+  // If we didn't do an assertion...
+  return true;
+}
+
+size_t G1CollectedHeap::free_region_list_length() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  size_t len = 0;
+  HeapRegion* cur = _free_region_list;
+  while (cur != NULL) {
+    len++;
+    cur = cur->next_from_free_list();
+  }
+  return len;
+}
+
+size_t G1CollectedHeap::unclean_region_list_length() {
+  assert(ZF_mon->owned_by_self(), "precondition.");
+  return _unclean_region_list.length();
+}
+
+size_t G1CollectedHeap::n_regions() {
+  return _hrs->length();
+}
+
+size_t G1CollectedHeap::max_regions() {
+  return
+    (size_t)align_size_up(g1_reserved_obj_bytes(), HeapRegion::GrainBytes) /
+    HeapRegion::GrainBytes;
+}
+
+size_t G1CollectedHeap::free_regions() {
+  /* Possibly-expensive assert.
+  assert(_free_regions == count_free_regions(),
+         "_free_regions is off.");
+  */
+  return _free_regions;
+}
+
+bool G1CollectedHeap::should_zf() {
+  return _free_region_list_size < (size_t) G1ConcZFMaxRegions;
+}
+
+class RegionCounter: public HeapRegionClosure {
+  size_t _n;
+public:
+  RegionCounter() : _n(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->is_empty() && !r->popular()) {
+      assert(!r->isHumongous(), "H regions should not be empty.");
+      _n++;
+    }
+    return false;
+  }
+  int res() { return (int) _n; }
+};
+
+size_t G1CollectedHeap::count_free_regions() {
+  RegionCounter rc;
+  heap_region_iterate(&rc);
+  size_t n = rc.res();
+  if (_cur_alloc_region != NULL && _cur_alloc_region->is_empty())
+    n--;
+  return n;
+}
+
+size_t G1CollectedHeap::count_free_regions_list() {
+  size_t n = 0;
+  ZF_mon->lock_without_safepoint_check();
+  HeapRegion* cur = _free_region_list;
+  while (cur != NULL) {
+    cur = cur->next_from_free_list();
+    n++;
+  }
+  size_t m = unclean_region_list_length();
+  ZF_mon->unlock();
+  return n + m;
+}
+
+bool G1CollectedHeap::should_set_young_locked() {
+  assert(heap_lock_held_for_gc(),
+              "the heap lock should already be held by or for this thread");
+  return  (g1_policy()->in_young_gc_mode() &&
+           g1_policy()->should_add_next_region_to_young_list());
+}
+
+void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) {
+  assert(heap_lock_held_for_gc(),
+              "the heap lock should already be held by or for this thread");
+  _young_list->push_region(hr);
+  g1_policy()->set_region_short_lived(hr);
+}
+
+class NoYoungRegionsClosure: public HeapRegionClosure {
+private:
+  bool _success;
+public:
+  NoYoungRegionsClosure() : _success(true) { }
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->is_young()) {
+      gclog_or_tty->print_cr("Region ["PTR_FORMAT", "PTR_FORMAT") tagged as young",
+                             r->bottom(), r->end());
+      _success = false;
+    }
+    return false;
+  }
+  bool success() { return _success; }
+};
+
+bool G1CollectedHeap::check_young_list_empty(bool ignore_scan_only_list,
+                                             bool check_sample) {
+  bool ret = true;
+
+  ret = _young_list->check_list_empty(ignore_scan_only_list, check_sample);
+  if (!ignore_scan_only_list) {
+    NoYoungRegionsClosure closure;
+    heap_region_iterate(&closure);
+    ret = ret && closure.success();
+  }
+
+  return ret;
+}
+
+void G1CollectedHeap::empty_young_list() {
+  assert(heap_lock_held_for_gc(),
+              "the heap lock should already be held by or for this thread");
+  assert(g1_policy()->in_young_gc_mode(), "should be in young GC mode");
+
+  _young_list->empty_list();
+}
+
+bool G1CollectedHeap::all_alloc_regions_no_allocs_since_save_marks() {
+  bool no_allocs = true;
+  for (int ap = 0; ap < GCAllocPurposeCount && no_allocs; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    no_allocs = r == NULL || r->saved_mark_at_top();
+  }
+  return no_allocs;
+}
+
+void G1CollectedHeap::all_alloc_regions_note_end_of_copying() {
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    if (r != NULL) {
+      // Check for aliases.
+      bool has_processed_alias = false;
+      for (int i = 0; i < ap; ++i) {
+        if (_gc_alloc_regions[i] == r) {
+          has_processed_alias = true;
+          break;
+        }
+      }
+      if (!has_processed_alias) {
+        r->note_end_of_copying();
+        g1_policy()->record_after_bytes(r->used());
+      }
+    }
+  }
+}
+
+
+// Done at the start of full GC.
+void G1CollectedHeap::tear_down_region_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  while (pop_unclean_region_list_locked() != NULL) ;
+  assert(_unclean_region_list.hd() == NULL && _unclean_region_list.sz() == 0,
+         "Postconditions of loop.");
+  while (pop_free_region_list_locked() != NULL) ;
+  assert(_free_region_list == NULL, "Postcondition of loop.");
+  if (_free_region_list_size != 0) {
+    gclog_or_tty->print_cr("Size is %d.", _free_region_list_size);
+    print();
+  }
+  assert(_free_region_list_size == 0, "Postconditions of loop.");
+}
+
+
+class RegionResetter: public HeapRegionClosure {
+  G1CollectedHeap* _g1;
+  int _n;
+public:
+  RegionResetter() : _g1(G1CollectedHeap::heap()), _n(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    if (r->top() > r->bottom()) {
+      if (r->top() < r->end()) {
+        Copy::fill_to_words(r->top(),
+                          pointer_delta(r->end(), r->top()));
+      }
+      r->set_zero_fill_allocated();
+    } else {
+      assert(r->is_empty(), "tautology");
+      if (r->popular()) {
+        if (r->zero_fill_state() != HeapRegion::Allocated) {
+          r->ensure_zero_filled_locked();
+          r->set_zero_fill_allocated();
+        }
+      } else {
+        _n++;
+        switch (r->zero_fill_state()) {
+        case HeapRegion::NotZeroFilled:
+        case HeapRegion::ZeroFilling:
+          _g1->put_region_on_unclean_list_locked(r);
+          break;
+        case HeapRegion::Allocated:
+          r->set_zero_fill_complete();
+          // no break; go on to put on free list.
+        case HeapRegion::ZeroFilled:
+          _g1->put_free_region_on_list_locked(r);
+          break;
+        }
+      }
+    }
+    return false;
+  }
+
+  int getFreeRegionCount() {return _n;}
+};
+
+// Done at the end of full GC.
+void G1CollectedHeap::rebuild_region_lists() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  // This needs to go at the end of the full GC.
+  RegionResetter rs;
+  heap_region_iterate(&rs);
+  _free_regions = rs.getFreeRegionCount();
+  // Tell the ZF thread it may have work to do.
+  if (should_zf()) ZF_mon->notify_all();
+}
+
+class UsedRegionsNeedZeroFillSetter: public HeapRegionClosure {
+  G1CollectedHeap* _g1;
+  int _n;
+public:
+  UsedRegionsNeedZeroFillSetter() : _g1(G1CollectedHeap::heap()), _n(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    if (r->top() > r->bottom()) {
+      // There are assertions in "set_zero_fill_needed()" below that
+      // require top() == bottom(), so this is technically illegal.
+      // We'll skirt the law here, by making that true temporarily.
+      DEBUG_ONLY(HeapWord* save_top = r->top();
+                 r->set_top(r->bottom()));
+      r->set_zero_fill_needed();
+      DEBUG_ONLY(r->set_top(save_top));
+    }
+    return false;
+  }
+};
+
+// Done at the start of full GC.
+void G1CollectedHeap::set_used_regions_to_need_zero_fill() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  // This is done at the start of a full GC (see above).
+  UsedRegionsNeedZeroFillSetter rs;
+  heap_region_iterate(&rs);
+}
+
+class CountObjClosure: public ObjectClosure {
+  size_t _n;
+public:
+  CountObjClosure() : _n(0) {}
+  void do_object(oop obj) { _n++; }
+  size_t n() { return _n; }
+};
+
+size_t G1CollectedHeap::pop_object_used_objs() {
+  size_t sum_objs = 0;
+  for (int i = 0; i < G1NumPopularRegions; i++) {
+    CountObjClosure cl;
+    _hrs->at(i)->object_iterate(&cl);
+    sum_objs += cl.n();
+  }
+  return sum_objs;
+}
+
+size_t G1CollectedHeap::pop_object_used_bytes() {
+  size_t sum_bytes = 0;
+  for (int i = 0; i < G1NumPopularRegions; i++) {
+    sum_bytes += _hrs->at(i)->used();
+  }
+  return sum_bytes;
+}
+
+
+static int nq = 0;
+
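+// Allocates "word_size" words in the current popular region, advancing
+// _cur_pop_hr_index to the next of the G1NumPopularRegions reserved regions
+// when the current one fills up.  Running out of popular regions is fatal
+// for now (see the XXX below).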
+HeapWord* G1CollectedHeap::allocate_popular_object(size_t word_size) {
+  while (_cur_pop_hr_index < G1NumPopularRegions) {
+    HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index);
+    HeapWord* res = cur_pop_region->allocate(word_size);
+    if (res != NULL) {
+      // We account for popular objs directly in the used summary:
+      _summary_bytes_used += (word_size * HeapWordSize);
+      return res;
+    }
+    // Otherwise, try the next region (first making sure that we remember
+    // the last "top" value as the "next_top_at_mark_start", so that
+    // objects made popular during markings aren't automatically considered
+    // live).
+    cur_pop_region->note_end_of_copying();
+    _cur_pop_hr_index++;
+  }
+  // XXX: For now !!!
+  vm_exit_out_of_memory(word_size,
+                        "Not enough pop obj space (To Be Fixed)");
+  return NULL;
+}
+
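+// A minimal singly-linked list node used to queue popular regions that are
+// waiting to be evacuated.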
+class HeapRegionList: public CHeapObj {
+  public:
+  HeapRegion* hr;
+  HeapRegionList* next;
+};
+
+void G1CollectedHeap::schedule_popular_region_evac(HeapRegion* r) {
+  // This might happen during parallel GC, so protect by this lock.
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  // We don't schedule regions whose evacuations are already pending, or
+  // are already being evacuated.
+  if (!r->popular_pending() && !r->in_collection_set()) {
+    r->set_popular_pending(true);
+    if (G1TracePopularity) {
+      gclog_or_tty->print_cr("Scheduling region "PTR_FORMAT" "
+                             "["PTR_FORMAT", "PTR_FORMAT") for pop-object evacuation.",
+                             r, r->bottom(), r->end());
+    }
+    HeapRegionList* hrl = new HeapRegionList;
+    hrl->hr = r;
+    hrl->next = _popular_regions_to_be_evacuated;
+    _popular_regions_to_be_evacuated = hrl;
+  }
+}
+
+HeapRegion* G1CollectedHeap::popular_region_to_evac() {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  HeapRegion* res = NULL;
+  while (_popular_regions_to_be_evacuated != NULL && res == NULL) {
+    HeapRegionList* hrl = _popular_regions_to_be_evacuated;
+    _popular_regions_to_be_evacuated = hrl->next;
+    res = hrl->hr;
+    // The G1RSPopLimit may have increased, so recheck here...
+    if (res->rem_set()->occupied() < (size_t) G1RSPopLimit) {
+      // Hah: don't need to schedule.
+      if (G1TracePopularity) {
+        gclog_or_tty->print_cr("Unscheduling region "PTR_FORMAT" "
+                               "["PTR_FORMAT", "PTR_FORMAT") "
+                               "for pop-object evacuation (size %d < limit %d)",
+                               res, res->bottom(), res->end(),
+                               res->rem_set()->occupied(), G1RSPopLimit);
+      }
+      res->set_popular_pending(false);
+      res = NULL;
+    }
+    // We do not reset res->popular() here; if we did so, it would allow
+    // the region to be "rescheduled" for popularity evacuation.  Instead,
+    // this is done in the collection pause, with the world stopped.
+    // So the invariant is that the regions in the list have the popularity
+    // boolean set, but having the boolean set does not imply membership
+    // on the list (though there can be at most one such pop-pending region
+    // not on the list at any time).
+    delete hrl;
+  }
+  return res;
+}
+
+void G1CollectedHeap::evac_popular_region(HeapRegion* hr) {
+  while (true) {
+    // Don't want to do a GC pause while cleanup is being completed!
+    wait_for_cleanup_complete();
+
+    // Read the GC count while holding the Heap_lock
+    int gc_count_before = SharedHeap::heap()->total_collections();
+    g1_policy()->record_stop_world_start();
+
+    {
+      MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+      VM_G1PopRegionCollectionPause op(gc_count_before, hr);
+      VMThread::execute(&op);
+
+      // If the prologue succeeded, the pause ran; we're done.
+      if (op.prologue_succeeded()) break;
+    }
+    // Otherwise the pause didn't run.  We should recheck the size, though, since
+    // the limit may have increased...
+    if (hr->rem_set()->occupied() < (size_t) G1RSPopLimit) {
+      hr->set_popular_pending(false);
+      break;
+    }
+  }
+}
+
+void G1CollectedHeap::atomic_inc_obj_rc(oop obj) {
+  Atomic::inc(obj_rc_addr(obj));
+}
+
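+// Applied to every reference into the collection set (here, the single
+// popular region under examination): increments the side reference count
+// kept for the referenced object.  In the parallel case the count saturates
+// at G1ObjPopLimit to avoid excessive contention on hot objects.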
+class CountRCClosure: public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  bool _parallel;
+public:
+  CountRCClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h), _parallel(ParallelGCThreads > 0)
+  {}
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+    assert(obj != NULL, "Precondition.");
+    if (_parallel) {
+      // We go sticky at the limit to avoid excess contention.
+      // If we want to track the actual RC's further, we'll need to keep a
+      // per-thread hash table or something for the popular objects.
+      if (_g1h->obj_rc(obj) < G1ObjPopLimit) {
+        _g1h->atomic_inc_obj_rc(obj);
+      }
+    } else {
+      _g1h->inc_obj_rc(obj);
+    }
+  }
+};
+
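+// Walks the objects in the popular region.  Any object whose side reference
+// count has reached G1ObjPopLimit is copied into the popular-object space,
+// a forwarding pointer is installed in the old copy, the new copy's fields
+// are re-examined with G1ScanAndBalanceClosure, and the "next" mark is
+// preserved if concurrent marking is in progress.  Tracks the number of
+// objects moved and the maximum reference count observed.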
+class EvacPopObjClosure: public ObjectClosure {
+  G1CollectedHeap* _g1h;
+  size_t _pop_objs;
+  size_t _max_rc;
+public:
+  EvacPopObjClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h), _pop_objs(0), _max_rc(0) {}
+
+  void do_object(oop obj) {
+    size_t rc = _g1h->obj_rc(obj);
+    _max_rc = MAX2(rc, _max_rc);
+    if (rc >= (size_t) G1ObjPopLimit) {
+      _g1h->_pop_obj_rc_at_copy.add((double)rc);
+      size_t word_sz = obj->size();
+      HeapWord* new_pop_loc = _g1h->allocate_popular_object(word_sz);
+      oop new_pop_obj = (oop)new_pop_loc;
+      Copy::aligned_disjoint_words((HeapWord*)obj, new_pop_loc, word_sz);
+      obj->forward_to(new_pop_obj);
+      G1ScanAndBalanceClosure scan_and_balance(_g1h);
+      new_pop_obj->oop_iterate_backwards(&scan_and_balance);
+      // preserve "next" mark bit if marking is in progress.
+      if (_g1h->mark_in_progress() && !_g1h->is_obj_ill(obj)) {
+        _g1h->concurrent_mark()->markAndGrayObjectIfNecessary(new_pop_obj);
+      }
+
+      if (G1TracePopularity) {
+        gclog_or_tty->print_cr("Found obj " PTR_FORMAT " of word size " SIZE_FORMAT
+                               " pop (%d), move to " PTR_FORMAT,
+                               (void*) obj, word_sz,
+                               _g1h->obj_rc(obj), (void*) new_pop_obj);
+      }
+      _pop_objs++;
+    }
+  }
+  size_t pop_objs() { return _pop_objs; }
+  size_t max_rc() { return _max_rc; }
+};
+
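+// Parallel form of the reference-count pass: each worker applies
+// CountRCClosure to its share of the references into the collection set.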
+class G1ParCountRCTask : public AbstractGangTask {
+  G1CollectedHeap* _g1h;
+  BitMap _bm;
+
+  size_t getNCards() {
+    return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
+      / G1BlockOffsetSharedArray::N_bytes;
+  }
+  CountRCClosure _count_rc_closure;
+public:
+  G1ParCountRCTask(G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Par RC Count task"),
+    _g1h(g1h), _bm(getNCards()), _count_rc_closure(g1h)
+  {}
+
+  void work(int i) {
+    ResourceMark rm;
+    HandleMark   hm;
+    _g1h->g1_rem_set()->oops_into_collection_set_do(&_count_rc_closure, i);
+  }
+};
+
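+// Sets up a popularity pause: the popular region becomes the entire
+// collection set, reference counts are computed, and popular objects are
+// evacuated.  If no popular object is found, G1RSPopLimit is doubled and
+// the collection set is cleared so the rest of the pause does nothing.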
+void G1CollectedHeap::popularity_pause_preamble(HeapRegion* popular_region) {
+  // We're evacuating a single region (for popularity).
+  if (G1TracePopularity) {
+    gclog_or_tty->print_cr("Doing pop region pause for ["PTR_FORMAT", "PTR_FORMAT")",
+                           popular_region->bottom(), popular_region->end());
+  }
+  g1_policy()->set_single_region_collection_set(popular_region);
+  size_t max_rc;
+  if (!compute_reference_counts_and_evac_popular(popular_region,
+                                                 &max_rc)) {
+    // We didn't evacuate any popular objects.
+    // We increase the RS popularity limit, to prevent this from
+    // happening in the future.
+    if (G1RSPopLimit < (1 << 30)) {
+      G1RSPopLimit *= 2;
+    }
+    // For now, interesting enough for a message:
+#if 1
+    gclog_or_tty->print_cr("In pop region pause for ["PTR_FORMAT", "PTR_FORMAT"), "
+                           "failed to find a pop object (max = %d).",
+                           popular_region->bottom(), popular_region->end(),
+                           max_rc);
+    gclog_or_tty->print_cr("Increased G1RSPopLimit to %d.", G1RSPopLimit);
+#endif // 1
+    // Also, we reset the collection set to NULL, to make the rest of
+    // the collection do nothing.
+    assert(popular_region->next_in_collection_set() == NULL,
+           "should be single-region.");
+    popular_region->set_in_collection_set(false);
+    popular_region->set_popular_pending(false);
+    g1_policy()->clear_collection_set();
+  }
+}
+
+bool G1CollectedHeap::
+compute_reference_counts_and_evac_popular(HeapRegion* popular_region,
+                                          size_t* max_rc) {
+  HeapWord* rc_region_bot;
+  HeapWord* rc_region_end;
+
+  // Set up the reference count region.
+  HeapRegion* rc_region = newAllocRegion(HeapRegion::GrainWords);
+  if (rc_region != NULL) {
+    rc_region_bot = rc_region->bottom();
+    rc_region_end = rc_region->end();
+  } else {
+    rc_region_bot = NEW_C_HEAP_ARRAY(HeapWord, HeapRegion::GrainWords);
+    if (rc_region_bot == NULL) {
+      vm_exit_out_of_memory(HeapRegion::GrainWords,
+                            "No space for RC region.");
+    }
+    rc_region_end = rc_region_bot + HeapRegion::GrainWords;
+  }
+
+  if (G1TracePopularity)
+    gclog_or_tty->print_cr("RC region is ["PTR_FORMAT", "PTR_FORMAT")",
+                           rc_region_bot, rc_region_end);
+  if (rc_region_bot > popular_region->bottom()) {
+    _rc_region_above = true;
+    _rc_region_diff =
+      pointer_delta(rc_region_bot, popular_region->bottom(), 1);
+  } else {
+    assert(rc_region_bot < popular_region->bottom(), "Can't be equal.");
+    _rc_region_above = false;
+    _rc_region_diff =
+      pointer_delta(popular_region->bottom(), rc_region_bot, 1);
+  }
+  g1_policy()->record_pop_compute_rc_start();
+  // Count external references.
+  g1_rem_set()->prepare_for_oops_into_collection_set_do();
+  if (ParallelGCThreads > 0) {
+
+    set_par_threads(workers()->total_workers());
+    G1ParCountRCTask par_count_rc_task(this);
+    workers()->run_task(&par_count_rc_task);
+    set_par_threads(0);
+
+  } else {
+    CountRCClosure count_rc_closure(this);
+    g1_rem_set()->oops_into_collection_set_do(&count_rc_closure, 0);
+  }
+  g1_rem_set()->cleanup_after_oops_into_collection_set_do();
+  g1_policy()->record_pop_compute_rc_end();
+
+  // Now evacuate popular objects.
+  g1_policy()->record_pop_evac_start();
+  EvacPopObjClosure evac_pop_obj_cl(this);
+  popular_region->object_iterate(&evac_pop_obj_cl);
+  *max_rc = evac_pop_obj_cl.max_rc();
+
+  // Make sure the last "top" value of the current popular region is copied
+  // as the "next_top_at_mark_start", so that objects made popular during
+  // markings aren't automatically considered live.
+  HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index);
+  cur_pop_region->note_end_of_copying();
+
+  if (rc_region != NULL) {
+    free_region(rc_region);
+  } else {
+    FREE_C_HEAP_ARRAY(HeapWord, rc_region_bot);
+  }
+  g1_policy()->record_pop_evac_end();
+
+  return evac_pop_obj_cl.pop_objs() > 0;
+}
+
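+// Tallies the object count and used bytes of the regions it visits; used
+// below to summarize the popular-object regions.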
+class CountPopObjInfoClosure: public HeapRegionClosure {
+  size_t _objs;
+  size_t _bytes;
+
+  class CountObjClosure: public ObjectClosure {
+    int _n;
+  public:
+    CountObjClosure() : _n(0) {}
+    void do_object(oop obj) { _n++; }
+    size_t n() { return _n; }
+  };
+
+public:
+  CountPopObjInfoClosure() : _objs(0), _bytes(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _bytes += r->used();
+    CountObjClosure blk;
+    r->object_iterate(&blk);
+    _objs += blk.n();
+    return false;
+  }
+  size_t objs() { return _objs; }
+  size_t bytes() { return _bytes; }
+};
+
+
+void G1CollectedHeap::print_popularity_summary_info() const {
+  CountPopObjInfoClosure blk;
+  for (int i = 0; i <= _cur_pop_hr_index; i++) {
+    blk.doHeapRegion(_hrs->at(i));
+  }
+  gclog_or_tty->print_cr("\nPopular objects: %d objs, %d bytes.",
+                         blk.objs(), blk.bytes());
+  gclog_or_tty->print_cr("   RC at copy = [avg = %5.2f, max = %5.2f, sd = %5.2f].",
+                _pop_obj_rc_at_copy.avg(),
+                _pop_obj_rc_at_copy.maximum(),
+                _pop_obj_rc_at_copy.sd());
+}
+
+void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
+  _refine_cte_cl->set_concurrent(concurrent);
+}
+
+#ifndef PRODUCT
+
+class PrintHeapRegionClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion *r) {
+    gclog_or_tty->print("Region: "PTR_FORMAT":", r);
+    if (r != NULL) {
+      if (r->is_on_free_list())
+        gclog_or_tty->print("Free ");
+      if (r->is_young())
+        gclog_or_tty->print("Young ");
+      if (r->isHumongous())
+        gclog_or_tty->print("Is Humongous ");
+      r->print();
+    }
+    return false;
+  }
+};
+
+class SortHeapRegionClosure : public HeapRegionClosure {
+  size_t young_regions, free_regions, unclean_regions;
+  size_t hum_regions, count;
+  size_t unaccounted, cur_unclean, cur_alloc;
+  size_t total_free;
+  HeapRegion* cur;
+public:
+  SortHeapRegionClosure(HeapRegion *_cur) : cur(_cur), young_regions(0),
+    free_regions(0), unclean_regions(0),
+    hum_regions(0),
+    count(0), unaccounted(0),
+    cur_alloc(0), total_free(0)
+  {}
+  bool doHeapRegion(HeapRegion *r) {
+    count++;
+    if (r->is_on_free_list()) free_regions++;
+    else if (r->is_on_unclean_list()) unclean_regions++;
+    else if (r->isHumongous())  hum_regions++;
+    else if (r->is_young()) young_regions++;
+    else if (r == cur) cur_alloc++;
+    else unaccounted++;
+    return false;
+  }
+  void print() {
+    total_free = free_regions + unclean_regions;
+    gclog_or_tty->print("%d regions\n", count);
+    gclog_or_tty->print("%d free: free_list = %d unclean = %d\n",
+                        total_free, free_regions, unclean_regions);
+    gclog_or_tty->print("%d humongous %d young\n",
+                        hum_regions, young_regions);
+    gclog_or_tty->print("%d cur_alloc\n", cur_alloc);
+    gclog_or_tty->print("UHOH unaccounted = %d\n", unaccounted);
+  }
+};
+
+void G1CollectedHeap::print_region_counts() {
+  SortHeapRegionClosure sc(_cur_alloc_region);
+  PrintHeapRegionClosure cl;
+  heap_region_iterate(&cl);
+  heap_region_iterate(&sc);
+  sc.print();
+  print_region_accounting_info();
+}
+
+bool G1CollectedHeap::regions_accounted_for() {
+  // TODO: regions accounting for young/survivor/tenured
+  return true;
+}
+
+bool G1CollectedHeap::print_region_accounting_info() {
+  gclog_or_tty->print_cr("P regions: %d.", G1NumPopularRegions);
+  gclog_or_tty->print_cr("Free regions: %d (count: %d count list %d) (clean: %d unclean: %d).",
+                         free_regions(),
+                         count_free_regions(), count_free_regions_list(),
+                         _free_region_list_size, _unclean_region_list.sz());
+  gclog_or_tty->print_cr("cur_alloc: %d.",
+                         (_cur_alloc_region == NULL ? 0 : 1));
+  gclog_or_tty->print_cr("H regions: %d.", _num_humongous_regions);
+
+  // TODO: check regions accounting for young/survivor/tenured
+  return true;
+}
+
+bool G1CollectedHeap::is_in_closed_subset(const void* p) const {
+  HeapRegion* hr = heap_region_containing(p);
+  if (hr == NULL) {
+    return is_in_permanent(p);
+  } else {
+    return hr->is_in(p);
+  }
+}
+#endif // PRODUCT
+
+void G1CollectedHeap::g1_unimplemented() {
+  // Unimplemented();
+}
+
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,1203 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A "G1CollectedHeap" is an implementation of a java heap for HotSpot.
+// It uses the "Garbage First" heap organization and algorithm, which
+// may combine concurrent marking with parallel, incremental compaction of
+// heap subsets that will yield large amounts of garbage.
+
+class HeapRegion;
+class HeapRegionSeq;
+class HeapRegionList;
+class PermanentGenerationSpec;
+class GenerationSpec;
+class OopsInHeapRegionClosure;
+class G1ScanHeapEvacClosure;
+class ObjectClosure;
+class SpaceClosure;
+class CompactibleSpaceClosure;
+class Space;
+class G1CollectorPolicy;
+class GenRemSet;
+class G1RemSet;
+class HeapRegionRemSetIterator;
+class ConcurrentMark;
+class ConcurrentMarkThread;
+class ConcurrentG1Refine;
+class ConcurrentZFThread;
+
+// If you want to accumulate detailed statistics on work queues, turn
+// this on.
+#define G1_DETAILED_STATS 0
+
+#if G1_DETAILED_STATS
+#  define IF_G1_DETAILED_STATS(code) code
+#else
+#  define IF_G1_DETAILED_STATS(code)
+#endif
+
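+// Per-worker queues of references (oop*) remaining to be scanned, and the
+// queue-set type used to share them among the parallel GC threads.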
+typedef GenericTaskQueue<oop*>    RefToScanQueue;
+typedef GenericTaskQueueSet<oop*> RefToScanQueueSet;
+
+enum G1GCThreadGroups {
+  G1CRGroup = 0,
+  G1ZFGroup = 1,
+  G1CMGroup = 2,
+  G1CLGroup = 3
+};
+
+enum GCAllocPurpose {
+  GCAllocForTenured,
+  GCAllocForSurvived,
+  GCAllocPurposeCount
+};
+
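+// Tracks the young regions: the main young list, a "scan only" list, and
+// the survivor regions, together with sampled remembered-set lengths for
+// use by the collector policy.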
+class YoungList : public CHeapObj {
+private:
+  G1CollectedHeap* _g1h;
+
+  HeapRegion* _head;
+
+  HeapRegion* _scan_only_head;
+  HeapRegion* _scan_only_tail;
+  size_t      _length;
+  size_t      _scan_only_length;
+
+  size_t      _last_sampled_rs_lengths;
+  size_t      _sampled_rs_lengths;
+  HeapRegion* _curr;
+  HeapRegion* _curr_scan_only;
+
+  HeapRegion* _survivor_head;
+  HeapRegion* _survivors_tail;
+  size_t      _survivor_length;
+
+  void          empty_list(HeapRegion* list);
+
+public:
+  YoungList(G1CollectedHeap* g1h);
+
+  void          push_region(HeapRegion* hr);
+  void          add_survivor_region(HeapRegion* hr);
+  HeapRegion*   pop_region();
+  void          empty_list();
+  bool          is_empty() { return _length == 0; }
+  size_t        length() { return _length; }
+  size_t        scan_only_length() { return _scan_only_length; }
+
+  void rs_length_sampling_init();
+  bool rs_length_sampling_more();
+  void rs_length_sampling_next();
+
+  void reset_sampled_info() {
+    _last_sampled_rs_lengths = 0;
+  }
+  size_t sampled_rs_lengths() { return _last_sampled_rs_lengths; }
+
+  // for development purposes
+  void reset_auxilary_lists();
+  HeapRegion* first_region() { return _head; }
+  HeapRegion* first_scan_only_region() { return _scan_only_head; }
+  HeapRegion* first_survivor_region() { return _survivor_head; }
+  HeapRegion* par_get_next_scan_only_region() {
+    MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+    HeapRegion* ret = _curr_scan_only;
+    if (ret != NULL)
+      _curr_scan_only = ret->get_next_young_region();
+    return ret;
+  }
+
+  // debugging
+  bool          check_list_well_formed();
+  bool          check_list_empty(bool ignore_scan_only_list,
+                                 bool check_sample = true);
+  void          print();
+};
+
+class RefineCardTableEntryClosure;
+class G1CollectedHeap : public SharedHeap {
+  friend class VM_G1CollectForAllocation;
+  friend class VM_GenCollectForPermanentAllocation;
+  friend class VM_G1CollectFull;
+  friend class VM_G1IncCollectionPause;
+  friend class VM_G1PopRegionCollectionPause;
+  friend class VMStructs;
+
+  // Closures used in implementation.
+  friend class G1ParCopyHelper;
+  friend class G1IsAliveClosure;
+  friend class G1EvacuateFollowersClosure;
+  friend class G1ParScanThreadState;
+  friend class G1ParScanClosureSuper;
+  friend class G1ParEvacuateFollowersClosure;
+  friend class G1ParTask;
+  friend class G1FreeGarbageRegionClosure;
+  friend class RefineCardTableEntryClosure;
+  friend class G1PrepareCompactClosure;
+  friend class RegionSorter;
+  friend class CountRCClosure;
+  friend class EvacPopObjClosure;
+
+  // Other related classes.
+  friend class G1MarkSweep;
+
+private:
+  enum SomePrivateConstants {
+    VeryLargeInBytes = HeapRegion::GrainBytes/2,
+    VeryLargeInWords = VeryLargeInBytes/HeapWordSize,
+    MinHeapDeltaBytes = 10 * HeapRegion::GrainBytes,      // FIXME
+    NumAPIs = HeapRegion::MaxAge
+  };
+
+
+  // The one and only G1CollectedHeap, so static functions can find it.
+  static G1CollectedHeap* _g1h;
+
+  // Storage for the G1 heap (excludes the permanent generation).
+  VirtualSpace _g1_storage;
+  MemRegion    _g1_reserved;
+
+  // The part of _g1_storage that is currently committed.
+  MemRegion _g1_committed;
+
+  // The maximum part of _g1_storage that has ever been committed.
+  MemRegion _g1_max_committed;
+
+  // The number of regions that are completely free.
+  size_t _free_regions;
+
+  // The number of regions we could create by expansion.
+  size_t _expansion_regions;
+
+  // Return the number of free regions in the heap (by direct counting.)
+  size_t count_free_regions();
+  // Return the number of free regions on the free and unclean lists.
+  size_t count_free_regions_list();
+
+  // The block offset table for the G1 heap.
+  G1BlockOffsetSharedArray* _bot_shared;
+
+  // Move all of the regions off the free lists, then rebuild those free
+  // lists, before and after full GC.
+  void tear_down_region_lists();
+  void rebuild_region_lists();
+  // This sets all non-empty regions to need zero-fill (which they will
+  // require if they end up empty after the full collection.)
+  void set_used_regions_to_need_zero_fill();
+
+  // The sequence of all heap regions in the heap.
+  HeapRegionSeq* _hrs;
+
+  // The region from which normal-sized objects are currently being
+  // allocated.  May be NULL.
+  HeapRegion* _cur_alloc_region;
+
+  // Postcondition: cur_alloc_region == NULL.
+  void abandon_cur_alloc_region();
+
+  // The to-space memory regions into which objects are being copied during
+  // a GC.
+  HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
+  uint _gc_alloc_region_counts[GCAllocPurposeCount];
+
+  // A list of the regions that have been set to be alloc regions in the
+  // current collection.
+  HeapRegion* _gc_alloc_region_list;
+
+  // When called by par thread, require par_alloc_during_gc_lock() to be held.
+  void push_gc_alloc_region(HeapRegion* hr);
+
+  // This should only be called single-threaded.  Undeclares all GC alloc
+  // regions.
+  void forget_alloc_region_list();
+
+  // Should be used to set an alloc region, because there's other
+  // associated bookkeeping.
+  void set_gc_alloc_region(int purpose, HeapRegion* r);
+
+  // Check well-formedness of alloc region list.
+  bool check_gc_alloc_regions();
+
+  // Outside of GC pauses, the number of bytes used in all regions other
+  // than the current allocation region.
+  size_t _summary_bytes_used;
+
+  // Summary information about popular objects; method to print it.
+  NumberSeq _pop_obj_rc_at_copy;
+  void print_popularity_summary_info() const;
+
+  volatile unsigned _gc_time_stamp;
+
+  size_t* _surviving_young_words;
+
+  void setup_surviving_young_words();
+  void update_surviving_young_words(size_t* surv_young_words);
+  void cleanup_surviving_young_words();
+
+protected:
+
+  // Returns "true" iff none of the gc alloc regions have any allocations
+  // since the last call to "save_marks".
+  bool all_alloc_regions_no_allocs_since_save_marks();
+  // Calls "note_end_of_copying on all gc alloc_regions.
+  void all_alloc_regions_note_end_of_copying();
+
+  // The number of regions allocated to hold humongous objects.
+  int         _num_humongous_regions;
+  YoungList*  _young_list;
+
+  // The current policy object for the collector.
+  G1CollectorPolicy* _g1_policy;
+
+  // Parallel allocation lock to protect the current allocation region.
+  Mutex  _par_alloc_during_gc_lock;
+  Mutex* par_alloc_during_gc_lock() { return &_par_alloc_during_gc_lock; }
+
+  // If possible/desirable, allocate a new HeapRegion for normal object
+  // allocation sufficient for an allocation of the given "word_size".
+  // If "do_expand" is true, will attempt to expand the heap if necessary
+  // to satisfy the request.  If "zero_filled" is true, requires a
+  // zero-filled region.
+  // (Returning NULL will trigger a GC.)
+  virtual HeapRegion* newAllocRegion_work(size_t word_size,
+                                          bool do_expand,
+                                          bool zero_filled);
+
+  virtual HeapRegion* newAllocRegion(size_t word_size,
+                                     bool zero_filled = true) {
+    return newAllocRegion_work(word_size, false, zero_filled);
+  }
+  virtual HeapRegion* newAllocRegionWithExpansion(int purpose,
+                                                  size_t word_size,
+                                                  bool zero_filled = true);
+
+  // Attempt to allocate an object of the given (very large) "word_size".
+  // Returns "NULL" on failure.
+  virtual HeapWord* humongousObjAllocate(size_t word_size);
+
+  // If possible, allocate a block of the given word_size, else return "NULL".
+  // Returning NULL will trigger GC or heap expansion.
+  // These two methods have rather awkward pre- and
+  // post-conditions. If they are called outside a safepoint, then
+  // they assume that the caller is holding the heap lock. Upon return
+  // they release the heap lock, if they are returning a non-NULL
+  // value. attempt_allocation_slow() also dirties the cards of a
+  // newly-allocated young region after it releases the heap
+  // lock. This change in interface was the neatest way to achieve
+  // this card dirtying without affecting mem_allocate(), which is a
+  // more frequently called method. We tried two or three different
+  // approaches, but they were even more hacky.
+  HeapWord* attempt_allocation(size_t word_size,
+                               bool permit_collection_pause = true);
+
+  HeapWord* attempt_allocation_slow(size_t word_size,
+                                    bool permit_collection_pause = true);
+
+  // Allocate blocks during garbage collection. Will ensure an
+  // allocation region, either by picking one or expanding the
+  // heap, and then allocate a block of the given size. The block
+  // may not be a humongous - it must fit into a single heap region.
+  HeapWord* allocate_during_gc(GCAllocPurpose purpose, size_t word_size);
+  HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size);
+
+  HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose,
+                                    HeapRegion*    alloc_region,
+                                    bool           par,
+                                    size_t         word_size);
+
+  // Ensure that no further allocations can happen in "r", bearing in mind
+  // that parallel threads might be attempting allocations.
+  void par_allocate_remaining_space(HeapRegion* r);
+
+  // Helper function for two callbacks below.
+  // "full", if true, indicates that the GC is for a System.gc() request,
+  // and should collect the entire heap.  If "clear_all_soft_refs" is true,
+  // all soft references are cleared during the GC.  If "full" is false,
+  // "word_size" describes the allocation that the GC should
+  // attempt (at least) to satisfy.
+  void do_collection(bool full, bool clear_all_soft_refs,
+                     size_t word_size);
+
+  // Callback from VM_G1CollectFull operation.
+  // Perform a full collection.
+  void do_full_collection(bool clear_all_soft_refs);
+
+  // Resize the heap if necessary after a full collection.  If this is
+  // after a collect-for allocation, "word_size" is the allocation size,
+  // and will be considered part of the used portion of the heap.
+  void resize_if_necessary_after_full_collection(size_t word_size);
+
+  // Callback from VM_G1CollectForAllocation operation.
+  // This function does everything necessary/possible to satisfy a
+  // failed allocation request (including collection, expansion, etc.)
+  HeapWord* satisfy_failed_allocation(size_t word_size);
+
+  // Attempt to expand the heap sufficiently
+  // to support an allocation of the given "word_size".  If
+  // successful, perform the allocation and return the address of the
+  // allocated block, or else "NULL".
+  virtual HeapWord* expand_and_allocate(size_t word_size);
+
+public:
+  // Expand the garbage-first heap by at least the given size (in bytes!).
+  // (Rounds up to a HeapRegion boundary.)
+  virtual void expand(size_t expand_bytes);
+
+  // Do anything common to GC's.
+  virtual void gc_prologue(bool full);
+  virtual void gc_epilogue(bool full);
+
+protected:
+
+  // Shrink the garbage-first heap by at most the given size (in bytes!).
+  // (Rounds down to a HeapRegion boundary.)
+  virtual void shrink(size_t expand_bytes);
+  void shrink_helper(size_t expand_bytes);
+
+  // Do an incremental collection: identify a collection set, and evacuate
+  // its live objects elsewhere.
+  virtual void do_collection_pause();
+
+  // The guts of the incremental collection pause, executed by the vm
+  // thread.  If "popular_region" is non-NULL, this pause should evacuate
+  // this single region whose remembered set has gotten large, moving
+  // any popular objects to one of the popular regions.
+  virtual void do_collection_pause_at_safepoint(HeapRegion* popular_region);
+
+  // Actually do the work of evacuating the collection set.
+  virtual void evacuate_collection_set();
+
+  // If this is an appropriate right time, do a collection pause.
+  // The "word_size" argument, if non-zero, indicates the size of an
+  // allocation request that is prompting this query.
+  void do_collection_pause_if_appropriate(size_t word_size);
+
+  // The g1 remembered set of the heap.
+  G1RemSet* _g1_rem_set;
+  // And its mod ref barrier set, used to track updates for the above.
+  ModRefBarrierSet* _mr_bs;
+
+  // The Heap Region Rem Set Iterator.
+  HeapRegionRemSetIterator** _rem_set_iterator;
+
+  // The closure used to refine a single card.
+  RefineCardTableEntryClosure* _refine_cte_cl;
+
+  // A function to check the consistency of dirty card logs.
+  void check_ct_logs_at_safepoint();
+
+  // After a collection pause, make the regions in the CS into free
+  // regions.
+  void free_collection_set(HeapRegion* cs_head);
+
+  // Applies "scan_non_heap_roots" to roots outside the heap,
+  // "scan_rs" to roots inside the heap (having done "set_region" to
+  // indicate the region in which the root resides), and does "scan_perm"
+  // (setting the generation to the perm generation.)  If "scan_rs" is
+  // NULL, then this step is skipped.  The "worker_i"
+  // param is for use with parallel roots processing, and should be
+  // the "i" of the calling parallel worker thread's work(i) function.
+  // In the sequential case this param will be ignored.
+  void g1_process_strong_roots(bool collecting_perm_gen,
+                               SharedHeap::ScanningOption so,
+                               OopClosure* scan_non_heap_roots,
+                               OopsInHeapRegionClosure* scan_rs,
+                               OopsInHeapRegionClosure* scan_so,
+                               OopsInGenClosure* scan_perm,
+                               int worker_i);
+
+  void scan_scan_only_set(OopsInHeapRegionClosure* oc,
+                          int worker_i);
+  void scan_scan_only_region(HeapRegion* hr,
+                             OopsInHeapRegionClosure* oc,
+                             int worker_i);
+
+  // Apply "blk" to all the weak roots of the system.  These include
+  // JNI weak roots, the code cache, system dictionary, symbol table,
+  // string table, and referents of reachable weak refs.
+  void g1_process_weak_roots(OopClosure* root_closure,
+                             OopClosure* non_root_closure);
+
+  // Invoke "save_marks" on all heap regions.
+  void save_marks();
+
+  // Free a heap region.
+  void free_region(HeapRegion* hr);
+  // A component of "free_region", exposed for 'batching'.
+  // All the params after "hr" are out params: the used bytes of the freed
+  // region(s), the number of H regions cleared, the number of regions
+  // freed, and pointers to the head and tail of a list of freed contig
+  // regions, linked through the "next_on_unclean_list" field.
+  void free_region_work(HeapRegion* hr,
+                        size_t& pre_used,
+                        size_t& cleared_h,
+                        size_t& freed_regions,
+                        UncleanRegionList* list,
+                        bool par = false);
+
+
+  // The concurrent marker (and the thread it runs in.)
+  ConcurrentMark* _cm;
+  ConcurrentMarkThread* _cmThread;
+  bool _mark_in_progress;
+
+  // The concurrent refiner.
+  ConcurrentG1Refine* _cg1r;
+
+  // The concurrent zero-fill thread.
+  ConcurrentZFThread* _czft;
+
+  // The parallel task queues
+  RefToScanQueueSet *_task_queues;
+
+  // True iff an evacuation has failed in the current collection.
+  bool _evacuation_failed;
+
+  // Set the attribute indicating whether evacuation has failed in the
+  // current collection.
+  void set_evacuation_failed(bool b) { _evacuation_failed = b; }
+
+  // Failed evacuations cause some logical from-space objects to have
+  // forwarding pointers to themselves.  Reset them.
+  void remove_self_forwarding_pointers();
+
+  // When one is non-null, so is the other.  The two arrays are parallel:
+  // entry i of the first is an object with a preserved mark, and entry i
+  // of the second is that object's original mark value.
+  GrowableArray<oop>*     _objs_with_preserved_marks;
+  GrowableArray<markOop>* _preserved_marks_of_objs;
+
+  // Preserve the mark of "obj", if necessary, in preparation for its mark
+  // word being overwritten with a self-forwarding-pointer.
+  void preserve_mark_if_necessary(oop obj, markOop m);
+
+  // The stack of evac-failure objects left to be scanned.
+  GrowableArray<oop>*    _evac_failure_scan_stack;
+  // The closure to apply to evac-failure objects.
+
+  OopsInHeapRegionClosure* _evac_failure_closure;
+  // Set the field above.
+  void
+  set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_closure) {
+    _evac_failure_closure = evac_failure_closure;
+  }
+
+  // Push "obj" on the scan stack.
+  void push_on_evac_failure_scan_stack(oop obj);
+  // Process scan stack entries until the stack is empty.
+  void drain_evac_failure_scan_stack();
+  // True iff an invocation of "drain_scan_stack" is in progress; to
+  // prevent unnecessary recursion.
+  bool _drain_in_progress;
+
+  // Do any necessary initialization for evacuation-failure handling.
+  // "cl" is the closure that will be used to process evac-failure
+  // objects.
+  void init_for_evac_failure(OopsInHeapRegionClosure* cl);
+  // Do any necessary cleanup for evacuation-failure handling data
+  // structures.
+  void finalize_for_evac_failure();
+
+  // An attempt to evacuate "obj" has failed; take necessary steps.
+  void handle_evacuation_failure(oop obj);
+  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
+  void handle_evacuation_failure_common(oop obj, markOop m);
+
+
+  // Ensure that the relevant gc_alloc regions are set.
+  void get_gc_alloc_regions();
+  // We're done with GC alloc regions; release them, as appropriate.
+  void release_gc_alloc_regions();
+
+  // ("Weak") Reference processing support
+  ReferenceProcessor* _ref_processor;
+
+  enum G1H_process_strong_roots_tasks {
+    G1H_PS_mark_stack_oops_do,
+    G1H_PS_refProcessor_oops_do,
+    // Leave this one last.
+    G1H_PS_NumElements
+  };
+
+  SubTasksDone* _process_strong_tasks;
+
+  // Allocate space to hold a popular object.  Result is guaranteed below
+  // "popular_object_boundary()".  Note: CURRENTLY halts the system if we
+  // run out of space to hold popular objects.
+  HeapWord* allocate_popular_object(size_t word_size);
+
+  // The boundary between popular and non-popular objects.
+  HeapWord* _popular_object_boundary;
+
+  HeapRegionList* _popular_regions_to_be_evacuated;
+
+  // Compute which objects in "single_region" are popular.  If any are,
+  // evacuate them to a popular region, leaving behind forwarding pointers,
+  // and select "popular_region" as the single collection set region.
+  // Otherwise, leave the collection set null.
+  void popularity_pause_preamble(HeapRegion* popular_region);
+
+  // Compute which objects in "single_region" are popular, and evacuate
+  // them to a popular region, leaving behind forwarding pointers.
+  // Returns "true" if at least one popular object is discovered and
+  // evacuated.  In any case, "*max_rc" is set to the maximum reference
+  // count of an object in the region.
+  bool compute_reference_counts_and_evac_popular(HeapRegion* popular_region,
+                                                 size_t* max_rc);
+  // Subroutines used in the above.
+  bool _rc_region_above;
+  size_t _rc_region_diff;
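+  // Maps an object address in the popular region to the address of its
+  // reference-count slot in the RC region: a counter lives at the same
+  // byte offset in the RC region as its object does in the popular region
+  // (_rc_region_diff is the byte distance between the two region bottoms,
+  // applied above or below depending on _rc_region_above).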
+  jint* obj_rc_addr(oop obj) {
+    uintptr_t obj_addr = (uintptr_t)obj;
+    if (_rc_region_above) {
+      jint* res = (jint*)(obj_addr + _rc_region_diff);
+      assert((uintptr_t)res > obj_addr, "RC region is above.");
+      return res;
+    } else {
+      jint* res = (jint*)(obj_addr - _rc_region_diff);
+      assert((uintptr_t)res < obj_addr, "RC region is below.");
+      return res;
+    }
+  }
+  jint obj_rc(oop obj) {
+    return *obj_rc_addr(obj);
+  }
+  void inc_obj_rc(oop obj) {
+    (*obj_rc_addr(obj))++;
+  }
+  void atomic_inc_obj_rc(oop obj);
+
+
+  // Number of popular objects and bytes (latter is cheaper!).
+  size_t pop_object_used_objs();
+  size_t pop_object_used_bytes();
+
+  // Index of the popular region in which allocation is currently being
+  // done.
+  int _cur_pop_hr_index;
+
+  // List of regions which require zero filling.
+  UncleanRegionList _unclean_region_list;
+  bool _unclean_regions_coming;
+
+  bool check_age_cohort_well_formed_work(int a, HeapRegion* hr);
+
+public:
+  void set_refine_cte_cl_concurrency(bool concurrent);
+
+  RefToScanQueue *task_queue(int i);
+
+  // Create a G1CollectedHeap with the specified policy.
+  // Must call the initialize method afterwards.
+  // May not return if something goes wrong.
+  G1CollectedHeap(G1CollectorPolicy* policy);
+
+  // Initialize the G1CollectedHeap to have the initial and
+  // maximum sizes, permanent generation, and remembered and barrier sets
+  // specified by the policy object.
+  jint initialize();
+
+  void ref_processing_init();
+
+  void set_par_threads(int t) {
+    SharedHeap::set_par_threads(t);
+    _process_strong_tasks->set_par_threads(t);
+  }
+
+  virtual CollectedHeap::Name kind() const {
+    return CollectedHeap::G1CollectedHeap;
+  }
+
+  // The current policy object for the collector.
+  G1CollectorPolicy* g1_policy() const { return _g1_policy; }
+
+  // Adaptive size policy.  No such thing for g1.
+  virtual AdaptiveSizePolicy* size_policy() { return NULL; }
+
+  // The rem set and barrier set.
+  G1RemSet* g1_rem_set() const { return _g1_rem_set; }
+  ModRefBarrierSet* mr_bs() const { return _mr_bs; }
+
+  // The rem set iterator.
+  HeapRegionRemSetIterator* rem_set_iterator(int i) {
+    return _rem_set_iterator[i];
+  }
+
+  HeapRegionRemSetIterator* rem_set_iterator() {
+    return _rem_set_iterator[0];
+  }
+
+  unsigned get_gc_time_stamp() {
+    return _gc_time_stamp;
+  }
+
+  void reset_gc_time_stamp() {
+    _gc_time_stamp = 0;
+    OrderAccess::fence();
+  }
+
+  void increment_gc_time_stamp() {
+    ++_gc_time_stamp;
+    OrderAccess::fence();
+  }
+
+  void iterate_dirty_card_closure(bool concurrent, int worker_i);
+
+  // The shared block offset table array.
+  G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; }
+
+  // Reference Processing accessor
+  ReferenceProcessor* ref_processor() { return _ref_processor; }
+
+  // Reserved (g1 only; super method includes perm), capacity and the used
+  // portion in bytes.
+  size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); }
+  virtual size_t capacity() const;
+  virtual size_t used() const;
+  size_t recalculate_used() const;
+#ifndef PRODUCT
+  size_t recalculate_used_regions() const;
+#endif // PRODUCT
+
+  // These virtual functions do the actual allocation.
+  virtual HeapWord* mem_allocate(size_t word_size,
+                                 bool   is_noref,
+                                 bool   is_tlab,
+                                 bool* gc_overhead_limit_was_exceeded);
+
+  // Some heaps may offer a contiguous region for shared non-blocking
+  // allocation, via inlined code (by exporting the address of the top and
+  // end fields defining the extent of the contiguous allocation region.)
+  // But G1CollectedHeap doesn't yet support this.
+
+  // Return an estimate of the maximum allocation that could be performed
+  // without triggering any collection or expansion activity.  In a
+  // generational collector, for example, this is probably the largest
+  // allocation that could be supported (without expansion) in the youngest
+  // generation.  It is "unsafe" because no locks are taken; the result
+  // should be treated as an approximation, not a guarantee, for use in
+  // heuristic resizing decisions.
+  virtual size_t unsafe_max_alloc();
+
+  virtual bool is_maximal_no_gc() const {
+    return _g1_storage.uncommitted_size() == 0;
+  }
+
+  // The total number of regions in the heap.
+  size_t n_regions();
+
+  // The maximum number of regions the heap can hold.
+  size_t max_regions();
+
+  // The number of regions that are completely free.
+  size_t free_regions();
+
+  // The number of regions that are not completely free.
+  size_t used_regions() { return n_regions() - free_regions(); }
+
+  // True iff the ZF thread should run.
+  bool should_zf();
+
+  // The number of regions available for "regular" expansion.
+  size_t expansion_regions() { return _expansion_regions; }
+
+#ifndef PRODUCT
+  bool regions_accounted_for();
+  bool print_region_accounting_info();
+  void print_region_counts();
+#endif
+
+  HeapRegion* alloc_region_from_unclean_list(bool zero_filled);
+  HeapRegion* alloc_region_from_unclean_list_locked(bool zero_filled);
+
+  void put_region_on_unclean_list(HeapRegion* r);
+  void put_region_on_unclean_list_locked(HeapRegion* r);
+
+  void prepend_region_list_on_unclean_list(UncleanRegionList* list);
+  void prepend_region_list_on_unclean_list_locked(UncleanRegionList* list);
+
+  void set_unclean_regions_coming(bool b);
+  void set_unclean_regions_coming_locked(bool b);
+  // Wait for cleanup to be complete.
+  void wait_for_cleanup_complete();
+  // Like above, but assumes that the calling thread owns the Heap_lock.
+  void wait_for_cleanup_complete_locked();
+
+  // Return the head of the unclean list.
+  HeapRegion* peek_unclean_region_list_locked();
+  // Remove and return the head of the unclean list.
+  HeapRegion* pop_unclean_region_list_locked();
+
+  // List of regions which are zero filled and ready for allocation.
+  HeapRegion* _free_region_list;
+  // Number of elements on the free list.
+  size_t _free_region_list_size;
+
+  // If the head of the unclean list is ZeroFilled, move it to the free
+  // list.
+  bool move_cleaned_region_to_free_list_locked();
+  bool move_cleaned_region_to_free_list();
+
+  void put_free_region_on_list_locked(HeapRegion* r);
+  void put_free_region_on_list(HeapRegion* r);
+
+  // Remove and return the head element of the free list.
+  HeapRegion* pop_free_region_list_locked();
+
+  // If "zero_filled" is true, we first try the free list, then we try the
+  // unclean list, zero-filling the result.  If "zero_filled" is false, we
+  // first try the unclean list, then the zero-filled list.
+  HeapRegion* alloc_free_region_from_lists(bool zero_filled);
+
+  // Verify the integrity of the region lists.
+  void remove_allocated_regions_from_lists();
+  bool verify_region_lists();
+  bool verify_region_lists_locked();
+  size_t unclean_region_list_length();
+  size_t free_region_list_length();
+
+  // Perform a collection of the heap; intended for use in implementing
+  // "System.gc".  This probably implies as full a collection as the
+  // "CollectedHeap" supports.
+  virtual void collect(GCCause::Cause cause);
+
+  // The same as above but assume that the caller holds the Heap_lock.
+  void collect_locked(GCCause::Cause cause);
+
+  // This interface assumes that it's being called by the
+  // vm thread. It collects the heap assuming that the
+  // heap lock is already held and that we are executing in
+  // the context of the vm thread.
+  virtual void collect_as_vm_thread(GCCause::Cause cause);
+
+  // True iff an evacuation has failed in the most-recent collection.
+  bool evacuation_failed() { return _evacuation_failed; }
+
+  // Free a region if it is totally full of garbage.  Returns the number of
+  // bytes freed (0 ==> didn't free it).
+  size_t free_region_if_totally_empty(HeapRegion *hr);
+  void free_region_if_totally_empty_work(HeapRegion *hr,
+                                         size_t& pre_used,
+                                         size_t& cleared_h_regions,
+                                         size_t& freed_regions,
+                                         UncleanRegionList* list,
+                                         bool par = false);
+
+  // If we've done free region work that yields the given changes, update
+  // the relevant global variables.
+  void finish_free_region_work(size_t pre_used,
+                               size_t cleared_h_regions,
+                               size_t freed_regions,
+                               UncleanRegionList* list);
+
+
+  // Returns "TRUE" iff "p" points into the allocated area of the heap.
+  virtual bool is_in(const void* p) const;
+
+  // Return "TRUE" iff the given object address is within the collection
+  // set.
+  inline bool obj_in_cs(oop obj);
+
+  // Return "TRUE" iff the given object address is in the reserved
+  // region of g1 (excluding the permanent generation).
+  bool is_in_g1_reserved(const void* p) const {
+    return _g1_reserved.contains(p);
+  }
+
+  // Returns a MemRegion that corresponds to the space that has been
+  // committed in the heap.
+  MemRegion g1_committed() {
+    return _g1_committed;
+  }
+
+  NOT_PRODUCT( bool is_in_closed_subset(const void* p) const; )
+
+  // Dirty card table entries covering a list of young regions.
+  void dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list);
+
+  // This resets the card table to all zeros.  It is used after
+  // a collection pause which used the card table to claim cards.
+  void cleanUpCardTable();
+
+  // Iteration functions.
+
+  // Iterate over all the ref-containing fields of all objects, calling
+  // "cl.do_oop" on each.
+  virtual void oop_iterate(OopClosure* cl);
+
+  // Same as above, restricted to a memory region.
+  virtual void oop_iterate(MemRegion mr, OopClosure* cl);
+
+  // Iterate over all objects, calling "cl.do_object" on each.
+  virtual void object_iterate(ObjectClosure* cl);
+
+  // Iterate over all objects allocated since the last collection, calling
+  // "cl.do_object" on each.  The heap must have been initialized properly
+  // to support this function, or else this call will fail.
+  virtual void object_iterate_since_last_GC(ObjectClosure* cl);
+
+  // Iterate over all spaces in use in the heap, in ascending address order.
+  virtual void space_iterate(SpaceClosure* cl);
+
+  // Iterate over heap regions, in address order, terminating the
+  // iteration early if the "doHeapRegion" method returns "true".
+  void heap_region_iterate(HeapRegionClosure* blk);
+
+  // Iterate over heap regions starting with r (or the first region if "r"
+  // is NULL), in address order, terminating early if the "doHeapRegion"
+  // method returns "true".
+  void heap_region_iterate_from(HeapRegion* r, HeapRegionClosure* blk);
+
+  // As above but starting from the region at index idx.
+  void heap_region_iterate_from(int idx, HeapRegionClosure* blk);
+
+  HeapRegion* region_at(size_t idx);
+
+  // Divide the heap region sequence into "chunks" of some size (the number
+  // of regions divided by the number of parallel threads times some
+  // overpartition factor, currently 4).  Assumes that this will be called
+  // in parallel by ParallelGCThreads worker threads with distinct worker
+  // ids in the range [0..max(ParallelGCThreads-1, 1)], that all parallel
+  // calls will use the same "claim_value", and that that claim value is
+  // different from the claim_value of any heap region before the start of
+  // the iteration.  Applies "blk->doHeapRegion" to each of the regions, by
+  // attempting to claim the first region in each chunk, and, if
+  // successful, applying the closure to each region in the chunk (and
+  // setting the claim value of the second and subsequent regions of the
+  // chunk.)  For now requires that "doHeapRegion" always returns "false",
+  // i.e., that a closure never attempt to abort a traversal.
+  void heap_region_par_iterate_chunked(HeapRegionClosure* blk,
+                                       int worker,
+                                       jint claim_value);
+
+  // It resets all the region claim values to the default.
+  void reset_heap_region_claim_values();
+
+#ifdef ASSERT
+  bool check_heap_region_claim_values(jint claim_value);
+#endif // ASSERT
+
+  // Iterate over the regions (if any) in the current collection set.
+  void collection_set_iterate(HeapRegionClosure* blk);
+
+  // As above but starting from region r
+  void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk);
+
+  // Returns the first (lowest address) compactible space in the heap.
+  virtual CompactibleSpace* first_compactible_space();
+
+  // A CollectedHeap will contain some number of spaces.  This finds the
+  // space containing a given address, or else returns NULL.
+  virtual Space* space_containing(const void* addr) const;
+
+  // A G1CollectedHeap will contain some number of heap regions.  This
+  // finds the region containing a given address, or else returns NULL.
+  HeapRegion* heap_region_containing(const void* addr) const;
+
+  // Like the above, but requires "addr" to be in the heap (to avoid a
+  // null-check), and unlike the above, may return a continuing humongous
+  // region.
+  HeapRegion* heap_region_containing_raw(const void* addr) const;
+
+  // A CollectedHeap is divided into a dense sequence of "blocks"; that is,
+  // each address in the (reserved) heap is a member of exactly
+  // one block.  The defining characteristic of a block is that it is
+  // possible to find its size, and thus to progress forward to the next
+  // block.  (Blocks may be of different sizes.)  Thus, blocks may
+  // represent Java objects, or they might be free blocks in a
+  // free-list-based heap (or subheap), as long as the two kinds are
+  // distinguishable and the size of each is determinable.
+
+  // Returns the address of the start of the "block" that contains the
+  // address "addr".  We say "blocks" instead of "object" since some heaps
+  // may not pack objects densely; a chunk may either be an object or a
+  // non-object.
+  virtual HeapWord* block_start(const void* addr) const;
+
+  // Requires "addr" to be the start of a chunk, and returns its size.
+  // "addr + size" is required to be the start of a new chunk, or the end
+  // of the active area of the heap.
+  virtual size_t block_size(const HeapWord* addr) const;
+
+  // Requires "addr" to be the start of a block, and returns "TRUE" iff
+  // the block is an object.
+  virtual bool block_is_obj(const HeapWord* addr) const;
+
+  // Does this heap support heap inspection? (+PrintClassHistogram)
+  virtual bool supports_heap_inspection() const { return true; }
+
+  // Section on thread-local allocation buffers (TLABs)
+  // See CollectedHeap for semantics.
+
+  virtual bool supports_tlab_allocation() const;
+  virtual size_t tlab_capacity(Thread* thr) const;
+  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
+  virtual HeapWord* allocate_new_tlab(size_t size);
+
+  // Can a compiler initialize a new object without store barriers?
+  // This permission only extends from the creation of a new object
+  // via a TLAB up to the first subsequent safepoint.
+  virtual bool can_elide_tlab_store_barriers() const {
+    // Since G1's TLABs may, on occasion, come from non-young regions as
+    // well, we cannot elide these barriers.  (Is there a flag controlling that? XXX)
+    return false;
+  }
+
+  // Can a compiler elide a store barrier when it writes
+  // a permanent oop into the heap?  Applies when the compiler
+  // is storing x to the heap, where x->is_perm() is true.
+  virtual bool can_elide_permanent_oop_store_barriers() const {
+    // At least until perm gen collection is also G1-ified, at
+    // which point this should return false.
+    return true;
+  }
+
+  virtual bool allocs_are_zero_filled();
+
+  // The boundary between a "large" and "small" array of primitives, in
+  // words.
+  virtual size_t large_typearray_limit();
+
+  // All popular objects are guaranteed to have addresses below this
+  // boundary.
+  HeapWord* popular_object_boundary() {
+    return _popular_object_boundary;
+  }
+
+  // Declare the region as one that should be evacuated because its
+  // remembered set is too large.
+  void schedule_popular_region_evac(HeapRegion* r);
+  // If there is a popular region to evacuate, remove it from the list
+  // and return it.
+  HeapRegion* popular_region_to_evac();
+  // Evacuate the given popular region.
+  void evac_popular_region(HeapRegion* r);
+
+  // Returns "true" iff the given word_size is "very large".
+  static bool isHumongous(size_t word_size) {
+    return word_size >= VeryLargeInWords;
+  }
+
+  // Update mod union table with the set of dirty cards.
+  void updateModUnion();
+
+  // Set the mod union bits corresponding to the given memRegion.  Note
+  // that this is always a safe operation, since it doesn't clear any
+  // bits.
+  void markModUnionRange(MemRegion mr);
+
+  // Records the fact that a marking phase is no longer in progress.
+  void set_marking_complete() {
+    _mark_in_progress = false;
+  }
+  void set_marking_started() {
+    _mark_in_progress = true;
+  }
+  bool mark_in_progress() {
+    return _mark_in_progress;
+  }
+
+  // Print the maximum heap capacity.
+  virtual size_t max_capacity() const;
+
+  virtual jlong millis_since_last_gc();
+
+  // Perform any cleanup actions necessary before allowing a verification.
+  virtual void prepare_for_verify();
+
+  // Perform verification.
+  virtual void verify(bool allow_dirty, bool silent);
+  virtual void print() const;
+  virtual void print_on(outputStream* st) const;
+
+  virtual void print_gc_threads_on(outputStream* st) const;
+  virtual void gc_threads_do(ThreadClosure* tc) const;
+
+  // Override
+  void print_tracing_info() const;
+
+  // If "addr" is a pointer into the (reserved?) heap, returns a positive
+  // number indicating the "arena" within the heap in which "addr" falls.
+  // Or else returns 0.
+  virtual int addr_to_arena_id(void* addr) const;
+
+  // Convenience function to be used in situations where the heap type can be
+  // asserted to be this type.
+  static G1CollectedHeap* heap();
+
+  void empty_young_list();
+  bool should_set_young_locked();
+
+  void set_region_short_lived_locked(HeapRegion* hr);
+  // add appropriate methods for any other surv rate groups
+
+  void young_list_rs_length_sampling_init() {
+    _young_list->rs_length_sampling_init();
+  }
+  bool young_list_rs_length_sampling_more() {
+    return _young_list->rs_length_sampling_more();
+  }
+  void young_list_rs_length_sampling_next() {
+    _young_list->rs_length_sampling_next();
+  }
+  size_t young_list_sampled_rs_lengths() {
+    return _young_list->sampled_rs_lengths();
+  }
+
+  size_t young_list_length()   { return _young_list->length(); }
+  size_t young_list_scan_only_length() {
+    return _young_list->scan_only_length();
+  }
+
+  HeapRegion* pop_region_from_young_list() {
+    return _young_list->pop_region();
+  }
+
+  HeapRegion* young_list_first_region() {
+    return _young_list->first_region();
+  }
+
+  // debugging
+  bool check_young_list_well_formed() {
+    return _young_list->check_list_well_formed();
+  }
+  bool check_young_list_empty(bool ignore_scan_only_list,
+                              bool check_sample = true);
+
+  // *** Stuff related to concurrent marking.  It's not clear to me that so
+  // many of these need to be public.
+
+  // The functions below are helper functions that a subclass of
+  // "CollectedHeap" can use in the implementation of its virtual
+  // functions.
+  // This performs a concurrent marking of the live objects in a
+  // bitmap off to the side.
+  void doConcurrentMark();
+
+  // This is called from the marksweep collector which then does
+  // a concurrent mark and verifies that the results agree with
+  // the stop the world marking.
+  void checkConcurrentMark();
+  void do_sync_mark();
+
+  bool isMarkedPrev(oop obj) const;
+  bool isMarkedNext(oop obj) const;
+
+  // Determine if an object is dead, given the object and also
+  // the region to which the object belongs. An object is dead
+  // iff a) it was not allocated since the last mark and b) it
+  // is not marked.
+
+  bool is_obj_dead(const oop obj, const HeapRegion* hr) const {
+    return
+      !hr->obj_allocated_since_prev_marking(obj) &&
+      !isMarkedPrev(obj);
+  }
+
+  // This is used when copying an object to survivor space.
+  // If the object is marked live, then we mark the copy live.
+  // If the object is allocated since the start of this mark
+  // cycle, then we mark the copy live.
+  // If the object has been around since the previous mark
+  // phase, and hasn't been marked yet during this phase,
+  // then we don't mark it, we just wait for the
+  // current marking cycle to get to it.
+
+  // This function returns true when an object has been
+  // around since the previous marking and hasn't yet
+  // been marked during this marking.
+
+  bool is_obj_ill(const oop obj, const HeapRegion* hr) const {
+    return
+      !hr->obj_allocated_since_next_marking(obj) &&
+      !isMarkedNext(obj);
+  }
+
+  // Determine if an object is dead, given only the object itself.
+  // This will find the region to which the object belongs and
+  // then call the region version of the same function.
+
+  // Additionally, if it is in the permanent gen it isn't dead,
+  // and if it is NULL it isn't dead.
+
+  bool is_obj_dead(oop obj) {
+    HeapRegion* hr = heap_region_containing(obj);
+    if (hr == NULL) {
+      if (Universe::heap()->is_in_permanent(obj))
+        return false;
+      else if (obj == NULL) return false;
+      else return true;
+    }
+    else return is_obj_dead(obj, hr);
+  }
+
+  bool is_obj_ill(oop obj) {
+    HeapRegion* hr = heap_region_containing(obj);
+    if (hr == NULL) {
+      if (Universe::heap()->is_in_permanent(obj))
+        return false;
+      else if (obj == NULL) return false;
+      else return true;
+    }
+    else return is_obj_ill(obj, hr);
+  }
+
+  // The following is just to alert the verification code
+  // that a full collection has occurred and that the
+  // remembered sets are no longer up to date.
+  bool _full_collection;
+  void set_full_collection() { _full_collection = true;}
+  void clear_full_collection() {_full_collection = false;}
+  bool full_collection() {return _full_collection;}
+
+  ConcurrentMark* concurrent_mark() const { return _cm; }
+  ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
+
+public:
+  void stop_conc_gc_threads();
+
+  // <NEW PREDICTION>
+
+  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
+  void check_if_region_is_too_expensive(double predicted_time_ms);
+  size_t pending_card_num();
+  size_t max_pending_card_num();
+  size_t cards_scanned();
+
+  // </NEW PREDICTION>
+
+protected:
+  size_t _max_heap_capacity;
+
+//  debug_only(static void check_for_valid_allocation_state();)
+
+public:
+  // Temporary: call to mark things unimplemented for the G1 heap (e.g.,
+  // MemoryService).  In productization, we can make this assert false
+  // to catch such places (as well as searching for calls to this...)
+  static void g1_unimplemented();
+
+};
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Inline functions for G1CollectedHeap
+
+inline HeapRegion*
+G1CollectedHeap::heap_region_containing(const void* addr) const {
+  HeapRegion* hr = _hrs->addr_to_region(addr);
+  // hr can be NULL if addr is in the perm gen
+  if (hr != NULL && hr->continuesHumongous()) {
+    hr = hr->humongous_start_region();
+  }
+  return hr;
+}
+
+inline HeapRegion*
+G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
+  HeapRegion* res = _hrs->addr_to_region(addr);
+  assert(res != NULL, "addr outside of heap?");
+  return res;
+}
+
+inline bool G1CollectedHeap::obj_in_cs(oop obj) {
+  HeapRegion* r = _hrs->addr_to_region(obj);
+  return r != NULL && r->in_collection_set();
+}
+
+inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size,
+                                              bool permit_collection_pause) {
+  HeapWord* res = NULL;
+
+  assert( SafepointSynchronize::is_at_safepoint() ||
+          Heap_lock->owned_by_self(), "pre-condition of the call" );
+
+  if (_cur_alloc_region != NULL) {
+
+    // If this allocation causes a region to become non-empty,
+    // then we need to update our free_regions count.
+
+    if (_cur_alloc_region->is_empty()) {
+      res = _cur_alloc_region->allocate(word_size);
+      if (res != NULL)
+        _free_regions--;
+    } else {
+      res = _cur_alloc_region->allocate(word_size);
+    }
+  }
+  if (res != NULL) {
+    if (!SafepointSynchronize::is_at_safepoint()) {
+      assert( Heap_lock->owned_by_self(), "invariant" );
+      Heap_lock->unlock();
+    }
+    return res;
+  }
+  // attempt_allocation_slow will also unlock the heap lock when appropriate.
+  return attempt_allocation_slow(word_size, permit_collection_pause);
+}
+
+inline RefToScanQueue* G1CollectedHeap::task_queue(int i) {
+  return _task_queues->queue(i);
+}
+
+
+inline bool G1CollectedHeap::isMarkedPrev(oop obj) const {
+  return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj);
+}
+
+inline bool G1CollectedHeap::isMarkedNext(oop obj) const {
+  return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,3163 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1CollectorPolicy.cpp.incl"
+
+#define PREDICTIONS_VERBOSE 0
+
+// <NEW PREDICTION>
+
+// Different defaults for different numbers of GC threads.
+// They were chosen by running GCOld and SPECjbb on debris with different
+// numbers of GC threads and choosing them based on the results.
+
+// all the same
+static double rs_length_diff_defaults[] = {
+  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+};
+
+static double cost_per_card_ms_defaults[] = {
+  0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
+};
+
+static double cost_per_scan_only_region_ms_defaults[] = {
+  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
+};
+
+// all the same
+static double fully_young_cards_per_entry_ratio_defaults[] = {
+  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
+};
+
+static double cost_per_entry_ms_defaults[] = {
+  0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
+};
+
+static double cost_per_byte_ms_defaults[] = {
+  0.00006, 0.00003, 0.00003, 0.000015, 0.000015, 0.00001, 0.00001, 0.000009
+};
+
+// these should be pretty consistent
+static double constant_other_time_ms_defaults[] = {
+  5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
+};
+
+
+static double young_other_cost_per_region_ms_defaults[] = {
+  0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1
+};
+
+static double non_young_other_cost_per_region_ms_defaults[] = {
+  1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30
+};
+
+// </NEW PREDICTION>
+
+G1CollectorPolicy::G1CollectorPolicy() :
+  _parallel_gc_threads((ParallelGCThreads > 0) ? ParallelGCThreads : 1),
+  _n_pauses(0),
+  _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _all_pause_times_ms(new NumberSeq()),
+  _stop_world_start(0.0),
+  _all_stop_world_times_ms(new NumberSeq()),
+  _all_yield_times_ms(new NumberSeq()),
+
+  _all_mod_union_times_ms(new NumberSeq()),
+
+  _non_pop_summary(new NonPopSummary()),
+  _pop_summary(new PopSummary()),
+  _non_pop_abandoned_summary(new NonPopAbandonedSummary()),
+  _pop_abandoned_summary(new PopAbandonedSummary()),
+
+  _cur_clear_ct_time_ms(0.0),
+
+  _region_num_young(0),
+  _region_num_tenured(0),
+  _prev_region_num_young(0),
+  _prev_region_num_tenured(0),
+
+  _aux_num(10),
+  _all_aux_times_ms(new NumberSeq[_aux_num]),
+  _cur_aux_start_times_ms(new double[_aux_num]),
+  _cur_aux_times_ms(new double[_aux_num]),
+  _cur_aux_times_set(new bool[_aux_num]),
+
+  _pop_compute_rc_start(0.0),
+  _pop_evac_start(0.0),
+
+  _concurrent_mark_init_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+
+  // <NEW PREDICTION>
+
+  _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _prev_collection_pause_end_ms(0.0),
+  _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_scan_only_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _fully_young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _partially_young_cards_per_entry_ratio_seq(
+                                         new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _partially_young_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_byte_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_scan_only_region_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _non_young_other_cost_per_region_ms_seq(
+                                         new TruncatedSeq(TruncatedSeqLength)),
+
+  _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)),
+
+  _pause_time_target_ms((double) G1MaxPauseTimeMS),
+
+  // </NEW PREDICTION>
+
+  _in_young_gc_mode(false),
+  _full_young_gcs(true),
+  _full_young_pause_num(0),
+  _partial_young_pause_num(0),
+
+  _during_marking(false),
+  _in_marking_window(false),
+  _in_marking_window_im(false),
+
+  _known_garbage_ratio(0.0),
+  _known_garbage_bytes(0),
+
+  _young_gc_eff_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _target_pause_time_ms(-1.0),
+
+   _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)),
+
+  _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)),
+
+  _recent_avg_pause_time_ratio(0.0),
+  _num_markings(0),
+  _n_marks(0),
+  _n_pauses_at_mark_end(0),
+
+  _all_full_gc_times_ms(new NumberSeq()),
+
+  _conc_refine_enabled(0),
+  _conc_refine_zero_traversals(0),
+  _conc_refine_max_traversals(0),
+  _conc_refine_current_delta(G1ConcRefineInitialDelta),
+
+  // G1PausesBtwnConcMark defaults to -1
+  // so the hack is to do the cast  QQQ FIXME
+  _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
+  _n_marks_since_last_pause(0),
+  _conc_mark_initiated(false),
+  _should_initiate_conc_mark(false),
+  _should_revert_to_full_young_gcs(false),
+  _last_full_young_gc(false),
+
+  _prev_collection_pause_used_at_end_bytes(0),
+
+  _collection_set(NULL),
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+  _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived",
+                                                 G1YoungSurvRateNumRegionsSummary)),
+  _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor",
+                                              G1YoungSurvRateNumRegionsSummary))
+  // add here any more surv rate groups
+{
+  _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
+  _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
+
+  _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_only_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_only_regions_scanned = new double[_parallel_gc_threads];
+
+  _par_last_update_rs_start_times_ms = new double[_parallel_gc_threads];
+  _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
+  _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
+
+  _par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
+  _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads];
+
+  _par_last_obj_copy_times_ms = new double[_parallel_gc_threads];
+
+  _par_last_termination_times_ms = new double[_parallel_gc_threads];
+
+  // we store the data from the first pass during popularity pauses
+  _pop_par_last_update_rs_start_times_ms = new double[_parallel_gc_threads];
+  _pop_par_last_update_rs_times_ms = new double[_parallel_gc_threads];
+  _pop_par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
+
+  _pop_par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads];
+  _pop_par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
+
+  _pop_par_last_closure_app_times_ms = new double[_parallel_gc_threads];
+
+  // start conservatively
+  _expensive_region_limit_ms = 0.5 * (double) G1MaxPauseTimeMS;
+
+  // <NEW PREDICTION>
+
+  int index;
+  if (ParallelGCThreads == 0)
+    index = 0;
+  else if (ParallelGCThreads > 8)
+    index = 7;
+  else
+    index = ParallelGCThreads - 1;
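+  // Descriptive note (derived from the clamping above): ParallelGCThreads
+  // values of 0 or 1 map to index 0, values 2..8 map to ParallelGCThreads - 1,
+  // and anything above 8 is clamped to index 7, the last entry of the
+  // eight-element default arrays declared at the top of this file.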
+
+  _pending_card_diff_seq->add(0.0);
+  _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
+  _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
+  _cost_per_scan_only_region_ms_seq->add(
+                                 cost_per_scan_only_region_ms_defaults[index]);
+  _fully_young_cards_per_entry_ratio_seq->add(
+                            fully_young_cards_per_entry_ratio_defaults[index]);
+  _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]);
+  _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
+  _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
+  _young_other_cost_per_region_ms_seq->add(
+                               young_other_cost_per_region_ms_defaults[index]);
+  _non_young_other_cost_per_region_ms_seq->add(
+                           non_young_other_cost_per_region_ms_defaults[index]);
+
+  // </NEW PREDICTION>
+
+  double time_slice  = (double) G1TimeSliceMS / 1000.0;
+  double max_gc_time = (double) G1MaxPauseTimeMS / 1000.0;
+  guarantee(max_gc_time < time_slice,
+            "Max GC time should not be greater than the time slice");
+  _mmu_tracker = new G1MMUTrackerQueue(time_slice, max_gc_time);
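+  // Roughly speaking, the MMU tracker is what lets the policy keep the
+  // total stop-the-world time within max_gc_time seconds over any
+  // time_slice-second window (hence the guarantee above).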
+  _sigma = (double) G1ConfidencePerc / 100.0;
+
+  // start conservatively (around 50ms is about right)
+  _concurrent_mark_init_times_ms->add(0.05);
+  _concurrent_mark_remark_times_ms->add(0.05);
+  _concurrent_mark_cleanup_times_ms->add(0.20);
+  _tenuring_threshold = MaxTenuringThreshold;
+
+  initialize_all();
+}
+
+// Increment "i", mod "len"
+static void inc_mod(int& i, int len) {
+  i++; if (i == len) i = 0;
+}
+
+void G1CollectorPolicy::initialize_flags() {
+  set_min_alignment(HeapRegion::GrainBytes);
+  set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name()));
+  CollectorPolicy::initialize_flags();
+}
+
+void G1CollectorPolicy::init() {
+  // Set aside an initial future to_space.
+  _g1 = G1CollectedHeap::heap();
+  size_t regions = Universe::heap()->capacity() / HeapRegion::GrainBytes;
+
+  assert(Heap_lock->owned_by_self(), "Locking discipline.");
+
+  if (G1SteadyStateUsed < 50) {
+    vm_exit_during_initialization("G1SteadyStateUsed must be at least 50%.");
+  }
+  if (UseConcMarkSweepGC) {
+    vm_exit_during_initialization("-XX:+UseG1GC is incompatible with "
+                                  "-XX:+UseConcMarkSweepGC.");
+  }
+
+  if (G1Gen) {
+    _in_young_gc_mode = true;
+
+    if (G1YoungGenSize == 0) {
+      set_adaptive_young_list_length(true);
+      _young_list_fixed_length = 0;
+    } else {
+      set_adaptive_young_list_length(false);
+      _young_list_fixed_length = (G1YoungGenSize / HeapRegion::GrainBytes);
+    }
+    _free_regions_at_end_of_collection = _g1->free_regions();
+    _scan_only_regions_at_end_of_collection = 0;
+    calculate_young_list_min_length();
+    guarantee( _young_list_min_length == 0, "invariant, not enough info" );
+    calculate_young_list_target_config();
+  } else {
+    _young_list_fixed_length = 0;
+    _in_young_gc_mode = false;
+  }
+}
+
+void G1CollectorPolicy::calculate_young_list_min_length() {
+  _young_list_min_length = 0;
+
+  if (!adaptive_young_list_length())
+    return;
+
+  if (_alloc_rate_ms_seq->num() > 3) {
+    double now_sec = os::elapsedTime();
+    double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0;
+    double alloc_rate_ms = predict_alloc_rate_ms();
+    int min_regions = (int) ceil(alloc_rate_ms * when_ms);
+    int current_region_num = (int) _g1->young_list_length();
+    _young_list_min_length = min_regions + current_region_num;
+  }
+}
+
+void G1CollectorPolicy::calculate_young_list_target_config() {
+  if (adaptive_young_list_length()) {
+    size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq);
+    calculate_young_list_target_config(rs_lengths);
+  } else {
+    if (full_young_gcs())
+      _young_list_target_length = _young_list_fixed_length;
+    else
+      _young_list_target_length = _young_list_fixed_length / 2;
+    _young_list_target_length = MAX2(_young_list_target_length, (size_t)1);
+    size_t so_length = calculate_optimal_so_length(_young_list_target_length);
+    guarantee( so_length < _young_list_target_length, "invariant" );
+    _young_list_so_prefix_length = so_length;
+  }
+}
+
+// This method calculates the optimal scan-only set for a fixed young
+// gen size. I couldn't work out how to reuse the more elaborate one,
+// i.e. calculate_young_list_target_config(rs_length), as the loops are
+// fundamentally different (the other one finds a config for different
+// S-O lengths, whereas here we need to do the opposite).
+size_t G1CollectorPolicy::calculate_optimal_so_length(
+                                                    size_t young_list_length) {
+  if (!G1UseScanOnlyPrefix)
+    return 0;
+
+  if (_all_pause_times_ms->num() < 3) {
+    // we won't use a scan-only set at the beginning to allow the rest
+    // of the predictors to warm up
+    return 0;
+  }
+
+  if (_cost_per_scan_only_region_ms_seq->num() < 3) {
+    // then, we'll only set the S-O set to 1 for a little bit of time,
+    // to get enough information on the scanning cost
+    return 1;
+  }
+
+  size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq);
+  size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq);
+  size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff();
+  size_t scanned_cards;
+  if (full_young_gcs())
+    scanned_cards = predict_young_card_num(adj_rs_lengths);
+  else
+    scanned_cards = predict_non_young_card_num(adj_rs_lengths);
+  double base_time_ms = predict_base_elapsed_time_ms(pending_cards,
+                                                     scanned_cards);
+
+  size_t so_length = 0;
+  double max_gc_eff = 0.0;
+  for (size_t i = 0; i < young_list_length; ++i) {
+    double gc_eff = 0.0;
+    double pause_time_ms = 0.0;
+    predict_gc_eff(young_list_length, i, base_time_ms,
+                   &gc_eff, &pause_time_ms);
+    if (gc_eff > max_gc_eff) {
+      max_gc_eff = gc_eff;
+      so_length = i;
+    }
+  }
+
+  // set it to 95% of the optimal to make sure we sample the "area"
+  // around the optimal length to get up-to-date survival rate data
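+  // (for example, an optimal so_length of 40 regions would be scaled
+  // down to 40 * 950 / 1000 = 38)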
+  return so_length * 950 / 1000;
+}
+
+// This is a really cool piece of code! It finds the best
+// target configuration (young length / scan-only prefix length) so
+// that GC efficiency is maximized and that we also meet the pause
+// time target. It's a triple-nested loop. These loops are explained below
+// from the inside-out :-)
+//
+// (a) The innermost loop will try to find the optimal young length
+// for a fixed S-O length. It uses a binary search to speed up the
+// process. We assume that, for a fixed S-O length, as we add more
+// young regions to the CSet, the GC efficiency will only go up (I'll
+// skip the proof). So, using a binary search to optimize this process
+// makes perfect sense.
+//
+// (b) The middle loop will fix the S-O length before calling the
+// innermost one. It will vary it between two parameters, increasing
+// it by a given increment.
+//
+// (c) The outermost loop will call the middle loop three times.
+//   (1) The first time it will explore all possible S-O length values
+//   from 0 to as large as it can get, using a coarse increment (to
+//   quickly "home in" to where the optimal seems to be).
+//   (2) The second time it will explore the values around the optimal
+//   that was found by the first iteration using a fine increment.
+//   (3) Once the optimal config has been determined by the second
+//   iteration, we'll redo the calculation, but setting the S-O length
+//   to 95% of the optimal to make sure we sample the "area"
+//   around the optimal length to get up-to-date survival rate data
+//
+// Termination conditions for the iterations are several: the pause
+// time is over the limit, we do not have enough to-space, etc.
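+//
+// A purely illustrative walk-through (the numbers are made up for this
+// note, they do not come from any measurement): with min_so_length = 0,
+// max_so_length = 100 and a coarse increment of 5, the coarse pass scans
+// S-O lengths 0, 5, 10, ...; if it settles on, say, 40, the fine pass
+// then scans 36..44 with an increment of 1; and the final pass re-runs
+// the calculation at 95% of the fine optimum (40 * 950 / 1000 = 38) so
+// that we keep sampling the area around it.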
+
+void G1CollectorPolicy::calculate_young_list_target_config(size_t rs_lengths) {
+  guarantee( adaptive_young_list_length(), "pre-condition" );
+
+  double start_time_sec = os::elapsedTime();
+  size_t min_reserve_perc = MAX2((size_t)2, (size_t)G1MinReservePerc);
+  min_reserve_perc = MIN2((size_t) 50, min_reserve_perc);
+  size_t reserve_regions =
+    (size_t) ((double) min_reserve_perc * (double) _g1->n_regions() / 100.0);
+
+  if (full_young_gcs() && _free_regions_at_end_of_collection > 0) {
+    // we are in fully-young mode and there are free regions in the heap
+
+    size_t min_so_length = 0;
+    size_t max_so_length = 0;
+
+    if (G1UseScanOnlyPrefix) {
+      if (_all_pause_times_ms->num() < 3) {
+        // we won't use a scan-only set at the beginning to allow the rest
+        // of the predictors to warm up
+        min_so_length = 0;
+        max_so_length = 0;
+      } else if (_cost_per_scan_only_region_ms_seq->num() < 3) {
+        // then, we'll only set the S-O set to 1 for a little bit of time,
+        // to get enough information on the scanning cost
+        min_so_length = 1;
+        max_so_length = 1;
+      } else if (_in_marking_window || _last_full_young_gc) {
+        // no S-O prefix during a marking phase either, as at the end
+        // of the marking phase we'll have to use a very small young
+        // length target to fill up the rest of the CSet with
+        // non-young regions and, if we have lots of scan-only regions
+        // left-over, we will not be able to add any more non-young
+        // regions.
+        min_so_length = 0;
+        max_so_length = 0;
+      } else {
+        // this is the common case; we'll never reach the maximum, as
+        // one of the end conditions will fire well before that
+        // (hopefully!)
+        min_so_length = 0;
+        max_so_length = _free_regions_at_end_of_collection - 1;
+      }
+    } else {
+      // no S-O prefix, as the switch is not set, but we still need to
+      // do one iteration to calculate the best young target that
+      // meets the pause time; this way we reuse the same code instead
+      // of replicating it
+      min_so_length = 0;
+      max_so_length = 0;
+    }
+
+    double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
+    size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq);
+    size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff();
+    size_t scanned_cards;
+    if (full_young_gcs())
+      scanned_cards = predict_young_card_num(adj_rs_lengths);
+    else
+      scanned_cards = predict_non_young_card_num(adj_rs_lengths);
+    // calculate this once, so that we don't have to recalculate it in
+    // the innermost loop
+    double base_time_ms = predict_base_elapsed_time_ms(pending_cards,
+                                                       scanned_cards);
+
+    // the result
+    size_t final_young_length = 0;
+    size_t final_so_length = 0;
+    double final_gc_eff = 0.0;
+    // we'll also keep track of how many times we go into the inner loop;
+    // this is for profiling reasons
+    size_t calculations = 0;
+
+    // this determines which of the three iterations the outer loop is in
+    typedef enum {
+      pass_type_coarse,
+      pass_type_fine,
+      pass_type_final
+    } pass_type_t;
+
+    // range of the outer loop's iteration
+    size_t from_so_length   = min_so_length;
+    size_t to_so_length     = max_so_length;
+    guarantee( from_so_length <= to_so_length, "invariant" );
+
+    // this will keep the S-O length that's found by the second
+    // iteration of the outer loop; we'll keep it just in case the third
+    // iteration fails to find something
+    size_t fine_so_length   = 0;
+
+    // the increment step for the coarse (first) iteration
+    size_t so_coarse_increments = 5;
+
+    // the common case, we'll start with the coarse iteration
+    pass_type_t pass = pass_type_coarse;
+    size_t so_length_incr = so_coarse_increments;
+
+    if (from_so_length == to_so_length) {
+      // no point in doing the coarse iteration; we'll go directly into
+      // the fine one (we're essentially trying to find the optimal young
+      // length for a fixed S-O length).
+      so_length_incr = 1;
+      pass = pass_type_final;
+    } else if (to_so_length - from_so_length < 3 * so_coarse_increments) {
+      // again, the range is too short so there is no point in doing the
+      // coarse iteration either
+      so_length_incr = 1;
+      pass = pass_type_fine;
+    }
+
+    bool done = false;
+    // this is the outermost loop
+    while (!done) {
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT
+                             ", incr " SIZE_FORMAT ", pass %s",
+                             from_so_length, to_so_length, so_length_incr,
+                             (pass == pass_type_coarse) ? "coarse" :
+                             (pass == pass_type_fine) ? "fine" : "final");
+#endif // 0
+
+      size_t so_length = from_so_length;
+      size_t init_free_regions =
+        MAX2((size_t)0,
+             _free_regions_at_end_of_collection +
+             _scan_only_regions_at_end_of_collection - reserve_regions);
+
+      // this determines whether a configuration was found
+      bool gc_eff_set = false;
+      // this is the middle loop
+      while (so_length <= to_so_length) {
+        // base time, which excludes region-related time; again we
+        // calculate it once to avoid recalculating it in the
+        // innermost loop
+        double base_time_with_so_ms =
+                           base_time_ms + predict_scan_only_time_ms(so_length);
+        // it's already over the pause target, go around
+        if (base_time_with_so_ms > target_pause_time_ms)
+          break;
+
+        size_t starting_young_length = so_length+1;
+
+        // we make sure that the shortest young length that makes sense
+        // (one more than the S-O length) is feasible
+        size_t min_young_length = starting_young_length;
+        double min_gc_eff;
+        bool min_ok;
+        ++calculations;
+        min_ok = predict_gc_eff(min_young_length, so_length,
+                                base_time_with_so_ms,
+                                init_free_regions, target_pause_time_ms,
+                                &min_gc_eff);
+
+        if (min_ok) {
+          // the shortest young length is indeed feasible; we'll now
+          // set up the max young length and we'll do a binary search
+          // between min_young_length and max_young_length
+          size_t max_young_length = _free_regions_at_end_of_collection - 1;
+          double max_gc_eff = 0.0;
+          bool max_ok = false;
+
+          // the innermost loop! (finally!)
+          while (max_young_length > min_young_length) {
+            // we'll make sure that min_young_length is always at a
+            // feasible config
+            guarantee( min_ok, "invariant" );
+
+            ++calculations;
+            max_ok = predict_gc_eff(max_young_length, so_length,
+                                    base_time_with_so_ms,
+                                    init_free_regions, target_pause_time_ms,
+                                    &max_gc_eff);
+
+            size_t diff = (max_young_length - min_young_length) / 2;
+            if (max_ok) {
+              min_young_length = max_young_length;
+              min_gc_eff = max_gc_eff;
+              min_ok = true;
+            }
+            max_young_length = min_young_length + diff;
+          }
+
+          // the innermost loop found a config
+          guarantee( min_ok, "invariant" );
+          if (min_gc_eff > final_gc_eff) {
+            // it's the best config so far, so we'll keep it
+            final_gc_eff = min_gc_eff;
+            final_young_length = min_young_length;
+            final_so_length = so_length;
+            gc_eff_set = true;
+          }
+        }
+
+        // increment the fixed S-O length and go around
+        so_length += so_length_incr;
+      }
+
+      // this is the end of the outermost loop and we need to decide
+      // what to do during the next iteration
+      if (pass == pass_type_coarse) {
+        // we just did the coarse pass (first iteration)
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config so we'll just bail out; of
+          // course, it might be the case that we missed it; but I'd say
+          // it's a bit unlikely
+          done = true;
+        else {
+          // We did find a feasible config with optimal GC eff during
+          // the first pass. So in the second pass we'll only consider the
+          // S-O lengths around that config with a fine increment.
+
+          guarantee( so_length_incr == so_coarse_increments, "invariant" );
+          guarantee( final_so_length >= min_so_length, "invariant" );
+
+#if 0
+          // leave this in for debugging, just in case
+          gclog_or_tty->print_cr("  coarse pass: SO length " SIZE_FORMAT,
+                                 final_so_length);
+#endif // 0
+
+          from_so_length =
+            (final_so_length - min_so_length > so_coarse_increments) ?
+            final_so_length - so_coarse_increments + 1 : min_so_length;
+          to_so_length =
+            (max_so_length - final_so_length > so_coarse_increments) ?
+            final_so_length + so_coarse_increments - 1 : max_so_length;
+
+          pass = pass_type_fine;
+          so_length_incr = 1;
+        }
+      } else if (pass == pass_type_fine) {
+        // we just finished the second pass
+
+        if (!gc_eff_set) {
+          // we didn't find a feasible config (yes, it's possible;
+          // notice that, sometimes, we go directly into the fine
+          // iteration and skip the coarse one) so we bail out
+          done = true;
+        } else {
+          // We did find a feasible config with optimal GC eff
+          guarantee( so_length_incr == 1, "invariant" );
+
+          if (final_so_length == 0) {
+            // The config is of an empty S-O set, so we'll just bail out
+            done = true;
+          } else {
+            // we'll go around once more, setting the S-O length to 95%
+            // of the optimal
+            size_t new_so_length = 950 * final_so_length / 1000;
+
+#if 0
+            // leave this in for debugging, just in case
+            gclog_or_tty->print_cr("  fine pass: SO length " SIZE_FORMAT
+                                   ", setting it to " SIZE_FORMAT,
+                                    final_so_length, new_so_length);
+#endif // 0
+
+            from_so_length = new_so_length;
+            to_so_length = new_so_length;
+            fine_so_length = final_so_length;
+
+            pass = pass_type_final;
+          }
+        }
+      } else if (pass == pass_type_final) {
+        // we just finished the final (third) pass
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config, so we'll just use the one
+          // we found during the second pass, which we saved
+          final_so_length = fine_so_length;
+
+        // and we're done!
+        done = true;
+      } else {
+        guarantee( false, "should never reach here" );
+      }
+
+      // we now go around the outermost loop
+    }
+
+    // we should have at least one region in the target young length
+    _young_list_target_length = MAX2((size_t) 1, final_young_length);
+    if (final_so_length >= final_young_length)
+      // and we need to ensure that the S-O length is strictly less than
+      // the target young length (this is being a bit careful)
+      final_so_length = 0;
+    _young_list_so_prefix_length = final_so_length;
+    guarantee( !_in_marking_window || !_last_full_young_gc ||
+               _young_list_so_prefix_length == 0, "invariant" );
+
+    // let's keep an eye on how long we spend on this calculation;
+    // right now, I assume that we'll print it when we need it; we
+    // should really add it to the breakdown of a pause
+    double end_time_sec = os::elapsedTime();
+    double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT
+                           ", SO = " SIZE_FORMAT ", "
+                           "elapsed %1.2lf ms, calcs: " SIZE_FORMAT " (%s%s) "
+                           SIZE_FORMAT SIZE_FORMAT,
+                           target_pause_time_ms,
+                           _young_list_target_length - _young_list_so_prefix_length,
+                           _young_list_so_prefix_length,
+                           elapsed_time_ms,
+                           calculations,
+                           full_young_gcs() ? "full" : "partial",
+                           should_initiate_conc_mark() ? " i-m" : "",
+                           in_marking_window(),
+                           in_marking_window_im());
+#endif // 0
+
+    if (_young_list_target_length < _young_list_min_length) {
+      // bummer; this means that, if we do a pause when the optimal
+      // config dictates, we'll violate the pause spacing target (the
+      // min length was calculated based on the application's current
+      // alloc rate);
+
+      // so, we have to bite the bullet, and allocate the minimum
+      // number. We'll violate our target, but we just can't meet it.
+
+      size_t so_length = 0;
+      // a note further up explains why we do not want an S-O length
+      // during marking
+      if (!_in_marking_window && !_last_full_young_gc)
+        // but we can still try to see whether we can find an optimal
+        // S-O length
+        so_length = calculate_optimal_so_length(_young_list_min_length);
+
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("adjusted target length from "
+                             SIZE_FORMAT " to " SIZE_FORMAT
+                             ", SO " SIZE_FORMAT,
+                             _young_list_target_length, _young_list_min_length,
+                             so_length);
+#endif // 0
+
+      _young_list_target_length =
+        MAX2(_young_list_min_length, (size_t)1);
+      _young_list_so_prefix_length = so_length;
+    }
+  } else {
+    // we are in a partially-young mode or we've run out of regions (due
+    // to evacuation failure)
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
+                           ", SO " SIZE_FORMAT,
+                           _young_list_min_length, 0);
+#endif // 0
+
+    // we'll do the pause as soon as possible and with no S-O prefix
+    // (see above for the reasons behind the latter)
+    _young_list_target_length =
+      MAX2(_young_list_min_length, (size_t) 1);
+    _young_list_so_prefix_length = 0;
+  }
+
+  _rs_lengths_prediction = rs_lengths;
+}
+
+// This is used by: calculate_optimal_so_length(length). It returns
+// the GC eff and predicted pause time for a particular config
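+// ("GC eff" here is GC efficiency: the predicted number of reclaimed
+// bytes divided by the predicted pause time in ms, so higher is better.)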
+void
+G1CollectorPolicy::predict_gc_eff(size_t young_length,
+                                  size_t so_length,
+                                  double base_time_ms,
+                                  double* ret_gc_eff,
+                                  double* ret_pause_time_ms) {
+  double so_time_ms = predict_scan_only_time_ms(so_length);
+  double accum_surv_rate_adj = 0.0;
+  if (so_length > 0)
+    accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1));
+  double accum_surv_rate =
+    accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj;
+  size_t bytes_to_copy =
+    (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes);
+  double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy);
+  double young_other_time_ms =
+                       predict_young_other_time_ms(young_length - so_length);
+  double pause_time_ms =
+                base_time_ms + so_time_ms + copy_time_ms + young_other_time_ms;
+  size_t reclaimed_bytes =
+    (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy;
+  double gc_eff = (double) reclaimed_bytes / pause_time_ms;
+
+  *ret_gc_eff = gc_eff;
+  *ret_pause_time_ms = pause_time_ms;
+}
+
+// This is used by: calculate_young_list_target_config(rs_length). It
+// returns the GC eff of a particular config. It returns false if that
+// config violates any of the end conditions of the search in the
+// calling method, or true upon success. The end conditions were put
+// here since it's called twice and it was best not to replicate them
+// in the caller. Also, passing the parameters avoids having to
+// recalculate them in the innermost loop.
+bool
+G1CollectorPolicy::predict_gc_eff(size_t young_length,
+                                  size_t so_length,
+                                  double base_time_with_so_ms,
+                                  size_t init_free_regions,
+                                  double target_pause_time_ms,
+                                  double* ret_gc_eff) {
+  *ret_gc_eff = 0.0;
+
+  if (young_length >= init_free_regions)
+    // end condition 1: not enough space for the young regions
+    return false;
+
+  double accum_surv_rate_adj = 0.0;
+  if (so_length > 0)
+    accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1));
+  double accum_surv_rate =
+    accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj;
+  size_t bytes_to_copy =
+    (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes);
+  double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy);
+  double young_other_time_ms =
+                       predict_young_other_time_ms(young_length - so_length);
+  double pause_time_ms =
+                   base_time_with_so_ms + copy_time_ms + young_other_time_ms;
+
+  if (pause_time_ms > target_pause_time_ms)
+    // end condition 2: over the target pause time
+    return false;
+
+  size_t reclaimed_bytes =
+    (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy;
+  size_t free_bytes =
+                 (init_free_regions - young_length) * HeapRegion::GrainBytes;
+
+  if ((2.0 + sigma()) * (double) bytes_to_copy > (double) free_bytes)
+    // end condition 3: out of to-space (conservatively)
+    return false;
+
+  // success!
+  double gc_eff = (double) reclaimed_bytes / pause_time_ms;
+  *ret_gc_eff = gc_eff;
+
+  return true;
+}
+
+void G1CollectorPolicy::check_prediction_validity() {
+  guarantee( adaptive_young_list_length(), "should not call this otherwise" );
+
+  size_t rs_lengths = _g1->young_list_sampled_rs_lengths();
+  if (rs_lengths > _rs_lengths_prediction) {
+    // add 10% to avoid having to recalculate often
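+    // (e.g. a sampled length of 2000 entries becomes a prediction of
+    // 2000 * 1100 / 1000 = 2200)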
+    size_t rs_lengths_prediction = rs_lengths * 1100 / 1000;
+    calculate_young_list_target_config(rs_lengths_prediction);
+  }
+}
+
+HeapWord* G1CollectorPolicy::mem_allocate_work(size_t size,
+                                               bool is_tlab,
+                                               bool* gc_overhead_limit_was_exceeded) {
+  guarantee(false, "Not using this policy feature yet.");
+  return NULL;
+}
+
+// This method controls how a collector handles one or more
+// of its generations being fully allocated.
+HeapWord* G1CollectorPolicy::satisfy_failed_allocation(size_t size,
+                                                       bool is_tlab) {
+  guarantee(false, "Not using this policy feature yet.");
+  return NULL;
+}
+
+
+#ifndef PRODUCT
+bool G1CollectorPolicy::verify_young_ages() {
+  HeapRegion* head = _g1->young_list_first_region();
+  return
+    verify_young_ages(head, _short_lived_surv_rate_group);
+  // also call verify_young_ages on any additional surv rate groups
+}
+
+bool
+G1CollectorPolicy::verify_young_ages(HeapRegion* head,
+                                     SurvRateGroup *surv_rate_group) {
+  guarantee( surv_rate_group != NULL, "pre-condition" );
+
+  const char* name = surv_rate_group->name();
+  bool ret = true;
+  int prev_age = -1;
+
+  for (HeapRegion* curr = head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    SurvRateGroup* group = curr->surv_rate_group();
+    if (group == NULL && !curr->is_survivor()) {
+      gclog_or_tty->print_cr("## %s: encountered NULL surv_rate_group", name);
+      ret = false;
+    }
+
+    if (surv_rate_group == group) {
+      int age = curr->age_in_surv_rate_group();
+
+      if (age < 0) {
+        gclog_or_tty->print_cr("## %s: encountered negative age", name);
+        ret = false;
+      }
+
+      if (age <= prev_age) {
+        gclog_or_tty->print_cr("## %s: region ages are not strictly increasing "
+                               "(%d, %d)", name, age, prev_age);
+        ret = false;
+      }
+      prev_age = age;
+    }
+  }
+
+  return ret;
+}
+#endif // PRODUCT
+
+void G1CollectorPolicy::record_full_collection_start() {
+  _cur_collection_start_sec = os::elapsedTime();
+  // Release the future to-space so that it is available for compaction into.
+  _g1->set_full_collection();
+}
+
+void G1CollectorPolicy::record_full_collection_end() {
+  // Consider this like a collection pause for the purposes of allocation
+  // since last pause.
+  double end_sec = os::elapsedTime();
+  double full_gc_time_sec = end_sec - _cur_collection_start_sec;
+  double full_gc_time_ms = full_gc_time_sec * 1000.0;
+
+  checkpoint_conc_overhead();
+
+  _all_full_gc_times_ms->add(full_gc_time_ms);
+
+  update_recent_gc_times(end_sec, full_gc_time_sec);
+
+  _g1->clear_full_collection();
+
+  // "Nuke" the heuristics that control the fully/partially young GC
+  // transitions and make sure we start with fully young GCs after the
+  // Full GC.
+  set_full_young_gcs(true);
+  _last_full_young_gc = false;
+  _should_revert_to_full_young_gcs = false;
+  _should_initiate_conc_mark = false;
+  _known_garbage_bytes = 0;
+  _known_garbage_ratio = 0.0;
+  _in_marking_window = false;
+  _in_marking_window_im = false;
+
+  _short_lived_surv_rate_group->record_scan_only_prefix(0);
+  _short_lived_surv_rate_group->start_adding_regions();
+  // also call this on any additional surv rate groups
+
+  _prev_region_num_young   = _region_num_young;
+  _prev_region_num_tenured = _region_num_tenured;
+
+  _free_regions_at_end_of_collection = _g1->free_regions();
+  _scan_only_regions_at_end_of_collection = 0;
+  calculate_young_list_min_length();
+  calculate_young_list_target_config();
+}
+
+void G1CollectorPolicy::record_pop_compute_rc_start() {
+  _pop_compute_rc_start = os::elapsedTime();
+}
+void G1CollectorPolicy::record_pop_compute_rc_end() {
+  double ms = (os::elapsedTime() - _pop_compute_rc_start)*1000.0;
+  _cur_popular_compute_rc_time_ms = ms;
+  _pop_compute_rc_start = 0.0;
+}
+void G1CollectorPolicy::record_pop_evac_start() {
+  _pop_evac_start = os::elapsedTime();
+}
+void G1CollectorPolicy::record_pop_evac_end() {
+  double ms = (os::elapsedTime() - _pop_evac_start)*1000.0;
+  _cur_popular_evac_time_ms = ms;
+  _pop_evac_start = 0.0;
+}
+
+void G1CollectorPolicy::record_before_bytes(size_t bytes) {
+  _bytes_in_to_space_before_gc += bytes;
+}
+
+void G1CollectorPolicy::record_after_bytes(size_t bytes) {
+  _bytes_in_to_space_after_gc += bytes;
+}
+
+void G1CollectorPolicy::record_stop_world_start() {
+  _stop_world_start = os::elapsedTime();
+}
+
+void G1CollectorPolicy::record_collection_pause_start(double start_time_sec,
+                                                      size_t start_used) {
+  if (PrintGCDetails) {
+    gclog_or_tty->stamp(PrintGCTimeStamps);
+    gclog_or_tty->print("[GC pause");
+    if (in_young_gc_mode())
+      gclog_or_tty->print(" (%s)", full_young_gcs() ? "young" : "partial");
+  }
+
+  assert(_g1->used_regions() == _g1->recalculate_used_regions(),
+         "sanity");
+
+  double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0;
+  _all_stop_world_times_ms->add(s_w_t_ms);
+  _stop_world_start = 0.0;
+
+  _cur_collection_start_sec = start_time_sec;
+  _cur_collection_pause_used_at_start_bytes = start_used;
+  _cur_collection_pause_used_regions_at_start = _g1->used_regions();
+  _pending_cards = _g1->pending_card_num();
+  _max_pending_cards = _g1->max_pending_card_num();
+
+  _bytes_in_to_space_before_gc = 0;
+  _bytes_in_to_space_after_gc = 0;
+  _bytes_in_collection_set_before_gc = 0;
+
+#ifdef DEBUG
+  // initialise these to something well known so that we can spot
+  // if they are not set properly
+
+  for (int i = 0; i < _parallel_gc_threads; ++i) {
+    _par_last_ext_root_scan_times_ms[i] = -666.0;
+    _par_last_mark_stack_scan_times_ms[i] = -666.0;
+    _par_last_scan_only_times_ms[i] = -666.0;
+    _par_last_scan_only_regions_scanned[i] = -666.0;
+    _par_last_update_rs_start_times_ms[i] = -666.0;
+    _par_last_update_rs_times_ms[i] = -666.0;
+    _par_last_update_rs_processed_buffers[i] = -666.0;
+    _par_last_scan_rs_start_times_ms[i] = -666.0;
+    _par_last_scan_rs_times_ms[i] = -666.0;
+    _par_last_scan_new_refs_times_ms[i] = -666.0;
+    _par_last_obj_copy_times_ms[i] = -666.0;
+    _par_last_termination_times_ms[i] = -666.0;
+
+    _pop_par_last_update_rs_start_times_ms[i] = -666.0;
+    _pop_par_last_update_rs_times_ms[i] = -666.0;
+    _pop_par_last_update_rs_processed_buffers[i] = -666.0;
+    _pop_par_last_scan_rs_start_times_ms[i] = -666.0;
+    _pop_par_last_scan_rs_times_ms[i] = -666.0;
+    _pop_par_last_closure_app_times_ms[i] = -666.0;
+  }
+#endif
+
+  for (int i = 0; i < _aux_num; ++i) {
+    _cur_aux_times_ms[i] = 0.0;
+    _cur_aux_times_set[i] = false;
+  }
+
+  _satb_drain_time_set = false;
+  _last_satb_drain_processed_buffers = -1;
+
+  if (in_young_gc_mode())
+    _last_young_gc_full = false;
+
+
+  // do that for any other surv rate groups
+  _short_lived_surv_rate_group->stop_adding_regions();
+  size_t short_lived_so_length = _young_list_so_prefix_length;
+  _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length);
+  tag_scan_only(short_lived_so_length);
+
+  assert( verify_young_ages(), "region age verification" );
+}
+
+void G1CollectorPolicy::tag_scan_only(size_t short_lived_scan_only_length) {
+  // done in a way that it can be extended for other surv rate groups too...
+
+  HeapRegion* head = _g1->young_list_first_region();
+  bool finished_short_lived = (short_lived_scan_only_length == 0);
+
+  if (finished_short_lived)
+    return;
+
+  for (HeapRegion* curr = head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    SurvRateGroup* surv_rate_group = curr->surv_rate_group();
+    int age = curr->age_in_surv_rate_group();
+
+    if (surv_rate_group == _short_lived_surv_rate_group) {
+      if ((size_t)age < short_lived_scan_only_length)
+        curr->set_scan_only();
+      else
+        finished_short_lived = true;
+    }
+
+
+    if (finished_short_lived)
+      return;
+  }
+
+  guarantee( false, "we should never reach here" );
+}
+
+void G1CollectorPolicy::record_popular_pause_preamble_start() {
+  _cur_popular_preamble_start_ms = os::elapsedTime() * 1000.0;
+}
+
+void G1CollectorPolicy::record_popular_pause_preamble_end() {
+  _cur_popular_preamble_time_ms =
+    (os::elapsedTime() * 1000.0) - _cur_popular_preamble_start_ms;
+
+  // copy the recorded statistics of the first pass to temporary arrays
+  for (int i = 0; i < _parallel_gc_threads; ++i) {
+    _pop_par_last_update_rs_start_times_ms[i] = _par_last_update_rs_start_times_ms[i];
+    _pop_par_last_update_rs_times_ms[i] = _par_last_update_rs_times_ms[i];
+    _pop_par_last_update_rs_processed_buffers[i] = _par_last_update_rs_processed_buffers[i];
+    _pop_par_last_scan_rs_start_times_ms[i] = _par_last_scan_rs_start_times_ms[i];
+    _pop_par_last_scan_rs_times_ms[i] = _par_last_scan_rs_times_ms[i];
+    _pop_par_last_closure_app_times_ms[i] = _par_last_obj_copy_times_ms[i];
+  }
+}
+
+void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) {
+  _mark_closure_time_ms = mark_closure_time_ms;
+}
+
+void G1CollectorPolicy::record_concurrent_mark_init_start() {
+  _mark_init_start_sec = os::elapsedTime();
+  guarantee(!in_young_gc_mode(), "should not be here in young GC mode");
+}
+
+void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double
+                                                   mark_init_elapsed_time_ms) {
+  _during_marking = true;
+  _should_initiate_conc_mark = false;
+  _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms;
+}
+
+void G1CollectorPolicy::record_concurrent_mark_init_end() {
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_ms = (end_time_sec - _mark_init_start_sec) * 1000.0;
+  _concurrent_mark_init_times_ms->add(elapsed_time_ms);
+  checkpoint_conc_overhead();
+  record_concurrent_mark_init_end_pre(elapsed_time_ms);
+
+  _mmu_tracker->add_pause(_mark_init_start_sec, end_time_sec, true);
+}
+
+void G1CollectorPolicy::record_concurrent_mark_remark_start() {
+  _mark_remark_start_sec = os::elapsedTime();
+  _during_marking = false;
+}
+
+void G1CollectorPolicy::record_concurrent_mark_remark_end() {
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_ms = (end_time_sec - _mark_remark_start_sec)*1000.0;
+  checkpoint_conc_overhead();
+  _concurrent_mark_remark_times_ms->add(elapsed_time_ms);
+  _cur_mark_stop_world_time_ms += elapsed_time_ms;
+  _prev_collection_pause_end_ms += elapsed_time_ms;
+
+  _mmu_tracker->add_pause(_mark_remark_start_sec, end_time_sec, true);
+}
+
+void G1CollectorPolicy::record_concurrent_mark_cleanup_start() {
+  _mark_cleanup_start_sec = os::elapsedTime();
+}
+
+void
+G1CollectorPolicy::record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                                      size_t max_live_bytes) {
+  record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes);
+  record_concurrent_mark_cleanup_end_work2();
+}
+
+void
+G1CollectorPolicy::
+record_concurrent_mark_cleanup_end_work1(size_t freed_bytes,
+                                         size_t max_live_bytes) {
+  if (_n_marks < 2) _n_marks++;
+  if (G1PolicyVerbose > 0)
+    gclog_or_tty->print_cr("At end of marking, max_live is " SIZE_FORMAT " MB "
+                           " (of " SIZE_FORMAT " MB heap).",
+                           max_live_bytes/M, _g1->capacity()/M);
+}
+
+// The important thing about this is that it includes "os::elapsedTime".
+void G1CollectorPolicy::record_concurrent_mark_cleanup_end_work2() {
+  checkpoint_conc_overhead();
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_ms = (end_time_sec - _mark_cleanup_start_sec)*1000.0;
+  _concurrent_mark_cleanup_times_ms->add(elapsed_time_ms);
+  _cur_mark_stop_world_time_ms += elapsed_time_ms;
+  _prev_collection_pause_end_ms += elapsed_time_ms;
+
+  _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_time_sec, true);
+
+  _num_markings++;
+
+  // We did a marking, so reset the "since_last_mark" variables.
+  double considerConcMarkCost = 1.0;
+  // If there are available processors, concurrent activity is free...
+  if (Threads::number_of_non_daemon_threads() * 2 <
+      os::active_processor_count()) {
+    considerConcMarkCost = 0.0;
+  }
+  _n_pauses_at_mark_end = _n_pauses;
+  _n_marks_since_last_pause++;
+  _conc_mark_initiated = false;
+}
+
+void
+G1CollectorPolicy::record_concurrent_mark_cleanup_completed() {
+  if (in_young_gc_mode()) {
+    _should_revert_to_full_young_gcs = false;
+    _last_full_young_gc = true;
+    _in_marking_window = false;
+    if (adaptive_young_list_length())
+      calculate_young_list_target_config();
+  }
+}
+
+void G1CollectorPolicy::record_concurrent_pause() {
+  if (_stop_world_start > 0.0) {
+    double yield_ms = (os::elapsedTime() - _stop_world_start) * 1000.0;
+    _all_yield_times_ms->add(yield_ms);
+  }
+}
+
+void G1CollectorPolicy::record_concurrent_pause_end() {
+}
+
+void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() {
+  _cur_CH_strong_roots_end_sec = os::elapsedTime();
+  _cur_CH_strong_roots_dur_ms =
+    (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0;
+}
+
+void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() {
+  _cur_G1_strong_roots_end_sec = os::elapsedTime();
+  _cur_G1_strong_roots_dur_ms =
+    (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0;
+}
+
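+// Sums n consecutive entries of the circular buffer sum_arr (capacity N),
+// starting at index 'start' and wrapping around as needed.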
+template<class T>
+T sum_of(T* sum_arr, int start, int n, int N) {
+  T sum = (T)0;
+  for (int i = 0; i < n; i++) {
+    int j = (start + i) % N;
+    sum += sum_arr[j];
+  }
+  return sum;
+}
+
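+// Prints one value per parallel GC worker on a single line; when 'summary'
+// is set, the average, minimum and maximum across the workers follow.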
+void G1CollectorPolicy::print_par_stats (int level,
+                                         const char* str,
+                                         double* data,
+                                         bool summary) {
+  double min = data[0], max = data[0];
+  double total = 0.0;
+  int j;
+  for (j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print("[%s (ms):", str);
+  for (uint i = 0; i < ParallelGCThreads; ++i) {
+    double val = data[i];
+    if (val < min)
+      min = val;
+    if (val > max)
+      max = val;
+    total += val;
+    gclog_or_tty->print("  %3.1lf", val);
+  }
+  if (summary) {
+    gclog_or_tty->print_cr("");
+    double avg = total / (double) ParallelGCThreads;
+    gclog_or_tty->print(" ");
+    for (j = 0; j < level; ++j)
+      gclog_or_tty->print("   ");
+    gclog_or_tty->print("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf",
+                        avg, min, max);
+  }
+  gclog_or_tty->print_cr("]");
+}
+
+void G1CollectorPolicy::print_par_buffers (int level,
+                                         const char* str,
+                                         double* data,
+                                         bool summary) {
+  double min = data[0], max = data[0];
+  double total = 0.0;
+  int j;
+  for (j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print("[%s :", str);
+  for (uint i = 0; i < ParallelGCThreads; ++i) {
+    double val = data[i];
+    if (val < min)
+      min = val;
+    if (val > max)
+      max = val;
+    total += val;
+    gclog_or_tty->print(" %d", (int) val);
+  }
+  if (summary) {
+    gclog_or_tty->print_cr("");
+    double avg = total / (double) ParallelGCThreads;
+    gclog_or_tty->print(" ");
+    for (j = 0; j < level; ++j)
+      gclog_or_tty->print("   ");
+    gclog_or_tty->print("Sum: %d, Avg: %d, Min: %d, Max: %d",
+               (int)total, (int)avg, (int)min, (int)max);
+  }
+  gclog_or_tty->print_cr("]");
+}
+
+void G1CollectorPolicy::print_stats (int level,
+                                     const char* str,
+                                     double value) {
+  for (int j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print_cr("[%s: %5.1lf ms]", str, value);
+}
+
+void G1CollectorPolicy::print_stats (int level,
+                                     const char* str,
+                                     int value) {
+  for (int j = 0; j < level; ++j)
+    gclog_or_tty->print("   ");
+  gclog_or_tty->print_cr("[%s: %d]", str, value);
+}
+
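+// The helpers below aggregate the per-worker arrays (one slot per parallel
+// GC thread); when ParallelGCThreads is 0 they fall back to the single
+// slot at index 0.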
+double G1CollectorPolicy::avg_value (double* data) {
+  if (ParallelGCThreads > 0) {
+    double ret = 0.0;
+    for (uint i = 0; i < ParallelGCThreads; ++i)
+      ret += data[i];
+    return ret / (double) ParallelGCThreads;
+  } else {
+    return data[0];
+  }
+}
+
+double G1CollectorPolicy::max_value (double* data) {
+  if (ParallelGCThreads > 0) {
+    double ret = data[0];
+    for (uint i = 1; i < ParallelGCThreads; ++i)
+      if (data[i] > ret)
+        ret = data[i];
+    return ret;
+  } else {
+    return data[0];
+  }
+}
+
+double G1CollectorPolicy::sum_of_values (double* data) {
+  if (ParallelGCThreads > 0) {
+    double sum = 0.0;
+    for (uint i = 0; i < ParallelGCThreads; i++)
+      sum += data[i];
+    return sum;
+  } else {
+    return data[0];
+  }
+}
+
+double G1CollectorPolicy::max_sum (double* data1,
+                                   double* data2) {
+  double ret = data1[0] + data2[0];
+
+  if (ParallelGCThreads > 0) {
+    for (uint i = 1; i < ParallelGCThreads; ++i) {
+      double data = data1[i] + data2[i];
+      if (data > ret)
+        ret = data;
+    }
+  }
+  return ret;
+}
+
+// Any duration below this threshold is considered to be zero.
+#define MIN_TIMER_GRANULARITY 0.0000001
+
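+// Records the timing and size data gathered during an evacuation pause,
+// feeds it into the prediction sequences, and decides whether to initiate
+// concurrent marking and whether to switch between fully-young and
+// partially-young collections.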
+void G1CollectorPolicy::record_collection_pause_end(bool popular,
+                                                    bool abandoned) {
+  double end_time_sec = os::elapsedTime();
+  double elapsed_ms = _last_pause_time_ms;
+  bool parallel = ParallelGCThreads > 0;
+  double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0;
+  size_t rs_size =
+    _cur_collection_pause_used_regions_at_start - collection_set_size();
+  size_t cur_used_bytes = _g1->used();
+  assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
+  bool last_pause_included_initial_mark = false;
+
+#ifndef PRODUCT
+  if (G1YoungSurvRateVerbose) {
+    gclog_or_tty->print_cr("");
+    _short_lived_surv_rate_group->print();
+    // do that for any other surv rate groups too
+  }
+#endif // PRODUCT
+
+  checkpoint_conc_overhead();
+
+  if (in_young_gc_mode()) {
+    last_pause_included_initial_mark = _should_initiate_conc_mark;
+    if (last_pause_included_initial_mark)
+      record_concurrent_mark_init_end_pre(0.0);
+
+    size_t min_used_targ =
+      (_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta);
+
+    if (cur_used_bytes > min_used_targ) {
+      if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) {
+      } else if (!_g1->mark_in_progress() && !_last_full_young_gc) {
+        _should_initiate_conc_mark = true;
+      }
+    }
+
+    _prev_collection_pause_used_at_end_bytes = cur_used_bytes;
+  }
+
+  _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
+                          end_time_sec, false);
+
+  guarantee(_cur_collection_pause_used_regions_at_start >=
+            collection_set_size(),
+            "Negative RS size?");
+
+  // This assert is exempted when we're doing parallel collection pauses,
+  // because the fragmentation caused by the parallel GC allocation buffers
+  // can lead to more memory being used during collection than was used
+  // before. Best leave this out until the fragmentation problem is fixed.
+  // Pauses in which evacuation failed can also lead to negative
+  // collections, since no space is reclaimed from a region containing an
+  // object whose evacuation failed.
+  // Further, we're now always doing parallel collection.  But I'm still
+  // leaving this here as a placeholder for a more precise assertion later.
+  // (DLD, 10/05.)
+  assert((true || parallel) // Always using GC LABs now.
+         || _g1->evacuation_failed()
+         || _cur_collection_pause_used_at_start_bytes >= cur_used_bytes,
+         "Negative collection");
+
+  size_t freed_bytes =
+    _cur_collection_pause_used_at_start_bytes - cur_used_bytes;
+  size_t surviving_bytes = _collection_set_bytes_used_before - freed_bytes;
+  double survival_fraction =
+    (double)surviving_bytes/
+    (double)_collection_set_bytes_used_before;
+
+  _n_pauses++;
+
+  if (!abandoned) {
+    _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms);
+    _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms);
+    _recent_evac_times_ms->add(evac_ms);
+    _recent_pause_times_ms->add(elapsed_ms);
+
+    _recent_rs_sizes->add(rs_size);
+
+    // We exempt parallel collection from this check because Alloc Buffer
+    // fragmentation can produce negative collections.  Same with evac
+    // failure.
+    // Further, we're now always doing parallel collection.  But I'm still
+    // leaving this here as a placeholder for a more precise assertion later.
+    // (DLD, 10/05.)
+    assert((true || parallel)
+           || _g1->evacuation_failed()
+           || surviving_bytes <= _collection_set_bytes_used_before,
+           "Or else negative collection!");
+    _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before);
+    _recent_CS_bytes_surviving->add(surviving_bytes);
+
+    // this is where we update the allocation rate of the application
+    double app_time_ms =
+      (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms);
+    if (app_time_ms < MIN_TIMER_GRANULARITY) {
+      // This usually happens due to the timer not having the required
+      // granularity. Some Linuxes are the usual culprits.
+      // We'll just set it to something (arbitrarily) small.
+      app_time_ms = 1.0;
+    }
+    size_t regions_allocated =
+      (_region_num_young - _prev_region_num_young) +
+      (_region_num_tenured - _prev_region_num_tenured);
+    double alloc_rate_ms = (double) regions_allocated / app_time_ms;
+    _alloc_rate_ms_seq->add(alloc_rate_ms);
+    _prev_region_num_young   = _region_num_young;
+    _prev_region_num_tenured = _region_num_tenured;
+
+    double interval_ms =
+      (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0;
+    update_recent_gc_times(end_time_sec, elapsed_ms);
+    _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms;
+    assert(recent_avg_pause_time_ratio() < 1.00, "All GC?");
+  }
+
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
+  }
+
+  PauseSummary* summary;
+  if (!abandoned && !popular)
+    summary = _non_pop_summary;
+  else if (!abandoned && popular)
+    summary = _pop_summary;
+  else if (abandoned && !popular)
+    summary = _non_pop_abandoned_summary;
+  else if (abandoned && popular)
+    summary = _pop_abandoned_summary;
+  else
+    guarantee(false, "should not get here!");
+
+  double pop_update_rs_time;
+  double pop_update_rs_processed_buffers;
+  double pop_scan_rs_time;
+  double pop_closure_app_time;
+  double pop_other_time;
+
+  if (popular) {
+    PopPreambleSummary* preamble_summary = summary->pop_preamble_summary();
+    guarantee(preamble_summary != NULL, "should not be null!");
+
+    pop_update_rs_time = avg_value(_pop_par_last_update_rs_times_ms);
+    pop_update_rs_processed_buffers =
+      sum_of_values(_pop_par_last_update_rs_processed_buffers);
+    pop_scan_rs_time = avg_value(_pop_par_last_scan_rs_times_ms);
+    pop_closure_app_time = avg_value(_pop_par_last_closure_app_times_ms);
+    pop_other_time = _cur_popular_preamble_time_ms -
+      (pop_update_rs_time + pop_scan_rs_time + pop_closure_app_time +
+       _cur_popular_evac_time_ms);
+
+    preamble_summary->record_pop_preamble_time_ms(_cur_popular_preamble_time_ms);
+    preamble_summary->record_pop_update_rs_time_ms(pop_update_rs_time);
+    preamble_summary->record_pop_scan_rs_time_ms(pop_scan_rs_time);
+    preamble_summary->record_pop_closure_app_time_ms(pop_closure_app_time);
+    preamble_summary->record_pop_evacuation_time_ms(_cur_popular_evac_time_ms);
+    preamble_summary->record_pop_other_time_ms(pop_other_time);
+  }
+
+  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
+  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double scan_only_time = avg_value(_par_last_scan_only_times_ms);
+  double scan_only_regions_scanned =
+    sum_of_values(_par_last_scan_only_regions_scanned);
+  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
+  double update_rs_processed_buffers =
+    sum_of_values(_par_last_update_rs_processed_buffers);
+  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
+  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
+  double termination_time = avg_value(_par_last_termination_times_ms);
+
+  double parallel_other_time;
+  if (!abandoned) {
+    MainBodySummary* body_summary = summary->main_body_summary();
+    guarantee(body_summary != NULL, "should not be null!");
+
+    if (_satb_drain_time_set)
+      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+    else
+      body_summary->record_satb_drain_time_ms(0.0);
+    body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
+    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_scan_only_time_ms(scan_only_time);
+    body_summary->record_update_rs_time_ms(update_rs_time);
+    body_summary->record_scan_rs_time_ms(scan_rs_time);
+    body_summary->record_obj_copy_time_ms(obj_copy_time);
+    if (parallel) {
+      body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
+      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
+      body_summary->record_termination_time_ms(termination_time);
+      parallel_other_time = _cur_collection_par_time_ms -
+        (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
+         scan_only_time + scan_rs_time + obj_copy_time + termination_time);
+      body_summary->record_parallel_other_time_ms(parallel_other_time);
+    }
+    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
+  }
+
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("      ET: %10.6f ms           (avg: %10.6f ms)\n"
+                           "        CH Strong: %10.6f ms    (avg: %10.6f ms)\n"
+                           "        G1 Strong: %10.6f ms    (avg: %10.6f ms)\n"
+                           "        Evac:      %10.6f ms    (avg: %10.6f ms)\n"
+                           "       ET-RS:  %10.6f ms      (avg: %10.6f ms)\n"
+                           "      |RS|: " SIZE_FORMAT,
+                           elapsed_ms, recent_avg_time_for_pauses_ms(),
+                           _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(),
+                           _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(),
+                           evac_ms, recent_avg_time_for_evac_ms(),
+                           scan_rs_time,
+                           recent_avg_time_for_pauses_ms() -
+                           recent_avg_time_for_G1_strong_ms(),
+                           rs_size);
+
+    gclog_or_tty->print_cr("       Used at start: " SIZE_FORMAT"K"
+                           "       At end " SIZE_FORMAT "K\n"
+                           "       garbage      : " SIZE_FORMAT "K"
+                           "       of     " SIZE_FORMAT "K\n"
+                           "       survival     : %6.2f%%  (%6.2f%% avg)",
+                           _cur_collection_pause_used_at_start_bytes/K,
+                           _g1->used()/K, freed_bytes/K,
+                           _collection_set_bytes_used_before/K,
+                           survival_fraction*100.0,
+                           recent_avg_survival_fraction()*100.0);
+    gclog_or_tty->print_cr("       Recent %% gc pause time: %6.2f",
+                           recent_avg_pause_time_ratio() * 100.0);
+  }
+
+  double other_time_ms = elapsed_ms;
+  if (popular)
+    other_time_ms -= _cur_popular_preamble_time_ms;
+
+  if (!abandoned) {
+    if (_satb_drain_time_set)
+      other_time_ms -= _cur_satb_drain_time_ms;
+
+    if (parallel)
+      other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms;
+    else
+      other_time_ms -=
+        update_rs_time +
+        ext_root_scan_time + mark_stack_scan_time + scan_only_time +
+        scan_rs_time + obj_copy_time;
+  }
+
+  if (PrintGCDetails) {
+    gclog_or_tty->print_cr("%s%s, %1.8lf secs]",
+                           (popular && !abandoned) ? " (popular)" :
+                           (!popular && abandoned) ? " (abandoned)" :
+                           (popular && abandoned) ? " (popular/abandoned)" : "",
+                           (last_pause_included_initial_mark) ? " (initial-mark)" : "",
+                           elapsed_ms / 1000.0);
+
+    if (!abandoned) {
+      if (_satb_drain_time_set)
+        print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
+      if (_last_satb_drain_processed_buffers >= 0)
+        print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
+    }
+    if (popular)
+      print_stats(1, "Popularity Preamble", _cur_popular_preamble_time_ms);
+    if (parallel) {
+      if (popular) {
+        print_par_stats(2, "Update RS (Start)", _pop_par_last_update_rs_start_times_ms, false);
+        print_par_stats(2, "Update RS", _pop_par_last_update_rs_times_ms);
+        if (G1RSBarrierUseQueue)
+          print_par_buffers(3, "Processed Buffers",
+                            _pop_par_last_update_rs_processed_buffers, true);
+        print_par_stats(2, "Scan RS", _pop_par_last_scan_rs_times_ms);
+        print_par_stats(2, "Closure app", _pop_par_last_closure_app_times_ms);
+        print_stats(2, "Evacuation", _cur_popular_evac_time_ms);
+        print_stats(2, "Other", pop_other_time);
+      }
+      if (!abandoned) {
+        print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
+        if (!popular) {
+          print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false);
+          print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
+          if (G1RSBarrierUseQueue)
+            print_par_buffers(3, "Processed Buffers",
+                              _par_last_update_rs_processed_buffers, true);
+        }
+        print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
+        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+        print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms);
+        print_par_buffers(3, "Scan-Only Regions",
+                          _par_last_scan_only_regions_scanned, true);
+        print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
+        print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
+        print_par_stats(2, "Termination", _par_last_termination_times_ms);
+        print_stats(2, "Other", parallel_other_time);
+        print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
+      }
+    } else {
+      if (popular) {
+        print_stats(2, "Update RS", pop_update_rs_time);
+        if (G1RSBarrierUseQueue)
+          print_stats(3, "Processed Buffers",
+                      (int)pop_update_rs_processed_buffers);
+        print_stats(2, "Scan RS", pop_scan_rs_time);
+        print_stats(2, "Closure App", pop_closure_app_time);
+        print_stats(2, "Evacuation", _cur_popular_evac_time_ms);
+        print_stats(2, "Other", pop_other_time);
+      }
+      if (!abandoned) {
+        if (!popular) {
+          print_stats(1, "Update RS", update_rs_time);
+          if (G1RSBarrierUseQueue)
+            print_stats(2, "Processed Buffers",
+                        (int)update_rs_processed_buffers);
+        }
+        print_stats(1, "Ext Root Scanning", ext_root_scan_time);
+        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+        print_stats(1, "Scan-Only Scanning", scan_only_time);
+        print_stats(1, "Scan RS", scan_rs_time);
+        print_stats(1, "Object Copying", obj_copy_time);
+      }
+    }
+    print_stats(1, "Other", other_time_ms);
+    for (int i = 0; i < _aux_num; ++i) {
+      if (_cur_aux_times_set[i]) {
+        char buffer[96];
+        sprintf(buffer, "Aux%d", i);
+        print_stats(1, buffer, _cur_aux_times_ms[i]);
+      }
+    }
+  }
+  if (PrintGCDetails)
+    gclog_or_tty->print("   [");
+  if (PrintGC || PrintGCDetails)
+    _g1->print_size_transition(gclog_or_tty,
+                               _cur_collection_pause_used_at_start_bytes,
+                               _g1->used(), _g1->capacity());
+  if (PrintGCDetails)
+    gclog_or_tty->print_cr("]");
+
+  _all_pause_times_ms->add(elapsed_ms);
+  summary->record_total_time_ms(elapsed_ms);
+  summary->record_other_time_ms(other_time_ms);
+  for (int i = 0; i < _aux_num; ++i)
+    if (_cur_aux_times_set[i])
+      _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
+
+  // Reset marks-between-pauses counter.
+  _n_marks_since_last_pause = 0;
+
+  // Update the efficiency-since-mark vars.
+  double proc_ms = elapsed_ms * (double) _parallel_gc_threads;
+  if (elapsed_ms < MIN_TIMER_GRANULARITY) {
+    // This usually happens due to the timer not having the required
+    // granularity. Some Linuxes are the usual culprits.
+    // We'll just set it to something (arbitrarily) small.
+    proc_ms = 1.0;
+  }
+  double cur_efficiency = (double) freed_bytes / proc_ms;
+
+  bool new_in_marking_window = _in_marking_window;
+  bool new_in_marking_window_im = false;
+  if (_should_initiate_conc_mark) {
+    new_in_marking_window = true;
+    new_in_marking_window_im = true;
+  }
+
+  if (in_young_gc_mode()) {
+    if (_last_full_young_gc) {
+      set_full_young_gcs(false);
+      _last_full_young_gc = false;
+    }
+
+    if ( !_last_young_gc_full ) {
+      if ( _should_revert_to_full_young_gcs ||
+           _known_garbage_ratio < 0.05 ||
+           (adaptive_young_list_length() &&
+           (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) ) {
+        set_full_young_gcs(true);
+      }
+    }
+    _should_revert_to_full_young_gcs = false;
+
+    if (_last_young_gc_full && !_during_marking)
+      _young_gc_eff_seq->add(cur_efficiency);
+  }
+
+  _short_lived_surv_rate_group->start_adding_regions();
+  // do that for any other surv rate groups
+
+  // <NEW PREDICTION>
+
+  if (!popular && !abandoned) {
+    double pause_time_ms = elapsed_ms;
+
+    size_t diff = 0;
+    if (_max_pending_cards >= _pending_cards)
+      diff = _max_pending_cards - _pending_cards;
+    _pending_card_diff_seq->add((double) diff);
+
+    double cost_per_card_ms = 0.0;
+    if (_pending_cards > 0) {
+      cost_per_card_ms = update_rs_time / (double) _pending_cards;
+      _cost_per_card_ms_seq->add(cost_per_card_ms);
+    }
+
+    double cost_per_scan_only_region_ms = 0.0;
+    if (scan_only_regions_scanned > 0.0) {
+      cost_per_scan_only_region_ms =
+        scan_only_time / scan_only_regions_scanned;
+      if (_in_marking_window_im)
+        _cost_per_scan_only_region_ms_during_cm_seq->add(cost_per_scan_only_region_ms);
+      else
+        _cost_per_scan_only_region_ms_seq->add(cost_per_scan_only_region_ms);
+    }
+
+    size_t cards_scanned = _g1->cards_scanned();
+
+    double cost_per_entry_ms = 0.0;
+    if (cards_scanned > 10) {
+      cost_per_entry_ms = scan_rs_time / (double) cards_scanned;
+      if (_last_young_gc_full)
+        _cost_per_entry_ms_seq->add(cost_per_entry_ms);
+      else
+        _partially_young_cost_per_entry_ms_seq->add(cost_per_entry_ms);
+    }
+
+    if (_max_rs_lengths > 0) {
+      double cards_per_entry_ratio =
+        (double) cards_scanned / (double) _max_rs_lengths;
+      if (_last_young_gc_full)
+        _fully_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+      else
+        _partially_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    }
+
+    // Both operands are size_t, so guard against underflow instead of
+    // relying on a signed comparison that would always be true.
+    if (_max_rs_lengths >= _recorded_rs_lengths) {
+      size_t rs_length_diff = _max_rs_lengths - _recorded_rs_lengths;
+      _rs_length_diff_seq->add((double) rs_length_diff);
+    }
+
+    size_t copied_bytes = surviving_bytes;
+    double cost_per_byte_ms = 0.0;
+    if (copied_bytes > 0) {
+      cost_per_byte_ms = obj_copy_time / (double) copied_bytes;
+      if (_in_marking_window)
+        _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
+      else
+        _cost_per_byte_ms_seq->add(cost_per_byte_ms);
+    }
+
+    double all_other_time_ms = pause_time_ms -
+      (update_rs_time + scan_only_time + scan_rs_time + obj_copy_time +
+       _mark_closure_time_ms + termination_time);
+
+    double young_other_time_ms = 0.0;
+    if (_recorded_young_regions > 0) {
+      young_other_time_ms =
+        _recorded_young_cset_choice_time_ms +
+        _recorded_young_free_cset_time_ms;
+      _young_other_cost_per_region_ms_seq->add(young_other_time_ms /
+                                             (double) _recorded_young_regions);
+    }
+    double non_young_other_time_ms = 0.0;
+    if (_recorded_non_young_regions > 0) {
+      non_young_other_time_ms =
+        _recorded_non_young_cset_choice_time_ms +
+        _recorded_non_young_free_cset_time_ms;
+
+      _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms /
+                                         (double) _recorded_non_young_regions);
+    }
+
+    double constant_other_time_ms = all_other_time_ms -
+      (young_other_time_ms + non_young_other_time_ms);
+    _constant_other_time_ms_seq->add(constant_other_time_ms);
+
+    double survival_ratio = 0.0;
+    if (_bytes_in_collection_set_before_gc > 0) {
+      survival_ratio = (double) bytes_in_to_space_during_gc() /
+        (double) _bytes_in_collection_set_before_gc;
+    }
+
+    _pending_cards_seq->add((double) _pending_cards);
+    _scanned_cards_seq->add((double) cards_scanned);
+    _rs_lengths_seq->add((double) _max_rs_lengths);
+
+    double expensive_region_limit_ms =
+      (double) G1MaxPauseTimeMS - predict_constant_other_time_ms();
+    if (expensive_region_limit_ms < 0.0) {
+      // this means that the other time was predicted to be longer than
+      // the max pause time
+      expensive_region_limit_ms = (double) G1MaxPauseTimeMS;
+    }
+    _expensive_region_limit_ms = expensive_region_limit_ms;
+
+    if (PREDICTIONS_VERBOSE) {
+      gclog_or_tty->print_cr("");
+      gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d "
+                    "REGIONS %d %d %d %d "
+                    "PENDING_CARDS %d %d "
+                    "CARDS_SCANNED %d %d "
+                    "RS_LENGTHS %d %d "
+                    "SCAN_ONLY_SCAN %1.6lf %1.6lf "
+                    "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf "
+                    "SURVIVAL_RATIO %1.6lf %1.6lf "
+                    "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf "
+                    "OTHER_YOUNG %1.6lf %1.6lf "
+                    "OTHER_NON_YOUNG %1.6lf %1.6lf "
+                    "VTIME_DIFF %1.6lf TERMINATION %1.6lf "
+                    "ELAPSED %1.6lf %1.6lf ",
+                    _cur_collection_start_sec,
+                    (!_last_young_gc_full) ? 2 :
+                    (last_pause_included_initial_mark) ? 1 : 0,
+                    _recorded_region_num,
+                    _recorded_young_regions,
+                    _recorded_scan_only_regions,
+                    _recorded_non_young_regions,
+                    _predicted_pending_cards, _pending_cards,
+                    _predicted_cards_scanned, cards_scanned,
+                    _predicted_rs_lengths, _max_rs_lengths,
+                    _predicted_scan_only_scan_time_ms, scan_only_time,
+                    _predicted_rs_update_time_ms, update_rs_time,
+                    _predicted_rs_scan_time_ms, scan_rs_time,
+                    _predicted_survival_ratio, survival_ratio,
+                    _predicted_object_copy_time_ms, obj_copy_time,
+                    _predicted_constant_other_time_ms, constant_other_time_ms,
+                    _predicted_young_other_time_ms, young_other_time_ms,
+                    _predicted_non_young_other_time_ms,
+                    non_young_other_time_ms,
+                    _vtime_diff_ms, termination_time,
+                    _predicted_pause_time_ms, elapsed_ms);
+    }
+
+    if (G1PolicyVerbose > 0) {
+      gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms",
+                    _predicted_pause_time_ms,
+                    (_within_target) ? "within" : "outside",
+                    elapsed_ms);
+    }
+
+  }
+
+  _in_marking_window = new_in_marking_window;
+  _in_marking_window_im = new_in_marking_window_im;
+  _free_regions_at_end_of_collection = _g1->free_regions();
+  _scan_only_regions_at_end_of_collection = _g1->young_list_length();
+  calculate_young_list_min_length();
+  calculate_young_list_target_config();
+
+  // </NEW PREDICTION>
+
+  _target_pause_time_ms = -1.0;
+
+  // TODO: calculate tenuring threshold
+  _tenuring_threshold = MaxTenuringThreshold;
+}
+
+// <NEW PREDICTION>
+
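+// Predicts the elapsed time of evacuating the current young list (plus
+// 'adjustment' extra regions) from the expected remembered set update and
+// scan work, object copying, the per-region young overhead, and the
+// constant per-pause overhead.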
+double
+G1CollectorPolicy::
+predict_young_collection_elapsed_time_ms(size_t adjustment) {
+  guarantee( adjustment == 0 || adjustment == 1, "invariant" );
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  size_t young_num = g1h->young_list_length();
+  if (young_num == 0)
+    return 0.0;
+
+  young_num += adjustment;
+  size_t pending_cards = predict_pending_cards();
+  size_t rs_lengths = g1h->young_list_sampled_rs_lengths() +
+                      predict_rs_length_diff();
+  size_t card_num;
+  if (full_young_gcs())
+    card_num = predict_young_card_num(rs_lengths);
+  else
+    card_num = predict_non_young_card_num(rs_lengths);
+  size_t young_byte_size = young_num * HeapRegion::GrainBytes;
+  double accum_yg_surv_rate =
+    _short_lived_surv_rate_group->accum_surv_rate(adjustment);
+
+  size_t bytes_to_copy =
+    (size_t) (accum_yg_surv_rate * (double) HeapRegion::GrainBytes);
+
+  return
+    predict_rs_update_time_ms(pending_cards) +
+    predict_rs_scan_time_ms(card_num) +
+    predict_object_copy_time_ms(bytes_to_copy) +
+    predict_young_other_time_ms(young_num) +
+    predict_constant_other_time_ms();
+}
+
+double
+G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) {
+  size_t rs_length = predict_rs_length_diff();
+  size_t card_num;
+  if (full_young_gcs())
+    card_num = predict_young_card_num(rs_length);
+  else
+    card_num = predict_non_young_card_num(rs_length);
+  return predict_base_elapsed_time_ms(pending_cards, card_num);
+}
+
+double
+G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards,
+                                                size_t scanned_cards) {
+  return
+    predict_rs_update_time_ms(pending_cards) +
+    predict_rs_scan_time_ms(scanned_cards) +
+    predict_constant_other_time_ms();
+}
+
+double
+G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
+                                                  bool young) {
+  size_t rs_length = hr->rem_set()->occupied();
+  size_t card_num;
+  if (full_young_gcs())
+    card_num = predict_young_card_num(rs_length);
+  else
+    card_num = predict_non_young_card_num(rs_length);
+  size_t bytes_to_copy = predict_bytes_to_copy(hr);
+
+  double region_elapsed_time_ms =
+    predict_rs_scan_time_ms(card_num) +
+    predict_object_copy_time_ms(bytes_to_copy);
+
+  if (young)
+    region_elapsed_time_ms += predict_young_other_time_ms(1);
+  else
+    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
+
+  return region_elapsed_time_ms;
+}
+
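+// For a marked region we expect to copy at most its live data; for a young
+// region we scale its used bytes by the predicted survival rate for its age.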
+size_t
+G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) {
+  size_t bytes_to_copy;
+  if (hr->is_marked())
+    bytes_to_copy = hr->max_live_bytes();
+  else {
+    guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
+               "invariant" );
+    int age = hr->age_in_surv_rate_group();
+    double yg_surv_rate = predict_yg_surv_rate(age);
+    bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
+  }
+
+  return bytes_to_copy;
+}
+
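+// The recording calls below accumulate per-region statistics for the chosen
+// collection set; under PREDICTIONS_VERBOSE they also build the predicted
+// pause-time breakdown that record_collection_pause_end() prints.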
+void
+G1CollectorPolicy::start_recording_regions() {
+  _recorded_rs_lengths            = 0;
+  _recorded_scan_only_regions     = 0;
+  _recorded_young_regions         = 0;
+  _recorded_non_young_regions     = 0;
+
+#if PREDICTIONS_VERBOSE
+  _predicted_rs_lengths           = 0;
+  _predicted_cards_scanned        = 0;
+
+  _recorded_marked_bytes          = 0;
+  _recorded_young_bytes           = 0;
+  _predicted_bytes_to_copy        = 0;
+#endif // PREDICTIONS_VERBOSE
+}
+
+void
+G1CollectorPolicy::record_cset_region(HeapRegion* hr, bool young) {
+  if (young) {
+    ++_recorded_young_regions;
+  } else {
+    ++_recorded_non_young_regions;
+  }
+#if PREDICTIONS_VERBOSE
+  if (young) {
+    _recorded_young_bytes += hr->asSpace()->used();
+  } else {
+    _recorded_marked_bytes += hr->max_live_bytes();
+  }
+  _predicted_bytes_to_copy += predict_bytes_to_copy(hr);
+#endif // PREDICTIONS_VERBOSE
+
+  size_t rs_length = hr->rem_set()->occupied();
+  _recorded_rs_lengths += rs_length;
+}
+
+void
+G1CollectorPolicy::record_scan_only_regions(size_t scan_only_length) {
+  _recorded_scan_only_regions = scan_only_length;
+}
+
+void
+G1CollectorPolicy::end_recording_regions() {
+#if PREDICTIONS_VERBOSE
+  _predicted_pending_cards = predict_pending_cards();
+  _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff();
+  if (full_young_gcs())
+    _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths);
+  else
+    _predicted_cards_scanned +=
+      predict_non_young_card_num(_predicted_rs_lengths);
+  _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions;
+
+  _predicted_young_survival_ratio = 0.0;
+  for (int i = 0; i < _recorded_young_regions; ++i)
+    _predicted_young_survival_ratio += predict_yg_surv_rate(i);
+  _predicted_young_survival_ratio /= (double) _recorded_young_regions;
+
+  _predicted_scan_only_scan_time_ms =
+    predict_scan_only_time_ms(_recorded_scan_only_regions);
+  _predicted_rs_update_time_ms =
+    predict_rs_update_time_ms(_g1->pending_card_num());
+  _predicted_rs_scan_time_ms =
+    predict_rs_scan_time_ms(_predicted_cards_scanned);
+  _predicted_object_copy_time_ms =
+    predict_object_copy_time_ms(_predicted_bytes_to_copy);
+  _predicted_constant_other_time_ms =
+    predict_constant_other_time_ms();
+  _predicted_young_other_time_ms =
+    predict_young_other_time_ms(_recorded_young_regions);
+  _predicted_non_young_other_time_ms =
+    predict_non_young_other_time_ms(_recorded_non_young_regions);
+
+  _predicted_pause_time_ms =
+    _predicted_scan_only_scan_time_ms +
+    _predicted_rs_update_time_ms +
+    _predicted_rs_scan_time_ms +
+    _predicted_object_copy_time_ms +
+    _predicted_constant_other_time_ms +
+    _predicted_young_other_time_ms +
+    _predicted_non_young_other_time_ms;
+#endif // PREDICTIONS_VERBOSE
+}
+
+void G1CollectorPolicy::check_if_region_is_too_expensive(double
+                                                           predicted_time_ms) {
+  // I don't think we need to do this when in young GC mode since
+  // marking will be initiated next time we hit the soft limit anyway...
+  if (predicted_time_ms > _expensive_region_limit_ms) {
+    if (!in_young_gc_mode()) {
+      set_full_young_gcs(true);
+      _should_initiate_conc_mark = true;
+    } else {
+      // no point in doing another partial one
+      _should_revert_to_full_young_gcs = true;
+    }
+  }
+}
+
+// </NEW PREDICTION>
+
+
+void G1CollectorPolicy::update_recent_gc_times(double end_time_sec,
+                                               double elapsed_ms) {
+  _recent_gc_times_ms->add(elapsed_ms);
+  _recent_prev_end_times_for_all_gcs_sec->add(end_time_sec);
+  _prev_collection_pause_end_ms = end_time_sec * 1000.0;
+}
+
+double G1CollectorPolicy::recent_avg_time_for_pauses_ms() {
+  if (_recent_pause_times_ms->num() == 0) return (double) G1MaxPauseTimeMS;
+  else return _recent_pause_times_ms->avg();
+}
+
+double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() {
+  if (_recent_CH_strong_roots_times_ms->num() == 0)
+    return (double)G1MaxPauseTimeMS/3.0;
+  else return _recent_CH_strong_roots_times_ms->avg();
+}
+
+double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() {
+  if (_recent_G1_strong_roots_times_ms->num() == 0)
+    return (double)G1MaxPauseTimeMS/3.0;
+  else return _recent_G1_strong_roots_times_ms->avg();
+}
+
+double G1CollectorPolicy::recent_avg_time_for_evac_ms() {
+  if (_recent_evac_times_ms->num() == 0) return (double)G1MaxPauseTimeMS/3.0;
+  else return _recent_evac_times_ms->avg();
+}
+
+int G1CollectorPolicy::number_of_recent_gcs() {
+  assert(_recent_CH_strong_roots_times_ms->num() ==
+         _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync");
+  assert(_recent_G1_strong_roots_times_ms->num() ==
+         _recent_evac_times_ms->num(), "Sequence out of sync");
+  assert(_recent_evac_times_ms->num() ==
+         _recent_pause_times_ms->num(), "Sequence out of sync");
+  assert(_recent_pause_times_ms->num() ==
+         _recent_CS_bytes_used_before->num(), "Sequence out of sync");
+  assert(_recent_CS_bytes_used_before->num() ==
+         _recent_CS_bytes_surviving->num(), "Sequence out of sync");
+  return _recent_pause_times_ms->num();
+}
+
+double G1CollectorPolicy::recent_avg_survival_fraction() {
+  return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving,
+                                           _recent_CS_bytes_used_before);
+}
+
+double G1CollectorPolicy::last_survival_fraction() {
+  return last_survival_fraction_work(_recent_CS_bytes_surviving,
+                                     _recent_CS_bytes_used_before);
+}
+
+double
+G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving,
+                                                     TruncatedSeq* before) {
+  assert(surviving->num() == before->num(), "Sequence out of sync");
+  if (before->sum() > 0.0) {
+      double recent_survival_rate = surviving->sum() / before->sum();
+      // We exempt parallel collection from this check because Alloc Buffer
+      // fragmentation can produce negative collections.
+      // Further, we're now always doing parallel collection.  But I'm still
+      // leaving this here as a placeholder for a more precise assertion later.
+      // (DLD, 10/05.)
+      assert((true || ParallelGCThreads > 0) ||
+             _g1->evacuation_failed() ||
+             recent_survival_rate <= 1.0, "Or bad frac");
+      return recent_survival_rate;
+  } else {
+    return 1.0; // Be conservative.
+  }
+}
+
+double
+G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving,
+                                               TruncatedSeq* before) {
+  assert(surviving->num() == before->num(), "Sequence out of sync");
+  if (surviving->num() > 0 && before->last() > 0.0) {
+    double last_survival_rate = surviving->last() / before->last();
+    // We exempt parallel collection from this check because Alloc Buffer
+    // fragmentation can produce negative collections.
+    // Further, we're now always doing parallel collection.  But I'm still
+    // leaving this here as a placeholder for a more precise assertion later.
+    // (DLD, 10/05.)
+    assert((true || ParallelGCThreads > 0) ||
+           last_survival_rate <= 1.0, "Or bad frac");
+    return last_survival_rate;
+  } else {
+    return 1.0;
+  }
+}
+
+static const int survival_min_obs = 5;
+static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 };
+static const double min_survival_rate = 0.1;
+
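+// Returns a conservative survival fraction: the average is raised to at
+// least the latest observation, to a floor that shrinks as more GCs are
+// observed, and to min_survival_rate, and is finally capped at 1.0.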
+double
+G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg,
+                                                           double latest) {
+  double res = avg;
+  if (number_of_recent_gcs() < survival_min_obs) {
+    res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]);
+  }
+  res = MAX2(res, latest);
+  res = MAX2(res, min_survival_rate);
+  // In the parallel case, LAB fragmentation can produce "negative
+  // collections"; so can evac failure.  Cap at 1.0
+  res = MIN2(res, 1.0);
+  return res;
+}
+
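+// When the recent GC time ratio exceeds G1GCPct, expand by the smaller of
+// the currently committed size (i.e. doubling) and G1ExpandByPctOfAvail
+// percent of the uncommitted space, by at least min_expand_bytes but never
+// more than the remaining uncommitted space.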
+size_t G1CollectorPolicy::expansion_amount() {
+  if ((int)(recent_avg_pause_time_ratio() * 100.0) > G1GCPct) {
+    // We will double the existing space, or take G1ExpandByPctOfAvail % of
+    // the available expansion space, whichever is smaller, bounded below
+    // by a minimum expansion (unless that's all that's left.)
+    const size_t min_expand_bytes = 1*M;
+    size_t reserved_bytes = _g1->g1_reserved_obj_bytes();
+    size_t committed_bytes = _g1->capacity();
+    size_t uncommitted_bytes = reserved_bytes - committed_bytes;
+    size_t expand_bytes;
+    size_t expand_bytes_via_pct =
+      uncommitted_bytes * G1ExpandByPctOfAvail / 100;
+    expand_bytes = MIN2(expand_bytes_via_pct, committed_bytes);
+    expand_bytes = MAX2(expand_bytes, min_expand_bytes);
+    expand_bytes = MIN2(expand_bytes, uncommitted_bytes);
+    if (G1PolicyVerbose > 1) {
+      gclog_or_tty->print("Decided to expand: ratio = %5.2f, "
+                 "committed = %d%s, uncommited = %d%s, via pct = %d%s.\n"
+                 "                   Answer = %d.\n",
+                 recent_avg_pause_time_ratio(),
+                 byte_size_in_proper_unit(committed_bytes),
+                 proper_unit_for_byte_size(committed_bytes),
+                 byte_size_in_proper_unit(uncommitted_bytes),
+                 proper_unit_for_byte_size(uncommitted_bytes),
+                 byte_size_in_proper_unit(expand_bytes_via_pct),
+                 proper_unit_for_byte_size(expand_bytes_via_pct),
+                 byte_size_in_proper_unit(expand_bytes),
+                 proper_unit_for_byte_size(expand_bytes));
+    }
+    return expand_bytes;
+  } else {
+    return 0;
+  }
+}
+
+void G1CollectorPolicy::note_start_of_mark_thread() {
+  _mark_thread_startup_sec = os::elapsedTime();
+}
+
+class CountCSClosure: public HeapRegionClosure {
+  G1CollectorPolicy* _g1_policy;
+public:
+  CountCSClosure(G1CollectorPolicy* g1_policy) :
+    _g1_policy(g1_policy) {}
+  bool doHeapRegion(HeapRegion* r) {
+    _g1_policy->_bytes_in_collection_set_before_gc += r->used();
+    return false;
+  }
+};
+
+void G1CollectorPolicy::count_CS_bytes_used() {
+  CountCSClosure cs_closure(this);
+  _g1->collection_set_iterate(&cs_closure);
+}
+
+static void print_indent(int level) {
+  for (int j = 0; j < level+1; ++j)
+    gclog_or_tty->print("   ");
+}
+
+void G1CollectorPolicy::print_summary (int level,
+                                       const char* str,
+                                       NumberSeq* seq) const {
+  double sum = seq->sum();
+  print_indent(level);
+  gclog_or_tty->print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)",
+                str, sum / 1000.0, seq->avg());
+}
+
+void G1CollectorPolicy::print_summary_sd (int level,
+                                          const char* str,
+                                          NumberSeq* seq) const {
+  print_summary(level, str, seq);
+  print_indent(level + 5);
+  gclog_or_tty->print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
+                seq->num(), seq->sd(), seq->maximum());
+}
+
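+// Sanity-checks the recorded "Other" times against the value calculated
+// from the component sequences, flagging sums or averages that differ by
+// more than about 10% as well as negative values.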
+void G1CollectorPolicy::check_other_times(int level,
+                                        NumberSeq* other_times_ms,
+                                        NumberSeq* calc_other_times_ms) const {
+  bool should_print = false;
+
+  double max_sum = MAX2(fabs(other_times_ms->sum()),
+                        fabs(calc_other_times_ms->sum()));
+  double min_sum = MIN2(fabs(other_times_ms->sum()),
+                        fabs(calc_other_times_ms->sum()));
+  double sum_ratio = max_sum / min_sum;
+  if (sum_ratio > 1.1) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER SUM DOESN'T MATCH RECORDED ###");
+  }
+
+  double max_avg = MAX2(fabs(other_times_ms->avg()),
+                        fabs(calc_other_times_ms->avg()));
+  double min_avg = MIN2(fabs(other_times_ms->avg()),
+                        fabs(calc_other_times_ms->avg()));
+  double avg_ratio = max_avg / min_avg;
+  if (avg_ratio > 1.1) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER AVG DOESN'T MATCH RECORDED ###");
+  }
+
+  if (other_times_ms->sum() < -0.01) {
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## RECORDED OTHER SUM IS NEGATIVE ###");
+  }
+
+  if (other_times_ms->avg() < -0.01) {
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## RECORDED OTHER AVG IS NEGATIVE ###");
+  }
+
+  if (calc_other_times_ms->sum() < -0.01) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER SUM IS NEGATIVE ###");
+  }
+
+  if (calc_other_times_ms->avg() < -0.01) {
+    should_print = true;
+    print_indent(level + 1);
+    gclog_or_tty->print_cr("## CALCULATED OTHER AVG IS NEGATIVE ###");
+  }
+
+  if (should_print)
+    print_summary(level, "Other(Calc)", calc_other_times_ms);
+}
+
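+// Prints a hierarchical breakdown of the recorded pause components for one
+// pause category, cross-checking the "Other" times at each level.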
+void G1CollectorPolicy::print_summary(PauseSummary* summary) const {
+  bool parallel = ParallelGCThreads > 0;
+  MainBodySummary*    body_summary = summary->main_body_summary();
+  PopPreambleSummary* preamble_summary = summary->pop_preamble_summary();
+
+  if (summary->get_total_seq()->num() > 0) {
+    print_summary_sd(0,
+                     (preamble_summary == NULL) ? "Non-Popular Pauses" :
+                     "Popular Pauses",
+                     summary->get_total_seq());
+    if (preamble_summary != NULL) {
+      print_summary(1, "Popularity Preamble",
+                    preamble_summary->get_pop_preamble_seq());
+      print_summary(2, "Update RS", preamble_summary->get_pop_update_rs_seq());
+      print_summary(2, "Scan RS", preamble_summary->get_pop_scan_rs_seq());
+      print_summary(2, "Closure App",
+                    preamble_summary->get_pop_closure_app_seq());
+      print_summary(2, "Evacuation",
+                    preamble_summary->get_pop_evacuation_seq());
+      print_summary(2, "Other", preamble_summary->get_pop_other_seq());
+      {
+        NumberSeq* other_parts[] = {
+          preamble_summary->get_pop_update_rs_seq(),
+          preamble_summary->get_pop_scan_rs_seq(),
+          preamble_summary->get_pop_closure_app_seq(),
+          preamble_summary->get_pop_evacuation_seq()
+        };
+        NumberSeq calc_other_times_ms(preamble_summary->get_pop_preamble_seq(),
+                                      4, other_parts);
+        check_other_times(2, preamble_summary->get_pop_other_seq(),
+                          &calc_other_times_ms);
+      }
+    }
+    if (body_summary != NULL) {
+      print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
+      if (parallel) {
+        print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
+        print_summary(2, "Update RS", body_summary->get_update_rs_seq());
+        print_summary(2, "Ext Root Scanning",
+                      body_summary->get_ext_root_scan_seq());
+        print_summary(2, "Mark Stack Scanning",
+                      body_summary->get_mark_stack_scan_seq());
+        print_summary(2, "Scan-Only Scanning",
+                      body_summary->get_scan_only_seq());
+        print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
+        print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
+        print_summary(2, "Termination", body_summary->get_termination_seq());
+        print_summary(2, "Other", body_summary->get_parallel_other_seq());
+        {
+          NumberSeq* other_parts[] = {
+            body_summary->get_update_rs_seq(),
+            body_summary->get_ext_root_scan_seq(),
+            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_scan_only_seq(),
+            body_summary->get_scan_rs_seq(),
+            body_summary->get_obj_copy_seq(),
+            body_summary->get_termination_seq()
+          };
+          NumberSeq calc_other_times_ms(body_summary->get_parallel_seq(),
+                                        7, other_parts);
+          check_other_times(2, body_summary->get_parallel_other_seq(),
+                            &calc_other_times_ms);
+        }
+        print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq());
+        print_summary(1, "Clear CT", body_summary->get_clear_ct_seq());
+      } else {
+        print_summary(1, "Update RS", body_summary->get_update_rs_seq());
+        print_summary(1, "Ext Root Scanning",
+                      body_summary->get_ext_root_scan_seq());
+        print_summary(1, "Mark Stack Scanning",
+                      body_summary->get_mark_stack_scan_seq());
+        print_summary(1, "Scan-Only Scanning",
+                      body_summary->get_scan_only_seq());
+        print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
+        print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
+      }
+    }
+    print_summary(1, "Other", summary->get_other_seq());
+    {
+      NumberSeq calc_other_times_ms;
+      if (body_summary != NULL) {
+        // not abandoned
+        if (parallel) {
+          // parallel
+          NumberSeq* other_parts[] = {
+            body_summary->get_satb_drain_seq(),
+            (preamble_summary == NULL) ? NULL :
+              preamble_summary->get_pop_preamble_seq(),
+            body_summary->get_parallel_seq(),
+            body_summary->get_clear_ct_seq()
+          };
+          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                          4, other_parts);
+        } else {
+          // serial
+          NumberSeq* other_parts[] = {
+            body_summary->get_satb_drain_seq(),
+            (preamble_summary == NULL) ? NULL :
+              preamble_summary->get_pop_preamble_seq(),
+            body_summary->get_update_rs_seq(),
+            body_summary->get_ext_root_scan_seq(),
+            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_scan_only_seq(),
+            body_summary->get_scan_rs_seq(),
+            body_summary->get_obj_copy_seq()
+          };
+          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                          8, other_parts);
+        }
+      } else {
+        // abandoned
+        NumberSeq* other_parts[] = {
+          (preamble_summary == NULL) ? NULL :
+            preamble_summary->get_pop_preamble_seq()
+        };
+        calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                        1, other_parts);
+      }
+      check_other_times(1,  summary->get_other_seq(), &calc_other_times_ms);
+    }
+  } else {
+    print_indent(0);
+    gclog_or_tty->print_cr("none");
+  }
+  gclog_or_tty->print_cr("");
+}
+
+void
+G1CollectorPolicy::print_abandoned_summary(PauseSummary* non_pop_summary,
+                                           PauseSummary* pop_summary) const {
+  bool printed = false;
+  if (non_pop_summary->get_total_seq()->num() > 0) {
+    printed = true;
+    print_summary(non_pop_summary);
+  }
+  if (pop_summary->get_total_seq()->num() > 0) {
+    printed = true;
+    print_summary(pop_summary);
+  }
+
+  if (!printed) {
+    print_indent(0);
+    gclog_or_tty->print_cr("none");
+    gclog_or_tty->print_cr("");
+  }
+}
+
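+// Dumps the accumulated pause and full GC summaries when TraceGen0Time or
+// TraceGen1Time is enabled.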
+void G1CollectorPolicy::print_tracing_info() const {
+  if (TraceGen0Time) {
+    gclog_or_tty->print_cr("ALL PAUSES");
+    print_summary_sd(0, "Total", _all_pause_times_ms);
+    gclog_or_tty->print_cr("");
+    gclog_or_tty->print_cr("");
+    gclog_or_tty->print_cr("   Full Young GC Pauses:    %8d", _full_young_pause_num);
+    gclog_or_tty->print_cr("   Partial Young GC Pauses: %8d", _partial_young_pause_num);
+    gclog_or_tty->print_cr("");
+
+    gclog_or_tty->print_cr("NON-POPULAR PAUSES");
+    print_summary(_non_pop_summary);
+
+    gclog_or_tty->print_cr("POPULAR PAUSES");
+    print_summary(_pop_summary);
+
+    gclog_or_tty->print_cr("ABANDONED PAUSES");
+    print_abandoned_summary(_non_pop_abandoned_summary,
+                            _pop_abandoned_summary);
+
+    gclog_or_tty->print_cr("MISC");
+    print_summary_sd(0, "Stop World", _all_stop_world_times_ms);
+    print_summary_sd(0, "Yields", _all_yield_times_ms);
+    for (int i = 0; i < _aux_num; ++i) {
+      if (_all_aux_times_ms[i].num() > 0) {
+        char buffer[96];
+        sprintf(buffer, "Aux%d", i);
+        print_summary_sd(0, buffer, &_all_aux_times_ms[i]);
+      }
+    }
+
+    size_t all_region_num = _region_num_young + _region_num_tenured;
+    gclog_or_tty->print_cr("   New Regions %8d, Young %8d (%6.2lf%%), "
+               "Tenured %8d (%6.2lf%%)",
+               all_region_num,
+               _region_num_young,
+               (double) _region_num_young / (double) all_region_num * 100.0,
+               _region_num_tenured,
+               (double) _region_num_tenured / (double) all_region_num * 100.0);
+
+    if (!G1RSBarrierUseQueue) {
+      gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) "
+                    "did zero traversals.",
+                    _conc_refine_enabled, _conc_refine_zero_traversals,
+                    _conc_refine_enabled > 0 ?
+                    100.0 * (float)_conc_refine_zero_traversals/
+                    (float)_conc_refine_enabled : 0.0);
+      gclog_or_tty->print_cr("  Max # of traversals = %d.",
+                    _conc_refine_max_traversals);
+      gclog_or_tty->print_cr("");
+    }
+  }
+  if (TraceGen1Time) {
+    if (_all_full_gc_times_ms->num() > 0) {
+      gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s",
+                 _all_full_gc_times_ms->num(),
+                 _all_full_gc_times_ms->sum() / 1000.0);
+      gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times_ms->avg());
+      gclog_or_tty->print_cr("                     [std. dev = %8.2f ms, max = %8.2f ms]",
+                    _all_full_gc_times_ms->sd(),
+                    _all_full_gc_times_ms->maximum());
+    }
+  }
+}
+
+void G1CollectorPolicy::print_yg_surv_rate_info() const {
+#ifndef PRODUCT
+  _short_lived_surv_rate_group->print_surv_rate_summary();
+  // add this call for any other surv rate groups
+#endif // PRODUCT
+}
+
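+// Adapts the concurrent refinement delta towards G1ConcRefineTargTraversals:
+// the delta grows when the last series did fewer traversals than the target
+// (by 4x if it did none) and shrinks when it did more, clamped to
+// [1, n_regions].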
+void G1CollectorPolicy::update_conc_refine_data() {
+  unsigned traversals = _g1->concurrent_g1_refine()->disable();
+  if (traversals == 0) _conc_refine_zero_traversals++;
+  _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals,
+                                     (size_t)traversals);
+
+  if (G1PolicyVerbose > 1)
+    gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals);
+  double multiplier = 1.0;
+  if (traversals == 0) {
+    multiplier = 4.0;
+  } else if (traversals > (size_t)G1ConcRefineTargTraversals) {
+    multiplier = 1.0/1.5;
+  } else if (traversals < (size_t)G1ConcRefineTargTraversals) {
+    multiplier = 1.5;
+  }
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("  Multiplier = %7.2f.", multiplier);
+    gclog_or_tty->print("  Delta went from %d regions to ",
+               _conc_refine_current_delta);
+  }
+  _conc_refine_current_delta =
+    MIN2(_g1->n_regions(),
+         (size_t)(_conc_refine_current_delta * multiplier));
+  _conc_refine_current_delta =
+    MAX2(_conc_refine_current_delta, (size_t)1);
+  if (G1PolicyVerbose > 1) {
+    gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta);
+  }
+  _conc_refine_enabled++;
+}
+
+void G1CollectorPolicy::set_single_region_collection_set(HeapRegion* hr) {
+  assert(collection_set() == NULL, "Must be no current CS.");
+  _collection_set_size = 0;
+  _collection_set_bytes_used_before = 0;
+  add_to_collection_set(hr);
+  count_CS_bytes_used();
+}
+
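+// A newly allocated region goes on the young list only while the list is
+// below its target length; the young/tenured region counters are updated
+// accordingly.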
+bool
+G1CollectorPolicy::should_add_next_region_to_young_list() {
+  assert(in_young_gc_mode(), "should be in young GC mode");
+  bool ret;
+  size_t young_list_length = _g1->young_list_length();
+
+  if (young_list_length < _young_list_target_length) {
+    ret = true;
+    ++_region_num_young;
+  } else {
+    ret = false;
+    ++_region_num_tenured;
+  }
+
+  return ret;
+}
+
+#ifndef PRODUCT
+// for debugging, bit of a hack...
+static char*
+region_num_to_mbs(int length) {
+  static char buffer[64];
+  double bytes = (double) (length * HeapRegion::GrainBytes);
+  double mbs = bytes / (double) (1024 * 1024);
+  sprintf(buffer, "%7.2lfMB", mbs);
+  return buffer;
+}
+#endif // PRODUCT
+
+void
+G1CollectorPolicy::checkpoint_conc_overhead() {
+  double conc_overhead = 0.0;
+  if (G1AccountConcurrentOverhead)
+    conc_overhead = COTracker::totalPredConcOverhead();
+  _mmu_tracker->update_conc_overhead(conc_overhead);
+#if 0
+  gclog_or_tty->print(" CO %1.4lf TARGET %1.4lf",
+             conc_overhead, _mmu_tracker->max_gc_time());
+#endif
+}
+
+
+uint G1CollectorPolicy::max_regions(int purpose) {
+  switch (purpose) {
+    case GCAllocForSurvived:
+      return G1MaxSurvivorRegions;
+    case GCAllocForTenured:
+      return UINT_MAX;
+    default:
+      return UINT_MAX;
+  };
+}
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+set_single_region_collection_set(HeapRegion* hr) {
+  G1CollectorPolicy::set_single_region_collection_set(hr);
+  _collectionSetChooser->removeRegion(hr);
+}
+
+
+bool
+G1CollectorPolicy_BestRegionsFirst::should_do_collection_pause(size_t
+                                                               word_size) {
+  assert(_g1->regions_accounted_for(), "Region leakage!");
+  // Initiate a pause when we reach the steady-state "used" target.
+  size_t used_hard = (_g1->capacity() / 100) * G1SteadyStateUsed;
+  size_t used_soft =
+   MAX2((_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta),
+        used_hard/2);
+  size_t used = _g1->used();
+
+  double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
+
+  size_t young_list_length = _g1->young_list_length();
+  bool reached_target_length = young_list_length >= _young_list_target_length;
+
+  if (in_young_gc_mode()) {
+    if (reached_target_length) {
+      assert( young_list_length > 0 && _g1->young_list_length() > 0,
+              "invariant" );
+      _target_pause_time_ms = max_pause_time_ms;
+      return true;
+    }
+  } else {
+    guarantee( false, "should not reach here" );
+  }
+
+  return false;
+}
+
+#ifndef PRODUCT
+class HRSortIndexIsOKClosure: public HeapRegionClosure {
+  CollectionSetChooser* _chooser;
+public:
+  HRSortIndexIsOKClosure(CollectionSetChooser* chooser) :
+    _chooser(chooser) {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      assert(_chooser->regionProperlyOrdered(r), "Ought to be.");
+    }
+    return false;
+  }
+};
+
+bool G1CollectorPolicy_BestRegionsFirst::assertMarkedBytesDataOK() {
+  HRSortIndexIsOKClosure cl(_collectionSetChooser);
+  _g1->heap_region_iterate(&cl);
+  return true;
+}
+#endif
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+record_collection_pause_start(double start_time_sec, size_t start_used) {
+  G1CollectorPolicy::record_collection_pause_start(start_time_sec, start_used);
+}
+
+class NextNonCSElemFinder: public HeapRegionClosure {
+  HeapRegion* _res;
+public:
+  NextNonCSElemFinder(): _res(NULL) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set()) {
+      _res = r;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  HeapRegion* res() { return _res; }
+};
+
+class KnownGarbageClosure: public HeapRegionClosure {
+  CollectionSetChooser* _hrSorted;
+
+public:
+  KnownGarbageClosure(CollectionSetChooser* hrSorted) :
+    _hrSorted(hrSorted)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    // We only include humongous regions in collection
+    // sets when concurrent mark shows that their contained object is
+    // unreachable.
+
+    // Do we have any marking information for this region?
+    if (r->is_marked()) {
+      // We don't include humongous regions in collection
+      // sets because we collect them immediately at the end of a marking
+      // cycle.  We also don't include young regions because we *must*
+      // include them in the next collection pause.
+      if (!r->isHumongous() && !r->is_young()) {
+        _hrSorted->addMarkedHeapRegion(r);
+      }
+    }
+    return false;
+  }
+};
+
+class ParKnownGarbageHRClosure: public HeapRegionClosure {
+  CollectionSetChooser* _hrSorted;
+  jint _marked_regions_added;
+  jint _chunk_size;
+  jint _cur_chunk_idx;
+  jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end)
+  int _worker;
+  int _invokes;
+
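+  // Workers claim chunks of slots in the chooser's marked-region array so
+  // that regions can be added in parallel without per-region synchronization.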
+  void get_new_chunk() {
+    _cur_chunk_idx = _hrSorted->getParMarkedHeapRegionChunk(_chunk_size);
+    _cur_chunk_end = _cur_chunk_idx + _chunk_size;
+  }
+  void add_region(HeapRegion* r) {
+    if (_cur_chunk_idx == _cur_chunk_end) {
+      get_new_chunk();
+    }
+    assert(_cur_chunk_idx < _cur_chunk_end, "postcondition");
+    _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r);
+    _marked_regions_added++;
+    _cur_chunk_idx++;
+  }
+
+public:
+  ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted,
+                           jint chunk_size,
+                           int worker) :
+    _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
+    _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0),
+    _invokes(0)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    // We only include humongous regions in collection
+    // sets when concurrent mark shows that their contained object is
+    // unreachable.
+    _invokes++;
+
+    // Do we have any marking information for this region?
+    if (r->is_marked()) {
+      // We don't include humongous regions in collection
+      // sets because we collect them immediately at the end of a marking
+      // cycle.
+      // We also do not include young regions in collection sets
+      if (!r->isHumongous() && !r->is_young()) {
+        add_region(r);
+      }
+    }
+    return false;
+  }
+  jint marked_regions_added() { return _marked_regions_added; }
+  int invokes() { return _invokes; }
+};
+
+class ParKnownGarbageTask: public AbstractGangTask {
+  CollectionSetChooser* _hrSorted;
+  jint _chunk_size;
+  G1CollectedHeap* _g1;
+public:
+  ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) :
+    AbstractGangTask("ParKnownGarbageTask"),
+    _hrSorted(hrSorted), _chunk_size(chunk_size),
+    _g1(G1CollectedHeap::heap())
+  {}
+
+  void work(int i) {
+    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i);
+    // Back to zero for the claim value.
+    _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i,
+                                         HeapRegion::InitialClaimValue);
+    jint regions_added = parKnownGarbageCl.marked_regions_added();
+    _hrSorted->incNumMarkedHeapRegions(regions_added);
+    if (G1PrintParCleanupStats) {
+      gclog_or_tty->print("     Thread %d called %d times, added %d regions to list.\n",
+                 i, parKnownGarbageCl.invokes(), regions_added);
+    }
+  }
+};
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                   size_t max_live_bytes) {
+  double start;
+  if (G1PrintParCleanupStats) start = os::elapsedTime();
+  record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes);
+
+  _collectionSetChooser->clearMarkedHeapRegions();
+  double clear_marked_end;
+  if (G1PrintParCleanupStats) {
+    clear_marked_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  clear marked regions + work1: %8.3f ms.",
+                  (clear_marked_end - start)*1000.0);
+  }
+  if (ParallelGCThreads > 0) {
+    const size_t OverpartitionFactor = 4;
+    const size_t MinChunkSize = 8;
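+    // Over-partition the chooser's array relative to the number of workers
+    // (to even out load), but never hand out chunks smaller than MinChunkSize.
+    // Each worker claims chunks of this size as it adds marked regions.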
+    const size_t ChunkSize =
+      MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
+           MinChunkSize);
+    _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(),
+                                                             ChunkSize);
+    ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser,
+                                            (int) ChunkSize);
+    _g1->workers()->run_task(&parKnownGarbageTask);
+
+    assert(_g1->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+           "sanity check");
+  } else {
+    KnownGarbageClosure knownGarbagecl(_collectionSetChooser);
+    _g1->heap_region_iterate(&knownGarbagecl);
+  }
+  double known_garbage_end;
+  if (G1PrintParCleanupStats) {
+    known_garbage_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  compute known garbage: %8.3f ms.",
+                  (known_garbage_end - clear_marked_end)*1000.0);
+  }
+  _collectionSetChooser->sortMarkedHeapRegions();
+  double sort_end;
+  if (G1PrintParCleanupStats) {
+    sort_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  sorting: %8.3f ms.",
+                  (sort_end - known_garbage_end)*1000.0);
+  }
+
+  record_concurrent_mark_cleanup_end_work2();
+  double work2_end;
+  if (G1PrintParCleanupStats) {
+    work2_end = os::elapsedTime();
+    gclog_or_tty->print_cr("  work2: %8.3f ms.",
+                  (work2_end - sort_end)*1000.0);
+  }
+}
+
+// Add the heap region to the collection set and update the collection set
+// size and bytes-used-before accounting.
+void G1CollectorPolicy::
+add_to_collection_set(HeapRegion* hr) {
+  if (G1TraceRegions) {
+    gclog_or_tty->print_cr("added region to cset %d:["PTR_FORMAT", "PTR_FORMAT"], "
+                  "top "PTR_FORMAT", young %s",
+                  hr->hrs_index(), hr->bottom(), hr->end(),
+                  hr->top(), (hr->is_young()) ? "YES" : "NO");
+  }
+
+  if (_g1->mark_in_progress())
+    _g1->concurrent_mark()->registerCSetRegion(hr);
+
+  assert(!hr->in_collection_set(),
+              "should not already be in the CSet");
+  hr->set_in_collection_set(true);
+  hr->set_next_in_collection_set(_collection_set);
+  _collection_set = hr;
+  _collection_set_size++;
+  _collection_set_bytes_used_before += hr->used();
+}
+
+void
+G1CollectorPolicy_BestRegionsFirst::
+choose_collection_set(HeapRegion* pop_region) {
+  double non_young_start_time_sec;
+  start_recording_regions();
+
+  if (pop_region != NULL) {
+    _target_pause_time_ms = (double) G1MaxPauseTimeMS;
+  } else {
+    guarantee(_target_pause_time_ms > -1.0,
+              "_target_pause_time_ms should have been set!");
+  }
+
+  // pop region is either null (and so is CS), or else it *is* the CS.
+  assert(_collection_set == pop_region, "Precondition");
+
+  double base_time_ms = predict_base_elapsed_time_ms(_pending_cards);
+  double predicted_pause_time_ms = base_time_ms;
+
+  double target_time_ms = _target_pause_time_ms;
+  double time_remaining_ms = target_time_ms - base_time_ms;
+
+  // the 10% and 50% values are arbitrary...
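+  // If the base cost already eats more than 90% of the target, fall back to
+  // allowing ourselves half the target time and note that we are not within
+  // the target.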
+  if (time_remaining_ms < 0.10*target_time_ms) {
+    time_remaining_ms = 0.50 * target_time_ms;
+    _within_target = false;
+  } else {
+    _within_target = true;
+  }
+
+  // We figure out the number of bytes available for future to-space.
+  // For new regions without marking information, we must assume the
+  // worst-case of complete survival.  If we have marking information for a
+  // region, we can bound the amount of live data.  We can add a number of
+  // such regions, as long as the sum of the live data bounds does not
+  // exceed the available evacuation space.
+  size_t max_live_bytes = _g1->free_regions() * HeapRegion::GrainBytes;
+
+  size_t expansion_bytes =
+    _g1->expansion_regions() * HeapRegion::GrainBytes;
+
+  if (pop_region == NULL) {
+    _collection_set_bytes_used_before = 0;
+    _collection_set_size = 0;
+  }
+
+  // Adjust for expansion and slop.
+  max_live_bytes = max_live_bytes + expansion_bytes;
+
+  assert(pop_region != NULL || _g1->regions_accounted_for(), "Region leakage!");
+
+  HeapRegion* hr;
+  if (in_young_gc_mode()) {
+    double young_start_time_sec = os::elapsedTime();
+
+    if (G1PolicyVerbose > 0) {
+      gclog_or_tty->print_cr("Adding %d young regions to the CSet",
+                    _g1->young_list_length());
+    }
+    _young_cset_length  = 0;
+    _last_young_gc_full = full_young_gcs();
+    if (_last_young_gc_full)
+      ++_full_young_pause_num;
+    else
+      ++_partial_young_pause_num;
+    hr = _g1->pop_region_from_young_list();
+    while (hr != NULL) {
+
+      assert( hr->young_index_in_cset() == -1, "invariant" );
+      assert( hr->age_in_surv_rate_group() != -1, "invariant" );
+      hr->set_young_index_in_cset((int) _young_cset_length);
+
+      ++_young_cset_length;
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, true);
+      time_remaining_ms -= predicted_time_ms;
+      predicted_pause_time_ms += predicted_time_ms;
+      if (hr == pop_region) {
+        // The popular region was young.  Skip over it.
+        assert(hr->in_collection_set(), "It's the pop region.");
+      } else {
+        assert(!hr->in_collection_set(), "It's not the pop region.");
+        add_to_collection_set(hr);
+        record_cset_region(hr, true);
+      }
+      max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes);
+      if (G1PolicyVerbose > 0) {
+        gclog_or_tty->print_cr("  Added [" PTR_FORMAT ", " PTR_FORMAT") to CS.",
+                      hr->bottom(), hr->end());
+        gclog_or_tty->print_cr("    (" SIZE_FORMAT " KB left in heap.)",
+                      max_live_bytes/K);
+      }
+      hr = _g1->pop_region_from_young_list();
+    }
+
+    record_scan_only_regions(_g1->young_list_scan_only_length());
+
+    double young_end_time_sec = os::elapsedTime();
+    _recorded_young_cset_choice_time_ms =
+      (young_end_time_sec - young_start_time_sec) * 1000.0;
+
+    non_young_start_time_sec = os::elapsedTime();
+
+    if (_young_cset_length > 0 && _last_young_gc_full) {
+      // don't bother adding more regions...
+      goto choose_collection_set_end;
+    }
+  } else if (pop_region != NULL) {
+    // We're not in young mode, and we chose a popular region; don't choose
+    // any more.
+    return;
+  }
+
+  if (!in_young_gc_mode() || !full_young_gcs()) {
+    bool should_continue = true;
+    NumberSeq seq;
+    double avg_prediction = 100000000000000000.0; // something very large
+    do {
+      hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
+                                                      avg_prediction);
+      if (hr != NULL && !hr->popular()) {
+        double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+        time_remaining_ms -= predicted_time_ms;
+        predicted_pause_time_ms += predicted_time_ms;
+        add_to_collection_set(hr);
+        record_cset_region(hr, false);
+        max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes);
+        if (G1PolicyVerbose > 0) {
+          gclog_or_tty->print_cr("    (" SIZE_FORMAT " KB left in heap.)",
+                        max_live_bytes/K);
+        }
+        seq.add(predicted_time_ms);
+        avg_prediction = seq.avg() + seq.sd();
+      }
+      should_continue =
+        ( hr != NULL) &&
+        ( (adaptive_young_list_length()) ? time_remaining_ms > 0.0
+          : _collection_set_size < _young_list_fixed_length );
+    } while (should_continue);
+
+    if (!adaptive_young_list_length() &&
+        _collection_set_size < _young_list_fixed_length)
+      _should_revert_to_full_young_gcs  = true;
+  }
+
+choose_collection_set_end:
+  count_CS_bytes_used();
+
+  end_recording_regions();
+
+  double non_young_end_time_sec = os::elapsedTime();
+  _recorded_non_young_cset_choice_time_ms =
+    (non_young_end_time_sec - non_young_start_time_sec) * 1000.0;
+}
+
+void G1CollectorPolicy_BestRegionsFirst::record_full_collection_end() {
+  G1CollectorPolicy::record_full_collection_end();
+  _collectionSetChooser->updateAfterFullCollection();
+}
+
+void G1CollectorPolicy_BestRegionsFirst::
+expand_if_possible(size_t numRegions) {
+  size_t expansion_bytes = numRegions * HeapRegion::GrainBytes;
+  _g1->expand(expansion_bytes);
+}
+
+void G1CollectorPolicy_BestRegionsFirst::
+record_collection_pause_end(bool popular, bool abandoned) {
+  G1CollectorPolicy::record_collection_pause_end(popular, abandoned);
+  assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
+}
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,1199 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A G1CollectorPolicy makes policy decisions that determine the
+// characteristics of the collector.  Examples include:
+//   * choice of collection set.
+//   * when to collect.
+
+class HeapRegion;
+class CollectionSetChooser;
+
+// Yes, this is a bit unpleasant... but it saves replicating the same thing
+// over and over again and introducing subtle problems through small typos and
+// cutting and pasting mistakes. The macro below introduces a number
+// sequence into the following classes and the methods that access it.
+
+#define define_num_seq(name)                                                  \
+private:                                                                      \
+  NumberSeq _all_##name##_times_ms;                                           \
+public:                                                                       \
+  void record_##name##_time_ms(double ms) {                                   \
+    _all_##name##_times_ms.add(ms);                                           \
+  }                                                                           \
+  NumberSeq* get_##name##_seq() {                                             \
+    return &_all_##name##_times_ms;                                           \
+  }
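+
+// For example, define_num_seq(total) generates the private field
+// _all_total_times_ms together with record_total_time_ms(double ms) and
+// get_total_seq().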
+
+class MainBodySummary;
+class PopPreambleSummary;
+
+class PauseSummary {
+  define_num_seq(total)
+    define_num_seq(other)
+
+public:
+  virtual MainBodySummary*    main_body_summary()    { return NULL; }
+  virtual PopPreambleSummary* pop_preamble_summary() { return NULL; }
+};
+
+class MainBodySummary {
+  define_num_seq(satb_drain) // optional
+  define_num_seq(parallel) // parallel only
+    define_num_seq(ext_root_scan)
+    define_num_seq(mark_stack_scan)
+    define_num_seq(scan_only)
+    define_num_seq(update_rs)
+    define_num_seq(scan_rs)
+    define_num_seq(scan_new_refs) // Only for temp use; added to
+                                  // in parallel case.
+    define_num_seq(obj_copy)
+    define_num_seq(termination) // parallel only
+    define_num_seq(parallel_other) // parallel only
+  define_num_seq(mark_closure)
+  define_num_seq(clear_ct)  // parallel only
+};
+
+class PopPreambleSummary {
+  define_num_seq(pop_preamble)
+    define_num_seq(pop_update_rs)
+    define_num_seq(pop_scan_rs)
+    define_num_seq(pop_closure_app)
+    define_num_seq(pop_evacuation)
+    define_num_seq(pop_other)
+};
+
+class NonPopSummary: public PauseSummary,
+                     public MainBodySummary {
+public:
+  virtual MainBodySummary*    main_body_summary()    { return this; }
+};
+
+class PopSummary: public PauseSummary,
+                  public MainBodySummary,
+                  public PopPreambleSummary {
+public:
+  virtual MainBodySummary*    main_body_summary()    { return this; }
+  virtual PopPreambleSummary* pop_preamble_summary() { return this; }
+};
+
+class NonPopAbandonedSummary: public PauseSummary {
+};
+
+class PopAbandonedSummary: public PauseSummary,
+                           public PopPreambleSummary {
+public:
+  virtual PopPreambleSummary* pop_preamble_summary() { return this; }
+};
+
+class G1CollectorPolicy: public CollectorPolicy {
+protected:
+  // The number of pauses during the execution.
+  long _n_pauses;
+
+  // either equal to the number of parallel threads, if ParallelGCThreads
+  // has been set, or 1 otherwise
+  int _parallel_gc_threads;
+
+  enum SomePrivateConstants {
+    NumPrevPausesForHeuristics = 10,
+    NumPrevGCsForHeuristics = 10,
+    NumAPIs = HeapRegion::MaxAge
+  };
+
+  G1MMUTracker* _mmu_tracker;
+
+  void initialize_flags();
+
+  void initialize_all() {
+    initialize_flags();
+    initialize_size_info();
+    initialize_perm_generation(PermGen::MarkSweepCompact);
+  }
+
+  virtual size_t default_init_heap_size() {
+    // Pick some reasonable default.
+    return 8*M;
+  }
+
+
+  double _cur_collection_start_sec;
+  size_t _cur_collection_pause_used_at_start_bytes;
+  size_t _cur_collection_pause_used_regions_at_start;
+  size_t _prev_collection_pause_used_at_end_bytes;
+  double _cur_collection_par_time_ms;
+  double _cur_satb_drain_time_ms;
+  double _cur_clear_ct_time_ms;
+  bool   _satb_drain_time_set;
+  double _cur_popular_preamble_start_ms;
+  double _cur_popular_preamble_time_ms;
+  double _cur_popular_compute_rc_time_ms;
+  double _cur_popular_evac_time_ms;
+
+  double _cur_CH_strong_roots_end_sec;
+  double _cur_CH_strong_roots_dur_ms;
+  double _cur_G1_strong_roots_end_sec;
+  double _cur_G1_strong_roots_dur_ms;
+
+  // Statistics for recent GC pauses.  See below for how indexed.
+  TruncatedSeq* _recent_CH_strong_roots_times_ms;
+  TruncatedSeq* _recent_G1_strong_roots_times_ms;
+  TruncatedSeq* _recent_evac_times_ms;
+  // These exclude marking times.
+  TruncatedSeq* _recent_pause_times_ms;
+  TruncatedSeq* _recent_gc_times_ms;
+
+  TruncatedSeq* _recent_CS_bytes_used_before;
+  TruncatedSeq* _recent_CS_bytes_surviving;
+
+  TruncatedSeq* _recent_rs_sizes;
+
+  TruncatedSeq* _concurrent_mark_init_times_ms;
+  TruncatedSeq* _concurrent_mark_remark_times_ms;
+  TruncatedSeq* _concurrent_mark_cleanup_times_ms;
+
+  NonPopSummary*           _non_pop_summary;
+  PopSummary*              _pop_summary;
+  NonPopAbandonedSummary*  _non_pop_abandoned_summary;
+  PopAbandonedSummary*     _pop_abandoned_summary;
+
+  NumberSeq* _all_pause_times_ms;
+  NumberSeq* _all_full_gc_times_ms;
+  double _stop_world_start;
+  NumberSeq* _all_stop_world_times_ms;
+  NumberSeq* _all_yield_times_ms;
+
+  size_t     _region_num_young;
+  size_t     _region_num_tenured;
+  size_t     _prev_region_num_young;
+  size_t     _prev_region_num_tenured;
+
+  NumberSeq* _all_mod_union_times_ms;
+
+  int        _aux_num;
+  NumberSeq* _all_aux_times_ms;
+  double*    _cur_aux_start_times_ms;
+  double*    _cur_aux_times_ms;
+  bool*      _cur_aux_times_set;
+
+  double* _par_last_ext_root_scan_times_ms;
+  double* _par_last_mark_stack_scan_times_ms;
+  double* _par_last_scan_only_times_ms;
+  double* _par_last_scan_only_regions_scanned;
+  double* _par_last_update_rs_start_times_ms;
+  double* _par_last_update_rs_times_ms;
+  double* _par_last_update_rs_processed_buffers;
+  double* _par_last_scan_rs_start_times_ms;
+  double* _par_last_scan_rs_times_ms;
+  double* _par_last_scan_new_refs_times_ms;
+  double* _par_last_obj_copy_times_ms;
+  double* _par_last_termination_times_ms;
+
+  // There are two passes during popular pauses, so we need to store
+  // the results of the first pass somewhere.
+  double* _pop_par_last_update_rs_start_times_ms;
+  double* _pop_par_last_update_rs_times_ms;
+  double* _pop_par_last_update_rs_processed_buffers;
+  double* _pop_par_last_scan_rs_start_times_ms;
+  double* _pop_par_last_scan_rs_times_ms;
+  double* _pop_par_last_closure_app_times_ms;
+
+  double _pop_compute_rc_start;
+  double _pop_evac_start;
+
+  // indicates that we are in young GC mode
+  bool _in_young_gc_mode;
+
+  // indicates whether we are in full young or partially young GC mode
+  bool _full_young_gcs;
+
+  // if true, then it tries to dynamically adjust the length of the
+  // young list
+  bool _adaptive_young_list_length;
+  size_t _young_list_min_length;
+  size_t _young_list_target_length;
+  size_t _young_list_so_prefix_length;
+  size_t _young_list_fixed_length;
+
+  size_t _young_cset_length;
+  bool   _last_young_gc_full;
+
+  double _target_pause_time_ms;
+
+  unsigned              _full_young_pause_num;
+  unsigned              _partial_young_pause_num;
+
+  bool                  _during_marking;
+  bool                  _in_marking_window;
+  bool                  _in_marking_window_im;
+
+  SurvRateGroup*        _short_lived_surv_rate_group;
+  SurvRateGroup*        _survivor_surv_rate_group;
+  // add here any more surv rate groups
+
+  bool during_marking() {
+    return _during_marking;
+  }
+
+  // <NEW PREDICTION>
+
+private:
+  enum PredictionConstants {
+    TruncatedSeqLength = 10
+  };
+
+  TruncatedSeq* _alloc_rate_ms_seq;
+  double        _prev_collection_pause_end_ms;
+
+  TruncatedSeq* _pending_card_diff_seq;
+  TruncatedSeq* _rs_length_diff_seq;
+  TruncatedSeq* _cost_per_card_ms_seq;
+  TruncatedSeq* _cost_per_scan_only_region_ms_seq;
+  TruncatedSeq* _fully_young_cards_per_entry_ratio_seq;
+  TruncatedSeq* _partially_young_cards_per_entry_ratio_seq;
+  TruncatedSeq* _cost_per_entry_ms_seq;
+  TruncatedSeq* _partially_young_cost_per_entry_ms_seq;
+  TruncatedSeq* _cost_per_byte_ms_seq;
+  TruncatedSeq* _constant_other_time_ms_seq;
+  TruncatedSeq* _young_other_cost_per_region_ms_seq;
+  TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
+
+  TruncatedSeq* _pending_cards_seq;
+  TruncatedSeq* _scanned_cards_seq;
+  TruncatedSeq* _rs_lengths_seq;
+
+  TruncatedSeq* _cost_per_byte_ms_during_cm_seq;
+  TruncatedSeq* _cost_per_scan_only_region_ms_during_cm_seq;
+
+  TruncatedSeq* _young_gc_eff_seq;
+
+  TruncatedSeq* _max_conc_overhead_seq;
+
+  size_t _recorded_young_regions;
+  size_t _recorded_scan_only_regions;
+  size_t _recorded_non_young_regions;
+  size_t _recorded_region_num;
+
+  size_t _free_regions_at_end_of_collection;
+  size_t _scan_only_regions_at_end_of_collection;
+
+  size_t _recorded_rs_lengths;
+  size_t _max_rs_lengths;
+
+  size_t _recorded_marked_bytes;
+  size_t _recorded_young_bytes;
+
+  size_t _predicted_pending_cards;
+  size_t _predicted_cards_scanned;
+  size_t _predicted_rs_lengths;
+  size_t _predicted_bytes_to_copy;
+
+  double _predicted_survival_ratio;
+  double _predicted_rs_update_time_ms;
+  double _predicted_rs_scan_time_ms;
+  double _predicted_scan_only_scan_time_ms;
+  double _predicted_object_copy_time_ms;
+  double _predicted_constant_other_time_ms;
+  double _predicted_young_other_time_ms;
+  double _predicted_non_young_other_time_ms;
+  double _predicted_pause_time_ms;
+
+  double _vtime_diff_ms;
+
+  double _recorded_young_free_cset_time_ms;
+  double _recorded_non_young_free_cset_time_ms;
+
+  double _sigma;
+  double _expensive_region_limit_ms;
+
+  size_t _rs_lengths_prediction;
+
+  size_t _known_garbage_bytes;
+  double _known_garbage_ratio;
+
+  double sigma() {
+    return _sigma;
+  }
+
+  // A function that prevents us putting too much stock in small sample
+  // sets.  Returns a number between 2.0 and 1.0, depending on the number
+  // of samples.  5 or more samples yields one; fewer scales linearly from
+  // 2.0 at 1 sample to 1.0 at 5.
+  double confidence_factor(int samples) {
+    if (samples > 4) return 1.0;
+    else return  1.0 + sigma() * ((double)(5 - samples))/2.0;
+  }
+
+  double get_new_neg_prediction(TruncatedSeq* seq) {
+    return seq->davg() - sigma() * seq->dsd();
+  }
+
+#ifndef PRODUCT
+  bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group);
+#endif // PRODUCT
+
+protected:
+  double _pause_time_target_ms;
+  double _recorded_young_cset_choice_time_ms;
+  double _recorded_non_young_cset_choice_time_ms;
+  bool   _within_target;
+  size_t _pending_cards;
+  size_t _max_pending_cards;
+
+public:
+
+  void set_region_short_lived(HeapRegion* hr) {
+    hr->install_surv_rate_group(_short_lived_surv_rate_group);
+  }
+
+  void set_region_survivors(HeapRegion* hr) {
+    hr->install_surv_rate_group(_survivor_surv_rate_group);
+  }
+
+#ifndef PRODUCT
+  bool verify_young_ages();
+#endif // PRODUCT
+
+  void tag_scan_only(size_t short_lived_scan_only_length);
+
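+  // Conservative (high-side) prediction: pad the decaying average by sigma
+  // standard deviations, or by the small-sample confidence factor, whichever
+  // is larger.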
+  double get_new_prediction(TruncatedSeq* seq) {
+    return MAX2(seq->davg() + sigma() * seq->dsd(),
+                seq->davg() * confidence_factor(seq->num()));
+  }
+
+  size_t young_cset_length() {
+    return _young_cset_length;
+  }
+
+  void record_max_rs_lengths(size_t rs_lengths) {
+    _max_rs_lengths = rs_lengths;
+  }
+
+  size_t predict_pending_card_diff() {
+    double prediction = get_new_neg_prediction(_pending_card_diff_seq);
+    if (prediction < 0.00001)
+      return 0;
+    else
+      return (size_t) prediction;
+  }
+
+  size_t predict_pending_cards() {
+    size_t max_pending_card_num = _g1->max_pending_card_num();
+    size_t diff = predict_pending_card_diff();
+    size_t prediction;
+    if (diff > max_pending_card_num)
+      prediction = max_pending_card_num;
+    else
+      prediction = max_pending_card_num - diff;
+
+    return prediction;
+  }
+
+  size_t predict_rs_length_diff() {
+    return (size_t) get_new_prediction(_rs_length_diff_seq);
+  }
+
+  double predict_alloc_rate_ms() {
+    return get_new_prediction(_alloc_rate_ms_seq);
+  }
+
+  double predict_cost_per_card_ms() {
+    return get_new_prediction(_cost_per_card_ms_seq);
+  }
+
+  double predict_rs_update_time_ms(size_t pending_cards) {
+    return (double) pending_cards * predict_cost_per_card_ms();
+  }
+
+  double predict_fully_young_cards_per_entry_ratio() {
+    return get_new_prediction(_fully_young_cards_per_entry_ratio_seq);
+  }
+
+  double predict_partially_young_cards_per_entry_ratio() {
+    if (_partially_young_cards_per_entry_ratio_seq->num() < 2)
+      return predict_fully_young_cards_per_entry_ratio();
+    else
+      return get_new_prediction(_partially_young_cards_per_entry_ratio_seq);
+  }
+
+  size_t predict_young_card_num(size_t rs_length) {
+    return (size_t) ((double) rs_length *
+                     predict_fully_young_cards_per_entry_ratio());
+  }
+
+  size_t predict_non_young_card_num(size_t rs_length) {
+    return (size_t) ((double) rs_length *
+                     predict_partially_young_cards_per_entry_ratio());
+  }
+
+  double predict_rs_scan_time_ms(size_t card_num) {
+    if (full_young_gcs())
+      return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq);
+    else
+      return predict_partially_young_rs_scan_time_ms(card_num);
+  }
+
+  double predict_partially_young_rs_scan_time_ms(size_t card_num) {
+    if (_partially_young_cost_per_entry_ms_seq->num() < 3)
+      return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq);
+    else
+      return (double) card_num *
+        get_new_prediction(_partially_young_cost_per_entry_ms_seq);
+  }
+
+  double predict_scan_only_time_ms_during_cm(size_t scan_only_region_num) {
+    if (_cost_per_scan_only_region_ms_during_cm_seq->num() < 3)
+      return 1.5 * (double) scan_only_region_num *
+        get_new_prediction(_cost_per_scan_only_region_ms_seq);
+    else
+      return (double) scan_only_region_num *
+        get_new_prediction(_cost_per_scan_only_region_ms_during_cm_seq);
+  }
+
+  double predict_scan_only_time_ms(size_t scan_only_region_num) {
+    if (_in_marking_window_im)
+      return predict_scan_only_time_ms_during_cm(scan_only_region_num);
+    else
+      return (double) scan_only_region_num *
+        get_new_prediction(_cost_per_scan_only_region_ms_seq);
+  }
+
+  double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) {
+    if (_cost_per_byte_ms_during_cm_seq->num() < 3)
+      return 1.1 * (double) bytes_to_copy *
+        get_new_prediction(_cost_per_byte_ms_seq);
+    else
+      return (double) bytes_to_copy *
+        get_new_prediction(_cost_per_byte_ms_during_cm_seq);
+  }
+
+  double predict_object_copy_time_ms(size_t bytes_to_copy) {
+    if (_in_marking_window && !_in_marking_window_im)
+      return predict_object_copy_time_ms_during_cm(bytes_to_copy);
+    else
+      return (double) bytes_to_copy *
+        get_new_prediction(_cost_per_byte_ms_seq);
+  }
+
+  double predict_constant_other_time_ms() {
+    return get_new_prediction(_constant_other_time_ms_seq);
+  }
+
+  double predict_young_other_time_ms(size_t young_num) {
+    return
+      (double) young_num *
+      get_new_prediction(_young_other_cost_per_region_ms_seq);
+  }
+
+  double predict_non_young_other_time_ms(size_t non_young_num) {
+    return
+      (double) non_young_num *
+      get_new_prediction(_non_young_other_cost_per_region_ms_seq);
+  }
+
+  void check_if_region_is_too_expensive(double predicted_time_ms);
+
+  double predict_young_collection_elapsed_time_ms(size_t adjustment);
+  double predict_base_elapsed_time_ms(size_t pending_cards);
+  double predict_base_elapsed_time_ms(size_t pending_cards,
+                                      size_t scanned_cards);
+  size_t predict_bytes_to_copy(HeapRegion* hr);
+  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
+
+  // for use by: calculate_optimal_so_length(length)
+  void predict_gc_eff(size_t young_region_num,
+                      size_t so_length,
+                      double base_time_ms,
+                      double *gc_eff,
+                      double *pause_time_ms);
+
+  // for use by: calculate_young_list_target_config(rs_length)
+  bool predict_gc_eff(size_t young_region_num,
+                      size_t so_length,
+                      double base_time_with_so_ms,
+                      size_t init_free_regions,
+                      double target_pause_time_ms,
+                      double* gc_eff);
+
+  void start_recording_regions();
+  void record_cset_region(HeapRegion* hr, bool young);
+  void record_scan_only_regions(size_t scan_only_length);
+  void end_recording_regions();
+
+  void record_vtime_diff_ms(double vtime_diff_ms) {
+    _vtime_diff_ms = vtime_diff_ms;
+  }
+
+  void record_young_free_cset_time_ms(double time_ms) {
+    _recorded_young_free_cset_time_ms = time_ms;
+  }
+
+  void record_non_young_free_cset_time_ms(double time_ms) {
+    _recorded_non_young_free_cset_time_ms = time_ms;
+  }
+
+  double predict_young_gc_eff() {
+    return get_new_neg_prediction(_young_gc_eff_seq);
+  }
+
+  // </NEW PREDICTION>
+
+public:
+  void cset_regions_freed() {
+    bool propagate = _last_young_gc_full && !_in_marking_window;
+    _short_lived_surv_rate_group->all_surviving_words_recorded(propagate);
+    _survivor_surv_rate_group->all_surviving_words_recorded(propagate);
+    // also call it on any more surv rate groups
+  }
+
+  void set_known_garbage_bytes(size_t known_garbage_bytes) {
+    _known_garbage_bytes = known_garbage_bytes;
+    size_t heap_bytes = _g1->capacity();
+    _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes;
+  }
+
+  void decrease_known_garbage_bytes(size_t known_garbage_bytes) {
+    guarantee( _known_garbage_bytes >= known_garbage_bytes, "invariant" );
+
+    _known_garbage_bytes -= known_garbage_bytes;
+    size_t heap_bytes = _g1->capacity();
+    _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes;
+  }
+
+  G1MMUTracker* mmu_tracker() {
+    return _mmu_tracker;
+  }
+
+  double predict_init_time_ms() {
+    return get_new_prediction(_concurrent_mark_init_times_ms);
+  }
+
+  double predict_remark_time_ms() {
+    return get_new_prediction(_concurrent_mark_remark_times_ms);
+  }
+
+  double predict_cleanup_time_ms() {
+    return get_new_prediction(_concurrent_mark_cleanup_times_ms);
+  }
+
+  // Returns an estimate of the survival rate of the region at yg-age
+  // "yg_age".
+  double predict_yg_surv_rate(int age) {
+    TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age);
+    if (seq->num() == 0)
+      gclog_or_tty->print("BARF! age is %d", age);
+    guarantee( seq->num() > 0, "invariant" );
+    double pred = get_new_prediction(seq);
+    if (pred > 1.0)
+      pred = 1.0;
+    return pred;
+  }
+
+  double accum_yg_surv_rate_pred(int age) {
+    return _short_lived_surv_rate_group->accum_surv_rate_pred(age);
+  }
+
+protected:
+  void print_stats (int level, const char* str, double value);
+  void print_stats (int level, const char* str, int value);
+  void print_par_stats (int level, const char* str, double* data) {
+    print_par_stats(level, str, data, true);
+  }
+  void print_par_stats (int level, const char* str, double* data, bool summary);
+  void print_par_buffers (int level, const char* str, double* data, bool summary);
+
+  void check_other_times(int level,
+                         NumberSeq* other_times_ms,
+                         NumberSeq* calc_other_times_ms) const;
+
+  void print_summary (PauseSummary* stats) const;
+  void print_abandoned_summary(PauseSummary* non_pop_summary,
+                               PauseSummary* pop_summary) const;
+
+  void print_summary (int level, const char* str, NumberSeq* seq) const;
+  void print_summary_sd (int level, const char* str, NumberSeq* seq) const;
+
+  double avg_value (double* data);
+  double max_value (double* data);
+  double sum_of_values (double* data);
+  double max_sum (double* data1, double* data2);
+
+  int _last_satb_drain_processed_buffers;
+  int _last_update_rs_processed_buffers;
+  double _last_pause_time_ms;
+
+  size_t _bytes_in_to_space_before_gc;
+  size_t _bytes_in_to_space_after_gc;
+  size_t bytes_in_to_space_during_gc() {
+    return
+      _bytes_in_to_space_after_gc - _bytes_in_to_space_before_gc;
+  }
+  size_t _bytes_in_collection_set_before_gc;
+  // Used to count used bytes in CS.
+  friend class CountCSClosure;
+
+  // Statistics kept per GC stoppage, pause or full.
+  TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec;
+
+  // We track markings.
+  int _num_markings;
+  double _mark_thread_startup_sec;       // Time at startup of marking thread
+
+  // Add a new GC of the given duration and end time to the record.
+  void update_recent_gc_times(double end_time_sec, double elapsed_ms);
+
+  // The head of the list (via "next_in_collection_set()") representing the
+  // current collection set.
+  HeapRegion* _collection_set;
+  size_t _collection_set_size;
+  size_t _collection_set_bytes_used_before;
+
+  // Info about marking.
+  int _n_marks; // Sticky at 2, so we know when we've done at least 2.
+
+  // The number of collection pauses at the end of the last mark.
+  size_t _n_pauses_at_mark_end;
+
+  // ==== This section is for stats related to starting Conc Refinement on time.
+  size_t _conc_refine_enabled;
+  size_t _conc_refine_zero_traversals;
+  size_t _conc_refine_max_traversals;
+  // In # of heap regions.
+  size_t _conc_refine_current_delta;
+
+  // At the beginning of a collection pause, update the variables above,
+  // especially the "delta".
+  void update_conc_refine_data();
+  // ====
+
+  // Stash a pointer to the g1 heap.
+  G1CollectedHeap* _g1;
+
+  // The average time in ms per collection pause, averaged over recent pauses.
+  double recent_avg_time_for_pauses_ms();
+
+  // The average time in ms for processing CollectedHeap strong roots, per
+  // collection pause, averaged over recent pauses.
+  double recent_avg_time_for_CH_strong_ms();
+
+  // The average time in ms for processing the G1 remembered set, per
+  // pause, averaged over recent pauses.
+  double recent_avg_time_for_G1_strong_ms();
+
+  // The average time in ms for "evacuating followers", per pause, averaged
+  // over recent pauses.
+  double recent_avg_time_for_evac_ms();
+
+  // The number of "recent" GCs recorded in the number sequences
+  int number_of_recent_gcs();
+
+  // The average survival ratio, computed by the total number of bytes
+  // surviving / total number of bytes before collection over the last
+  // several recent pauses.
+  double recent_avg_survival_fraction();
+  // The survival fraction of the most recent pause; if there have been no
+  // pauses, returns 1.0.
+  double last_survival_fraction();
+
+  // Returns a "conservative" estimate of the recent survival rate, i.e.,
+  // one that may be higher than "recent_avg_survival_fraction".
+  // This is conservative in several ways:
+  //   If there have been few pauses, it will assume a potential high
+  //     variance, and err on the side of caution.
+  //   It puts a lower bound (currently 0.1) on the value it will return.
+  //   To try to detect phase changes, if the most recent pause ("latest") has a
+  //     higher-than-average ("avg") survival rate, it returns that rate.
+  // "work" version is a utility function; young is restricted to young regions.
+  double conservative_avg_survival_fraction_work(double avg,
+                                                 double latest);
+
+  // The arguments are the two sequences that keep track of the number of bytes
+  //   surviving and the total number of bytes before collection, resp.,
+  //   over the last several recent pauses.
+  // Returns the survival rate for the category in the most recent pause.
+  // If there have been no pauses, returns 1.0.
+  double last_survival_fraction_work(TruncatedSeq* surviving,
+                                     TruncatedSeq* before);
+
+  // The arguments are the two sequences that keep track of the number of bytes
+  //   surviving and the total number of bytes before collection, resp.,
+  //   over the last several recent pauses
+  // Returns the average survival ratio over the last several recent pauses.
+  // If there have been no pauses, returns 1.0.
+  double recent_avg_survival_fraction_work(TruncatedSeq* surviving,
+                                           TruncatedSeq* before);
+
+  double conservative_avg_survival_fraction() {
+    double avg = recent_avg_survival_fraction();
+    double latest = last_survival_fraction();
+    return conservative_avg_survival_fraction_work(avg, latest);
+  }
+
+  // The ratio of gc time to elapsed time, computed over recent pauses.
+  double _recent_avg_pause_time_ratio;
+
+  double recent_avg_pause_time_ratio() {
+    return _recent_avg_pause_time_ratio;
+  }
+
+  // Number of pauses between concurrent marking.
+  size_t _pauses_btwn_concurrent_mark;
+
+  size_t _n_marks_since_last_pause;
+
+  // True iff CM has been initiated.
+  bool _conc_mark_initiated;
+
+  // True iff CM should be initiated
+  bool _should_initiate_conc_mark;
+  bool _should_revert_to_full_young_gcs;
+  bool _last_full_young_gc;
+
+  // This set of variables tracks the collector efficiency, in order to
+  // determine whether we should initiate a new marking.
+  double _cur_mark_stop_world_time_ms;
+  double _mark_init_start_sec;
+  double _mark_remark_start_sec;
+  double _mark_cleanup_start_sec;
+  double _mark_closure_time_ms;
+
+  void   calculate_young_list_min_length();
+  void   calculate_young_list_target_config();
+  void   calculate_young_list_target_config(size_t rs_lengths);
+  size_t calculate_optimal_so_length(size_t young_list_length);
+
+public:
+
+  G1CollectorPolicy();
+
+  virtual G1CollectorPolicy* as_g1_policy() { return this; }
+
+  virtual CollectorPolicy::Name kind() {
+    return CollectorPolicy::G1CollectorPolicyKind;
+  }
+
+  void check_prediction_validity();
+
+  size_t bytes_in_collection_set() {
+    return _bytes_in_collection_set_before_gc;
+  }
+
+  size_t bytes_in_to_space() {
+    return bytes_in_to_space_during_gc();
+  }
+
+  unsigned calc_gc_alloc_time_stamp() {
+    return _all_pause_times_ms->num() + 1;
+  }
+
+protected:
+
+  // Count the number of bytes used in the CS.
+  void count_CS_bytes_used();
+
+  // Together these do the base cleanup-recording work.  Subclasses might
+  // want to put something between them.
+  void record_concurrent_mark_cleanup_end_work1(size_t freed_bytes,
+                                                size_t max_live_bytes);
+  void record_concurrent_mark_cleanup_end_work2();
+
+public:
+
+  virtual void init();
+
+  virtual HeapWord* mem_allocate_work(size_t size,
+                                      bool is_tlab,
+                                      bool* gc_overhead_limit_was_exceeded);
+
+  // This method controls how a collector handles one or more
+  // of its generations being fully allocated.
+  virtual HeapWord* satisfy_failed_allocation(size_t size,
+                                              bool is_tlab);
+
+  BarrierSet::Name barrier_set_name() { return BarrierSet::G1SATBCTLogging; }
+
+  GenRemSet::Name  rem_set_name()     { return GenRemSet::CardTable; }
+
+  // The number of collection pauses so far.
+  long n_pauses() const { return _n_pauses; }
+
+  // Update the heuristic info to record a collection pause of the given
+  // start time, where the given number of bytes were used at the start.
+  // This may involve changing the desired size of a collection set.
+
+  virtual void record_stop_world_start();
+
+  virtual void record_collection_pause_start(double start_time_sec,
+                                             size_t start_used);
+
+  virtual void record_popular_pause_preamble_start();
+  virtual void record_popular_pause_preamble_end();
+
+  // Must currently be called while the world is stopped.
+  virtual void record_concurrent_mark_init_start();
+  virtual void record_concurrent_mark_init_end();
+  void record_concurrent_mark_init_end_pre(double
+                                           mark_init_elapsed_time_ms);
+
+  void record_mark_closure_time(double mark_closure_time_ms);
+
+  virtual void record_concurrent_mark_remark_start();
+  virtual void record_concurrent_mark_remark_end();
+
+  virtual void record_concurrent_mark_cleanup_start();
+  virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                                  size_t max_live_bytes);
+  virtual void record_concurrent_mark_cleanup_completed();
+
+  virtual void record_concurrent_pause();
+  virtual void record_concurrent_pause_end();
+
+  virtual void record_collection_pause_end_CH_strong_roots();
+  virtual void record_collection_pause_end_G1_strong_roots();
+
+  virtual void record_collection_pause_end(bool popular, bool abandoned);
+
+  // Record the fact that a full collection occurred.
+  virtual void record_full_collection_start();
+  virtual void record_full_collection_end();
+
+  void record_ext_root_scan_time(int worker_i, double ms) {
+    _par_last_ext_root_scan_times_ms[worker_i] = ms;
+  }
+
+  void record_mark_stack_scan_time(int worker_i, double ms) {
+    _par_last_mark_stack_scan_times_ms[worker_i] = ms;
+  }
+
+  void record_scan_only_time(int worker_i, double ms, int n) {
+    _par_last_scan_only_times_ms[worker_i] = ms;
+    _par_last_scan_only_regions_scanned[worker_i] = (double) n;
+  }
+
+  void record_satb_drain_time(double ms) {
+    _cur_satb_drain_time_ms = ms;
+    _satb_drain_time_set    = true;
+  }
+
+  void record_satb_drain_processed_buffers (int processed_buffers) {
+    _last_satb_drain_processed_buffers = processed_buffers;
+  }
+
+  void record_mod_union_time(double ms) {
+    _all_mod_union_times_ms->add(ms);
+  }
+
+  void record_update_rs_start_time(int thread, double ms) {
+    _par_last_update_rs_start_times_ms[thread] = ms;
+  }
+
+  void record_update_rs_time(int thread, double ms) {
+    _par_last_update_rs_times_ms[thread] = ms;
+  }
+
+  void record_update_rs_processed_buffers (int thread,
+                                           double processed_buffers) {
+    _par_last_update_rs_processed_buffers[thread] = processed_buffers;
+  }
+
+  void record_scan_rs_start_time(int thread, double ms) {
+    _par_last_scan_rs_start_times_ms[thread] = ms;
+  }
+
+  void record_scan_rs_time(int thread, double ms) {
+    _par_last_scan_rs_times_ms[thread] = ms;
+  }
+
+  void record_scan_new_refs_time(int thread, double ms) {
+    _par_last_scan_new_refs_times_ms[thread] = ms;
+  }
+
+  double get_scan_new_refs_time(int thread) {
+    return _par_last_scan_new_refs_times_ms[thread];
+  }
+
+  void reset_obj_copy_time(int thread) {
+    _par_last_obj_copy_times_ms[thread] = 0.0;
+  }
+
+  void reset_obj_copy_time() {
+    reset_obj_copy_time(0);
+  }
+
+  void record_obj_copy_time(int thread, double ms) {
+    _par_last_obj_copy_times_ms[thread] += ms;
+  }
+
+  void record_obj_copy_time(double ms) {
+    record_obj_copy_time(0, ms);
+  }
+
+  void record_termination_time(int thread, double ms) {
+    _par_last_termination_times_ms[thread] = ms;
+  }
+
+  void record_termination_time(double ms) {
+    record_termination_time(0, ms);
+  }
+
+  void record_pause_time(double ms) {
+    _last_pause_time_ms = ms;
+  }
+
+  void record_clear_ct_time(double ms) {
+    _cur_clear_ct_time_ms = ms;
+  }
+
+  void record_par_time(double ms) {
+    _cur_collection_par_time_ms = ms;
+  }
+
+  void record_aux_start_time(int i) {
+    guarantee(i < _aux_num, "should be within range");
+    _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0;
+  }
+
+  void record_aux_end_time(int i) {
+    guarantee(i < _aux_num, "should be within range");
+    double ms = os::elapsedTime() * 1000.0 - _cur_aux_start_times_ms[i];
+    _cur_aux_times_set[i] = true;
+    _cur_aux_times_ms[i] += ms;
+  }
+
+  void record_pop_compute_rc_start();
+  void record_pop_compute_rc_end();
+
+  void record_pop_evac_start();
+  void record_pop_evac_end();
+
+  // Record the fact that "bytes" bytes allocated in a region.
+  void record_before_bytes(size_t bytes);
+  void record_after_bytes(size_t bytes);
+
+  // Returns "true" if this is a good time to do a collection pause.
+  // The "word_size" argument, if non-zero, indicates the size of an
+  // allocation request that is prompting this query.
+  virtual bool should_do_collection_pause(size_t word_size) = 0;
+
+  // Choose a new collection set.  Marks the chosen regions as being
+  // "in_collection_set", and links them together.  The head and number of
+  // the collection set are available via access methods.
+  // If "pop_region" is non-NULL, it is a popular region that has already
+  // been added to the collection set.
+  virtual void choose_collection_set(HeapRegion* pop_region = NULL) = 0;
+
+  void clear_collection_set() { _collection_set = NULL; }
+
+  // The head of the list (via "next_in_collection_set()") representing the
+  // current collection set.
+  HeapRegion* collection_set() { return _collection_set; }
+
+  // Sets the collection set to the given single region.
+  virtual void set_single_region_collection_set(HeapRegion* hr);
+
+  // The number of elements in the current collection set.
+  size_t collection_set_size() { return _collection_set_size; }
+
+  // Add "hr" to the CS.
+  void add_to_collection_set(HeapRegion* hr);
+
+  bool should_initiate_conc_mark()      { return _should_initiate_conc_mark; }
+  void set_should_initiate_conc_mark()  { _should_initiate_conc_mark = true; }
+  void unset_should_initiate_conc_mark(){ _should_initiate_conc_mark = false; }
+
+  void checkpoint_conc_overhead();
+
+  // If an expansion would be appropriate, because recent GC overhead had
+  // exceeded the desired limit, return an amount to expand by.
+  virtual size_t expansion_amount();
+
+  // note start of mark thread
+  void note_start_of_mark_thread();
+
+  // The marked bytes of region "r" have changed; reclassify its desirability
+  // for marking.  Also asserts that "r" is eligible for a CS.
+  virtual void note_change_in_marked_bytes(HeapRegion* r) = 0;
+
+#ifndef PRODUCT
+  // Check any appropriate marked bytes info, asserting false if
+  // something's wrong, else returning "true".
+  virtual bool assertMarkedBytesDataOK() = 0;
+#endif
+
+  // Print tracing information.
+  void print_tracing_info() const;
+
+  // Print stats on young survival ratio
+  void print_yg_surv_rate_info() const;
+
+  void finished_recalculating_age_indexes() {
+    _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+    // do that for any other surv rate groups
+  }
+
+  bool should_add_next_region_to_young_list();
+
+  bool in_young_gc_mode() {
+    return _in_young_gc_mode;
+  }
+  void set_in_young_gc_mode(bool in_young_gc_mode) {
+    _in_young_gc_mode = in_young_gc_mode;
+  }
+
+  bool full_young_gcs() {
+    return _full_young_gcs;
+  }
+  void set_full_young_gcs(bool full_young_gcs) {
+    _full_young_gcs = full_young_gcs;
+  }
+
+  bool adaptive_young_list_length() {
+    return _adaptive_young_list_length;
+  }
+  void set_adaptive_young_list_length(bool adaptive_young_list_length) {
+    _adaptive_young_list_length = adaptive_young_list_length;
+  }
+
+  inline double get_gc_eff_factor() {
+    double ratio = _known_garbage_ratio;
+
+    double square = ratio * ratio;
+    // square = square * square;
+    double ret = square * 9.0 + 1.0;
+#if 0
+    gclog_or_tty->print_cr("ratio = %1.2lf, ret = %1.2lf", ratio, ret);
+#endif // 0
+    guarantee(0.0 <= ret && ret < 10.0, "invariant!");
+    return ret;
+  }
+
+  //
+  // Survivor regions policy.
+  //
+protected:
+
+  // Current tenuring threshold, set to 0 if the collector reaches the
+  // maximum number of survivor regions.
+  int _tenuring_threshold;
+
+public:
+
+  inline GCAllocPurpose
+    evacuation_destination(HeapRegion* src_region, int age, size_t word_sz) {
+      if (age < _tenuring_threshold && src_region->is_young()) {
+        return GCAllocForSurvived;
+      } else {
+        return GCAllocForTenured;
+      }
+  }
+
+  inline bool track_object_age(GCAllocPurpose purpose) {
+    return purpose == GCAllocForSurvived;
+  }
+
+  inline GCAllocPurpose alternative_purpose(int purpose) {
+    return GCAllocForTenured;
+  }
+
+  uint max_regions(int purpose);
+
+  // The limit on regions for a particular purpose is reached.
+  void note_alloc_region_limit_reached(int purpose) {
+    if (purpose == GCAllocForSurvived) {
+      _tenuring_threshold = 0;
+    }
+  }
+
+  void note_start_adding_survivor_regions() {
+    _survivor_surv_rate_group->start_adding_regions();
+  }
+
+  void note_stop_adding_survivor_regions() {
+    _survivor_surv_rate_group->stop_adding_regions();
+  }
+};
+
+// This encapsulates a particular strategy for a g1 Collector.
+//
+//      Start a concurrent mark when our heap size is n bytes
+//            greater than our heap size was at the last concurrent
+//            mark, where n is a function of the CMSTriggerRatio
+//            and the MinHeapFreeRatio.
+//
+//      Start a g1 collection pause when we have allocated the
+//            average number of bytes currently being freed in
+//            a collection, but only if it is at least one region
+//            full
+//
+//      Resize the heap based on the desired
+//      allocation space, where the desired allocation space is
+//      a function of the survival rate and the desired future to-space size.
+//
+//      Choose collection set by first picking all older regions
+//      which have a survival rate which beats our projected young
+//      survival rate.  Then fill out the number of needed regions
+//      with young regions.
+
+class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy {
+  CollectionSetChooser* _collectionSetChooser;
+  // If the estimate is less than desirable, resize if possible.
+  void expand_if_possible(size_t numRegions);
+
+  virtual void choose_collection_set(HeapRegion* pop_region = NULL);
+  virtual void record_collection_pause_start(double start_time_sec,
+                                             size_t start_used);
+  virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes,
+                                                  size_t max_live_bytes);
+  virtual void record_full_collection_end();
+
+public:
+  G1CollectorPolicy_BestRegionsFirst() {
+    _collectionSetChooser = new CollectionSetChooser();
+  }
+  void record_collection_pause_end(bool popular, bool abandoned);
+  bool should_do_collection_pause(size_t word_size);
+  virtual void set_single_region_collection_set(HeapRegion* hr);
+  // This is not needed any more, after the CSet choosing code was
+  // changed to use the pause prediction work. But let's leave the
+  // hook in just in case.
+  void note_change_in_marked_bytes(HeapRegion* r) { }
+#ifndef PRODUCT
+  bool assertMarkedBytesDataOK();
+#endif
+};
+
+// This should move to some place more general...
+
+// If we have "n" measurements, and we've kept track of their "sum" and the
+// "sum_of_squares" of the measurements, this returns the variance of the
+// sequence.
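+// Since sum == n * avg, this is algebraically equivalent to
+// sum_of_squares / n - avg * avg, i.e. E[x^2] - (E[x])^2.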
+inline double variance(int n, double sum_of_squares, double sum) {
+  double n_d = (double)n;
+  double avg = sum/n_d;
+  return (sum_of_squares - 2.0 * avg * sum + n_d * avg * avg) / n_d;
+}
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1MMUTracker.cpp.incl"
+
+#define _DISABLE_MMU                             0
+
+// We can't compare doubles exactly, so tolerate a small margin of error.
+#define SMALL_MARGIN 0.0000001
+#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN )
+#define is_double_leq(_val1, _val2) is_double_leq_0((_val1) - (_val2))
+#define is_double_geq(_val1, _val2) is_double_leq_0((_val2) - (_val1))
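The margin exists because accumulated floating-point arithmetic rarely produces exact results. A tiny standalone illustration of why a plain comparison is not enough:

    #include <cstdio>

    #define SMALL_MARGIN 0.0000001
    #define is_double_leq_0(v) ((v) < SMALL_MARGIN)

    int main() {
      double d = 0.1 + 0.2 - 0.3;             // not exactly zero in binary floating point
      printf("%d %d\n", d <= 0.0,             // 0: exact comparison says "greater than 0"
                        is_double_leq_0(d));  // 1: comparison with a margin says "<= 0"
      return 0;
    }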
+
+/***** ALL TIMES ARE IN SECS!!!!!!! *****/
+
+G1MMUTracker::G1MMUTracker(double time_slice, double max_gc_time) :
+  _time_slice(time_slice),
+  _max_gc_time(max_gc_time),
+  _conc_overhead_time_sec(0.0) { }
+
+void
+G1MMUTracker::update_conc_overhead(double conc_overhead) {
+  double conc_overhead_time_sec = _time_slice * conc_overhead;
+  if (conc_overhead_time_sec > 0.9 * _max_gc_time) {
+    // We are screwed, as we only seem to have <10% of the soft
+    // real-time goal available for pauses. Let's admit defeat and
+    // allow something more generous as a pause target.
+    conc_overhead_time_sec = 0.75 * _max_gc_time;
+  }
+
+  _conc_overhead_time_sec = conc_overhead_time_sec;
+}
+
+G1MMUTrackerQueue::G1MMUTrackerQueue(double time_slice, double max_gc_time) :
+  G1MMUTracker(time_slice, max_gc_time),
+  _head_index(0),
+  _tail_index(trim_index(_head_index+1)),
+  _no_entries(0) { }
+
+void G1MMUTrackerQueue::remove_expired_entries(double current_time) {
+  double limit = current_time - _time_slice;
+  while (_no_entries > 0) {
+    if (is_double_geq(limit, _array[_tail_index].end_time())) {
+      _tail_index = trim_index(_tail_index + 1);
+      --_no_entries;
+    } else
+      return;
+  }
+  guarantee(_no_entries == 0, "should have no entries in the array");
+}
+
+double G1MMUTrackerQueue::calculate_gc_time(double current_time) {
+  double gc_time = 0.0;
+  double limit = current_time - _time_slice;
+  for (int i = 0; i < _no_entries; ++i) {
+    int index = trim_index(_tail_index + i);
+    G1MMUTrackerQueueElem *elem = &_array[index];
+    if (elem->end_time() > limit) {
+      if (elem->start_time() > limit)
+        gc_time += elem->duration();
+      else
+        gc_time += elem->end_time() - limit;
+    }
+  }
+  return gc_time;
+}
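The loop above only charges the part of each pause that falls inside the current time slice; a pause straddling the window boundary contributes end_time() - limit. A standalone restatement of that clipping, with made-up pause times:

    #include <cstdio>

    struct Pause { double start, end; };       // all times in seconds

    // Only the portion of each pause inside the window [t - slice, t] counts.
    double gc_time_in_window(const Pause* p, int n, double t, double slice) {
      double limit = t - slice, gc = 0.0;
      for (int i = 0; i < n; i++) {
        if (p[i].end <= limit) continue;                     // entirely before the window
        double s = p[i].start > limit ? p[i].start : limit;  // clip at the window start
        gc += p[i].end - s;
      }
      return gc;
    }

    int main() {
      Pause pauses[] = {{0.90, 0.95}, {1.02, 1.05}};         // made-up pauses
      printf("%.3f s of GC in the last 0.2 s window\n",
             gc_time_in_window(pauses, 2, 1.10, 0.2));       // 0.05 + 0.03 = 0.080
      return 0;
    }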
+
+void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) {
+  double longest_allowed = longest_pause_internal(start);
+  if (longest_allowed < 0.0)
+    longest_allowed = 0.0;
+  double duration = end - start;
+
+  remove_expired_entries(end);
+  if (_no_entries == QueueLength) {
+    // OK, right now when we fill up we bomb out
+    // there are a few ways of dealing with this "gracefully"
+    //   increase the array size (:-)
+    //   remove the oldest entry (this might allow more GC time for
+    //     the time slice than what's allowed)
+    //   consolidate the two entries with the minimum gap between them
+    //     (this might allow less GC time than what's allowed)
+    guarantee(0, "array full, currently we can't recover");
+  }
+  _head_index = trim_index(_head_index + 1);
+  ++_no_entries;
+  _array[_head_index] = G1MMUTrackerQueueElem(start, end);
+}
+
+// Basically, the _internal call does not remove expired entries;
+// it exists for trying things out in the future and for a couple
+// of other places (debugging).
+
+double G1MMUTrackerQueue::longest_pause(double current_time) {
+  if (_DISABLE_MMU)
+    return _max_gc_time;
+
+  MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
+  remove_expired_entries(current_time);
+
+  return longest_pause_internal(current_time);
+}
+
+double G1MMUTrackerQueue::longest_pause_internal(double current_time) {
+  double target_time = _max_gc_time;
+
+  while( 1 ) {
+    double gc_time =
+      calculate_gc_time(current_time + target_time) + _conc_overhead_time_sec;
+    double diff = target_time + gc_time - _max_gc_time;
+    if (!is_double_leq_0(diff)) {
+      target_time -= diff;
+      if (is_double_leq_0(target_time)) {
+        target_time = -1.0;
+        break;
+      }
+    } else {
+      break;
+    }
+  }
+
+  return target_time;
+}
+
+// Basically, the _internal call does not remove expired entries;
+// it exists for trying things out in the future and for a couple
+// of other places (debugging).
+
+double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) {
+  if (_DISABLE_MMU)
+    return 0.0;
+
+  MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
+  remove_expired_entries(current_time);
+
+  return when_internal(current_time, pause_time);
+}
+
+double G1MMUTrackerQueue::when_internal(double current_time,
+                                        double pause_time) {
+  // if the pause is over the maximum, just assume that it's the maximum
+  double adjusted_pause_time =
+    (pause_time > max_gc_time()) ? max_gc_time() : pause_time;
+  double earliest_end = current_time + adjusted_pause_time;
+  double limit = earliest_end - _time_slice;
+  double gc_time = calculate_gc_time(earliest_end);
+  double diff = gc_time + adjusted_pause_time - max_gc_time();
+  if (is_double_leq_0(diff))
+    return 0.0;
+
+  int index = _tail_index;
+  while ( 1 ) {
+    G1MMUTrackerQueueElem *elem = &_array[index];
+    if (elem->end_time() > limit) {
+      if (elem->start_time() > limit)
+        diff -= elem->duration();
+      else
+        diff -= elem->end_time() - limit;
+      if (is_double_leq_0(diff))
+        return  elem->end_time() + diff + _time_slice - adjusted_pause_time - current_time;
+    }
+    index = trim_index(index+1);
+    guarantee(index != trim_index(_head_index + 1), "should not go past head");
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Keeps track of the GC work and decides when it is OK to do GC work
+// and for how long so that the MMU invariants are maintained.
+
+/***** ALL TIMES ARE IN SECS!!!!!!! *****/
+
+// this is the "interface"
+class G1MMUTracker {
+protected:
+  double          _time_slice;
+  double          _max_gc_time; // this is per time slice
+
+  double          _conc_overhead_time_sec;
+
+public:
+  G1MMUTracker(double time_slice, double max_gc_time);
+
+  void update_conc_overhead(double conc_overhead);
+
+  virtual void add_pause(double start, double end, bool gc_thread) = 0;
+  virtual double longest_pause(double current_time) = 0;
+  virtual double when_sec(double current_time, double pause_time) = 0;
+
+  double max_gc_time() {
+    return _max_gc_time - _conc_overhead_time_sec;
+  }
+
+  inline bool now_max_gc(double current_time) {
+    return when_sec(current_time, max_gc_time()) < 0.00001;
+  }
+
+  inline double when_max_gc_sec(double current_time) {
+    return when_sec(current_time, max_gc_time());
+  }
+
+  inline jlong when_max_gc_ms(double current_time) {
+    double when = when_max_gc_sec(current_time);
+    return (jlong) (when * 1000.0);
+  }
+
+  inline jlong when_ms(double current_time, double pause_time) {
+    double when = when_sec(current_time, pause_time);
+    return (jlong) (when * 1000.0);
+  }
+};
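Assuming MMU stands for minimum mutator utilization (the usual reading in GC literature), a (time_slice, max_gc_time) pair promises the mutator at least (time_slice - max_gc_time) / time_slice of every window. A trivial, hypothetical computation:

    #include <cstdio>

    int main() {
      double time_slice  = 0.200;   // soft real-time window, in secs (made-up value)
      double max_gc_time = 0.050;   // GC budget allowed per window (made-up value)
      printf("mutator utilization goal: %.0f%%\n",
             100.0 * (time_slice - max_gc_time) / time_slice);   // prints 75%
      return 0;
    }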
+
+class G1MMUTrackerQueueElem {
+private:
+  double _start_time;
+  double _end_time;
+
+public:
+  inline double start_time() { return _start_time; }
+  inline double end_time()   { return _end_time; }
+  inline double duration()   { return _end_time - _start_time; }
+
+  G1MMUTrackerQueueElem() {
+    _start_time = 0.0;
+    _end_time   = 0.0;
+  }
+
+  G1MMUTrackerQueueElem(double start_time, double end_time) {
+    _start_time = start_time;
+    _end_time   = end_time;
+  }
+};
+
+// this is an implementation of the MMUTracker using a (fixed-size) queue
+// that keeps track of all the recent pause times
+class G1MMUTrackerQueue: public G1MMUTracker {
+private:
+  enum PrivateConstants {
+    QueueLength = 64
+  };
+
+  // The array keeps track of all the pauses that fall within a time
+  // slice (the last time slice during which pauses took place).
+  // The data structure implemented is a circular queue.
+  // Head "points" to the most recent addition, tail to the oldest one.
+  // The array is of fixed size and I don't think we'll need more than
+  // two or three entries with the current behaviour of G1 pauses.
+  // If the array is full, an easy fix is to look for the pauses with
+  // the shortest gap between them and consolidate them.
+
+  G1MMUTrackerQueueElem _array[QueueLength];
+  int                   _head_index;
+  int                   _tail_index;
+  int                   _no_entries;
+
+  inline int trim_index(int index) {
+    return (index + QueueLength) % QueueLength;
+  }
+
+  void remove_expired_entries(double current_time);
+  double calculate_gc_time(double current_time);
+
+  double longest_pause_internal(double current_time);
+  double when_internal(double current_time, double pause_time);
+
+public:
+  G1MMUTrackerQueue(double time_slice, double max_gc_time);
+
+  virtual void add_pause(double start, double end, bool gc_thread);
+
+  virtual double longest_pause(double current_time);
+  virtual double when_sec(double current_time, double pause_time);
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,385 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1MarkSweep.cpp.incl"
+
+class HeapRegion;
+
+void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp,
+                                      bool clear_all_softrefs) {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
+
+  // hook up weak ref data so it can be used during Mark-Sweep
+  assert(GenMarkSweep::ref_processor() == NULL, "no stomping");
+  GenMarkSweep::_ref_processor = rp;
+  assert(rp != NULL, "should be non-NULL");
+
+  // When collecting the permanent generation methodOops may be moving,
+  // so we either have to flush all bcp data or convert it into bci.
+  CodeCache::gc_prologue();
+  Threads::gc_prologue();
+
+  // Increment the invocation count for the permanent generation, since it is
+  // implicitly collected whenever we do a full mark sweep collection.
+  SharedHeap* sh = SharedHeap::heap();
+  sh->perm_gen()->stat_record()->invocations++;
+
+  bool marked_for_unloading = false;
+
+  allocate_stacks();
+
+  // We should save the marks of the currently locked biased monitors.
+  // The marking doesn't preserve the marks of biased objects.
+  BiasedLocking::preserve_marks();
+
+  mark_sweep_phase1(marked_for_unloading, clear_all_softrefs);
+
+  if (G1VerifyConcMark) {
+      G1CollectedHeap* g1h = G1CollectedHeap::heap();
+      g1h->checkConcurrentMark();
+  }
+
+  mark_sweep_phase2();
+
+  // Don't add any more derived pointers during phase3
+  COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
+
+  mark_sweep_phase3();
+
+  mark_sweep_phase4();
+
+  GenMarkSweep::restore_marks();
+  BiasedLocking::restore_marks();
+  GenMarkSweep::deallocate_stacks();
+
+  // We must invalidate the perm-gen rs, so that it gets rebuilt.
+  GenRemSet* rs = sh->rem_set();
+  rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/);
+
+  // "free at last gc" is calculated from these.
+  // CHF: cheating for now!!!
+  //  Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity());
+  //  Universe::set_heap_used_at_last_gc(Universe::heap()->used());
+
+  Threads::gc_epilogue();
+  CodeCache::gc_epilogue();
+
+  // refs processing: clean slate
+  GenMarkSweep::_ref_processor = NULL;
+}
+
+
+void G1MarkSweep::allocate_stacks() {
+  GenMarkSweep::_preserved_count_max = 0;
+  GenMarkSweep::_preserved_marks = NULL;
+  GenMarkSweep::_preserved_count = 0;
+  GenMarkSweep::_preserved_mark_stack = NULL;
+  GenMarkSweep::_preserved_oop_stack = NULL;
+
+  GenMarkSweep::_marking_stack =
+    new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
+
+  size_t size = SystemDictionary::number_of_classes() * 2;
+  GenMarkSweep::_revisit_klass_stack =
+    new (ResourceObj::C_HEAP) GrowableArray<Klass*>((int)size, true);
+}
+
+void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
+                                    bool clear_all_softrefs) {
+  // Recursively traverse all live objects and mark them
+  EventMark m("1 mark object");
+  TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace(" 1");
+
+  SharedHeap* sh = SharedHeap::heap();
+
+  sh->process_strong_roots(true,  // Collecting permanent generation.
+                           SharedHeap::SO_SystemClasses,
+                           &GenMarkSweep::follow_root_closure,
+                           &GenMarkSweep::follow_root_closure);
+
+  // Process reference objects found during marking
+  ReferencePolicy *soft_ref_policy;
+  if (clear_all_softrefs) {
+    soft_ref_policy = new AlwaysClearPolicy();
+  } else {
+#ifdef COMPILER2
+    soft_ref_policy = new LRUMaxHeapPolicy();
+#else
+    soft_ref_policy = new LRUCurrentHeapPolicy();
+#endif
+  }
+  assert(soft_ref_policy != NULL,"No soft reference policy");
+  GenMarkSweep::ref_processor()->process_discovered_references(
+                                   soft_ref_policy,
+                                   &GenMarkSweep::is_alive,
+                                   &GenMarkSweep::keep_alive,
+                                   &GenMarkSweep::follow_stack_closure,
+                                   NULL);
+
+  // Follow system dictionary roots and unload classes
+  bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive);
+  assert(GenMarkSweep::_marking_stack->is_empty(),
+         "stack should be empty by now");
+
+  // Follow code cache roots (has to be done after system dictionary,
+  // assumes all live klasses are marked)
+  CodeCache::do_unloading(&GenMarkSweep::is_alive,
+                          &GenMarkSweep::keep_alive,
+                          purged_class);
+  GenMarkSweep::follow_stack();
+
+  // Update subklass/sibling/implementor links of live klasses
+  GenMarkSweep::follow_weak_klass_links();
+  assert(GenMarkSweep::_marking_stack->is_empty(),
+         "stack should be empty by now");
+
+  // Visit symbol and interned string tables and delete unmarked oops
+  SymbolTable::unlink(&GenMarkSweep::is_alive);
+  StringTable::unlink(&GenMarkSweep::is_alive);
+
+  assert(GenMarkSweep::_marking_stack->is_empty(),
+         "stack should be empty by now");
+}
+
+class G1PrepareCompactClosure: public HeapRegionClosure {
+  ModRefBarrierSet* _mrbs;
+  CompactPoint _cp;
+  bool _popular_only;
+
+  void free_humongous_region(HeapRegion* hr) {
+    HeapWord* bot = hr->bottom();
+    HeapWord* end = hr->end();
+    assert(hr->startsHumongous(),
+           "Only the start of a humongous region should be freed.");
+    G1CollectedHeap::heap()->free_region(hr);
+    hr->prepare_for_compaction(&_cp);
+    // Also clear the part of the card table that will be unused after
+    // compaction.
+    _mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
+  }
+
+public:
+  G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) :
+    _cp(NULL, cs, cs->initialize_threshold()),
+    _mrbs(G1CollectedHeap::heap()->mr_bs()),
+    _popular_only(popular_only)
+  {}
+  bool doHeapRegion(HeapRegion* hr) {
+    if (_popular_only && !hr->popular())
+      return true; // terminate early
+    else if (!_popular_only && hr->popular())
+      return false; // skip this one.
+
+    if (hr->isHumongous()) {
+      if (hr->startsHumongous()) {
+        oop obj = oop(hr->bottom());
+        if (obj->is_gc_marked()) {
+          obj->forward_to(obj);
+        } else  {
+          free_humongous_region(hr);
+        }
+      } else {
+        assert(hr->continuesHumongous(), "Invalid humongous.");
+      }
+    } else {
+      hr->prepare_for_compaction(&_cp);
+      // Also clear the part of the card table that will be unused after
+      // compaction.
+      _mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
+    }
+    return false;
+  }
+};
+// Stolen verbatim from g1CollectedHeap.cpp
+class FindFirstRegionClosure: public HeapRegionClosure {
+  HeapRegion* _a_region;
+  bool _find_popular;
+public:
+  FindFirstRegionClosure(bool find_popular) :
+    _a_region(NULL), _find_popular(find_popular) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->popular() == _find_popular) {
+      _a_region = r;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  HeapRegion* result() { return _a_region; }
+};
+
+void G1MarkSweep::mark_sweep_phase2() {
+  // Now all live objects are marked, compute the new object addresses.
+
+  // It is imperative that we traverse perm_gen LAST. If dead space is
+  // allowed, a range of dead objects may get overwritten by a dead int
+  // array. If perm_gen is not traversed last a klassOop may get
+  // overwritten. This is fine since it is dead, but if the class has dead
+  // instances we have to skip them, and in order to find their size we
+  // need the klassOop!
+  //
+  // It is not required that we traverse spaces in the same order in
+  // phase2, phase3 and phase4, but the ValidateMarkSweep live oops
+  // tracking expects us to do so. See comment under phase4.
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  Generation* pg = g1h->perm_gen();
+
+  EventMark m("2 compute new addresses");
+  TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace("2");
+
+  // First we compact the popular regions.
+  if (G1NumPopularRegions > 0) {
+    CompactibleSpace* sp = g1h->first_compactible_space();
+    FindFirstRegionClosure cl(true /*find_popular*/);
+    g1h->heap_region_iterate(&cl);
+    HeapRegion *r = cl.result();
+    assert(r->popular(), "should have found a popular region.");
+    assert(r == sp, "first popular heap region should "
+                    "== first compactible space");
+    G1PrepareCompactClosure blk(sp, true/*popular_only*/);
+    g1h->heap_region_iterate(&blk);
+  }
+
+  // Now we do the regular regions.
+  FindFirstRegionClosure cl(false /*find_popular*/);
+  g1h->heap_region_iterate(&cl);
+  HeapRegion *r = cl.result();
+  assert(!r->popular(), "should have found a non-popular region.");
+  CompactibleSpace* sp = r;
+  if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
+    sp = r->next_compaction_space();
+  }
+
+  G1PrepareCompactClosure blk(sp, false/*popular_only*/);
+  g1h->heap_region_iterate(&blk);
+
+  CompactPoint perm_cp(pg, NULL, NULL);
+  pg->prepare_for_compaction(&perm_cp);
+}
+
+class G1AdjustPointersClosure: public HeapRegionClosure {
+ public:
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->isHumongous()) {
+      if (r->startsHumongous()) {
+        // We must adjust the pointers on the single H object.
+        oop obj = oop(r->bottom());
+        debug_only(GenMarkSweep::track_interior_pointers(obj));
+        // point all the oops to the new location
+        obj->adjust_pointers();
+        debug_only(GenMarkSweep::check_interior_pointers());
+      }
+    } else {
+      // This really ought to be "as_CompactibleSpace"...
+      r->adjust_pointers();
+    }
+    return false;
+  }
+};
+
+void G1MarkSweep::mark_sweep_phase3() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  Generation* pg = g1h->perm_gen();
+
+  // Adjust the pointers to reflect the new locations
+  EventMark m("3 adjust pointers");
+  TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace("3");
+
+  SharedHeap* sh = SharedHeap::heap();
+
+  sh->process_strong_roots(true,  // Collecting permanent generation.
+                           SharedHeap::SO_AllClasses,
+                           &GenMarkSweep::adjust_root_pointer_closure,
+                           &GenMarkSweep::adjust_pointer_closure);
+
+  g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure);
+
+  // Now adjust pointers in remaining weak roots.  (All of which should
+  // have been cleared if they pointed to non-surviving objects.)
+  g1h->g1_process_weak_roots(&GenMarkSweep::adjust_root_pointer_closure,
+                             &GenMarkSweep::adjust_pointer_closure);
+
+  GenMarkSweep::adjust_marks();
+
+  G1AdjustPointersClosure blk;
+  g1h->heap_region_iterate(&blk);
+  pg->adjust_pointers();
+}
+
+class G1SpaceCompactClosure: public HeapRegionClosure {
+public:
+  G1SpaceCompactClosure() {}
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->isHumongous()) {
+      if (hr->startsHumongous()) {
+        oop obj = oop(hr->bottom());
+        if (obj->is_gc_marked()) {
+          obj->init_mark();
+        } else {
+          assert(hr->is_empty(), "Should have been cleared in phase 2.");
+        }
+        hr->reset_during_compaction();
+      }
+    } else {
+      hr->compact();
+    }
+    return false;
+  }
+};
+
+void G1MarkSweep::mark_sweep_phase4() {
+  // All pointers are now adjusted, move objects accordingly
+
+  // It is imperative that we traverse perm_gen first in phase4. All
+  // classes must be allocated earlier than their instances, and traversing
+  // perm_gen first makes sure that all klassOops have moved to their new
+  // location before any instance does a dispatch through its klass!
+
+  // The ValidateMarkSweep live oops tracking expects us to traverse spaces
+  // in the same order in phase2, phase3 and phase4. We don't quite do that
+  // here (perm_gen first rather than last), so we tell the validate code
+  // to use a higher index (saved from phase2) when verifying perm_gen.
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  Generation* pg = g1h->perm_gen();
+
+  EventMark m("4 compact heap");
+  TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
+  GenMarkSweep::trace("4");
+
+  pg->compact();
+
+  G1SpaceCompactClosure blk;
+  g1h->heap_region_iterate(&blk);
+
+}
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class ReferenceProcessor;
+
+// G1MarkSweep takes care of global mark-compact garbage collection for a
+// G1CollectedHeap using a four-phase pointer forwarding algorithm.  All
+// generations are assumed to support marking; those that can also support
+// compaction.
+//
+// Class unloading will only occur when a full gc is invoked.
+
+
+class G1MarkSweep : AllStatic {
+  friend class VM_G1MarkSweep;
+  friend class Scavenge;
+
+ public:
+
+  static void invoke_at_safepoint(ReferenceProcessor* rp,
+                                  bool clear_all_softrefs);
+
+ private:
+
+  // Mark live objects
+  static void mark_sweep_phase1(bool& marked_for_deopt,
+                                bool clear_all_softrefs);
+  // Calculate new addresses
+  static void mark_sweep_phase2();
+  // Update pointers
+  static void mark_sweep_phase3();
+  // Move objects to new positions
+  static void mark_sweep_phase4();
+
+  static void allocate_stacks();
+};
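The four-phase pointer-forwarding scheme named in the comment above (mark, compute new addresses, adjust pointers, move) is easiest to see on a toy heap. A heavily simplified standalone sketch, not HotSpot code, where objects are array entries with one reference each:

    #include <cstdio>
    #include <vector>

    struct Obj {
      int start, size;   // current placement in the toy heap (slot units)
      int ref;           // index of the referenced object, -1 if none
      bool marked;       // set in phase 1
      int new_start;     // forwarding address, computed in phase 2
    };

    int main() {
      std::vector<Obj> objs = {
        {0, 2,  2, false, -1},   // obj 0 at slots [0,2), points to obj 2
        {2, 3, -1, false, -1},   // obj 1, unreachable garbage
        {5, 1,  3, false, -1},   // obj 2, points to obj 3
        {6, 2, -1, false, -1},   // obj 3
      };
      int root = 0;

      // Phase 1: mark everything reachable from the root (a simple chain here).
      for (int i = root; i != -1; i = objs[i].ref) objs[i].marked = true;

      // Phase 2: compute new (compacted) addresses for live objects, in order.
      int free_slot = 0;
      for (auto& o : objs)
        if (o.marked) { o.new_start = free_slot; free_slot += o.size; }

      // Phase 3: adjust references to name the forwarded locations.
      for (auto& o : objs)
        if (o.marked && o.ref != -1)
          printf("obj at slot %d: target moves %d -> %d\n",
                 o.start, objs[o.ref].start, objs[o.ref].new_start);

      // Phase 4: slide live objects to their new addresses (data copy omitted).
      for (auto& o : objs)
        if (o.marked) o.start = o.new_start;

      printf("heap compacted down to %d slots\n", free_slot);
      return 0;
    }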
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class HeapRegion;
+class G1CollectedHeap;
+class G1RemSet;
+class HRInto_G1RemSet;
+class G1RemSet;
+class ConcurrentMark;
+class DirtyCardToOopClosure;
+class CMBitMap;
+class CMMarkStack;
+class G1ParScanThreadState;
+
+// A class that scans oops in a given heap region (much as OopsInGenClosure
+// scans oops in a generation.)
+class OopsInHeapRegionClosure: public OopsInGenClosure {
+protected:
+  HeapRegion* _from;
+public:
+  virtual void set_region(HeapRegion* from) { _from = from; }
+};
+
+
+class G1ScanAndBalanceClosure : public OopClosure {
+  G1CollectedHeap* _g1;
+  static int _nq;
+public:
+  G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { }
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+};
+
+class G1ParClosureSuper : public OopsInHeapRegionClosure {
+protected:
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem;
+  ConcurrentMark* _cm;
+  G1ParScanThreadState* _par_scan_state;
+public:
+  G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
+  bool apply_to_weak_ref_discovered_field() { return true; }
+};
+
+class G1ParScanClosure : public G1ParClosureSuper {
+public:
+  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ParClosureSuper(g1, par_scan_state) { }
+  void do_oop_nv(oop* p);   // should be made inline
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)          { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
+};
+
+#define G1_PARTIAL_ARRAY_MASK 1
+
+class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
+  G1ParScanClosure _scanner;
+  template <class T> void process_array_chunk(oop obj, int start, int end);
+public:
+  G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { }
+  void do_oop_nv(oop* p);
+  void do_oop_nv(narrowOop* p)      { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+
+class G1ParCopyHelper : public G1ParClosureSuper {
+  G1ParScanClosure *_scanner;
+protected:
+  void mark_forwardee(oop* p);
+  oop copy_to_survivor_space(oop obj);
+public:
+  G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
+                  G1ParScanClosure *scanner) :
+    G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
+};
+
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+class G1ParCopyClosure : public G1ParCopyHelper {
+  G1ParScanClosure _scanner;
+  void do_oop_work(oop* p);
+  void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); }
+public:
+  G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
+  inline void do_oop_nv(oop* p) {
+    do_oop_work(p);
+    if (do_mark_forwardee)
+      mark_forwardee(p);
+  }
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
+
+
+class FilterIntoCSClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  OopClosure* _oc;
+  DirtyCardToOopClosure* _dcto_cl;
+public:
+  FilterIntoCSClosure(  DirtyCardToOopClosure* dcto_cl,
+                        G1CollectedHeap* g1, OopClosure* oc) :
+    _dcto_cl(dcto_cl), _g1(g1), _oc(oc)
+  {}
+  inline void do_oop_nv(oop* p);
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+};
+
+class FilterInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1;
+  OopsInHeapRegionClosure* _oc;
+public:
+  FilterInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
+                                     OopsInHeapRegionClosure* oc) :
+    _g1(g1), _oc(oc)
+  {}
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  void set_region(HeapRegion* from) {
+    _oc->set_region(from);
+  }
+};
+
+class FilterAndMarkInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  OopsInHeapRegionClosure* _oc;
+public:
+  FilterAndMarkInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
+                                            OopsInHeapRegionClosure* oc,
+                                            ConcurrentMark* cm)
+  : _g1(g1), _oc(oc), _cm(cm) { }
+
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  void set_region(HeapRegion* from) {
+    _oc->set_region(from);
+  }
+};
+
+class FilterOutOfRegionClosure: public OopClosure {
+  HeapWord* _r_bottom;
+  HeapWord* _r_end;
+  OopClosure* _oc;
+  int _out_of_region;
+public:
+  FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
+  inline  void do_oop_nv(oop* p);
+  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  int out_of_region() { return _out_of_region; }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * This really ought to be an inline function, but apparently the C++
+ * compiler sometimes sees fit to ignore inline declarations.  Sigh.
+ */
+
+// This must be ifdef'ed because the counting it controls is in a
+// perf-critical inner loop.
+#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
+
+inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL && _g1->obj_in_cs(obj)) {
+    _oc->do_oop(p);
+#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
+    _dcto_cl->incr_count();
+#endif
+  }
+}
+
+inline void FilterIntoCSClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
+
+inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  HeapWord* obj_hw = (HeapWord*)obj;
+  if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) {
+    _oc->do_oop(p);
+#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
+    _out_of_region++;
+#endif
+  }
+}
+
+inline void FilterOutOfRegionClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL && _g1->obj_in_cs(obj))
+    _oc->do_oop(p);
+}
+
+inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+
+inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
+  oop obj = *p;
+  if (obj != NULL) {
+    HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj);
+    if (hr != NULL) {
+      if (hr->in_collection_set())
+        _oc->do_oop(p);
+      else if (!hr->is_young())
+        _cm->grayRoot(obj);
+    }
+  }
+}
+
+inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p)
+{
+  do_oop_nv(p);
+}
+
+inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) {
+  RefToScanQueue* q;
+  if (ParallelGCThreads > 0) {
+    // Deal the work out equally.
+    _nq = (_nq + 1) % ParallelGCThreads;
+    q = _g1->task_queue(_nq);
+  } else {
+    q = _g1->task_queue(0);
+  }
+  bool nooverflow = q->push(p);
+  guarantee(nooverflow, "Overflow during popular region processing");
+}
+
+inline void G1ScanAndBalanceClosure::do_oop(oop* p) {
+  do_oop_nv(p);
+}
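G1ScanAndBalanceClosure above deals each reference out to the per-thread task queues in round-robin order so the work is spread roughly evenly. The same dealing pattern with toy queues (not the HotSpot RefToScanQueue):

    #include <cstdio>
    #include <queue>
    #include <vector>

    int main() {
      const int n_queues = 4;                     // stands in for ParallelGCThreads
      std::vector<std::queue<int>> queues(n_queues);

      int next = 0;
      for (int item = 0; item < 10; item++) {     // ten pieces of work to balance
        next = (next + 1) % n_queues;             // same rotation as _nq above
        queues[next].push(item);
      }

      for (int q = 0; q < n_queues; q++)
        printf("queue %d holds %zu items\n", q, queues[q].size());
      return 0;
    }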
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,1003 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1RemSet.cpp.incl"
+
+#define CARD_REPEAT_HISTO 0
+
+#if CARD_REPEAT_HISTO
+static size_t ct_freq_sz;
+static jbyte* ct_freq = NULL;
+
+void init_ct_freq_table(size_t heap_sz_bytes) {
+  if (ct_freq == NULL) {
+    ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
+    ct_freq = new jbyte[ct_freq_sz];
+    for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
+  }
+}
+
+void ct_freq_note_card(size_t index) {
+  assert(0 <= index && index < ct_freq_sz, "Bounds error.");
+  if (ct_freq[index] < 100) { ct_freq[index]++; }
+}
+
+static IntHistogram card_repeat_count(10, 10);
+
+void ct_freq_update_histo_and_reset() {
+  for (size_t j = 0; j < ct_freq_sz; j++) {
+    card_repeat_count.add_entry(ct_freq[j]);
+    ct_freq[j] = 0;
+  }
+
+}
+#endif
+
+
+class IntoCSOopClosure: public OopsInHeapRegionClosure {
+  OopsInHeapRegionClosure* _blk;
+  G1CollectedHeap* _g1;
+public:
+  IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
+    _g1(g1), _blk(blk) {}
+  void set_region(HeapRegion* from) {
+    _blk->set_region(from);
+  }
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    oop obj = *p;
+    if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
+  }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool idempotent() { return true; }
+};
+
+class IntoCSRegionClosure: public HeapRegionClosure {
+  IntoCSOopClosure _blk;
+  G1CollectedHeap* _g1;
+public:
+  IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
+    _g1(g1), _blk(g1, blk) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set()) {
+      _blk.set_region(r);
+      if (r->isHumongous()) {
+        if (r->startsHumongous()) {
+          oop obj = oop(r->bottom());
+          obj->oop_iterate(&_blk);
+        }
+      } else {
+        r->oop_before_save_marks_iterate(&_blk);
+      }
+    }
+    return false;
+  }
+};
+
+void
+StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
+                                            int worker_i) {
+  IntoCSRegionClosure rc(_g1, oc);
+  _g1->heap_region_iterate(&rc);
+}
+
+class UpdateRSOopClosure: public OopClosure {
+  HeapRegion* _from;
+  HRInto_G1RemSet* _rs;
+  int _worker_i;
+public:
+  UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
+    _from(NULL), _rs(rs), _worker_i(worker_i) {
+    guarantee(_rs != NULL, "Requires an HRIntoG1RemSet");
+  }
+
+  void set_from(HeapRegion* from) {
+    assert(from != NULL, "from region must be non-NULL");
+    _from = from;
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    assert(_from != NULL, "from region must be non-NULL");
+    _rs->par_write_ref(_from, p, _worker_i);
+  }
+  // Override: this closure is idempotent.
+  //  bool idempotent() { return true; }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+};
+
+class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
+  G1CollectedHeap*    _g1h;
+  ModRefBarrierSet*   _mr_bs;
+  UpdateRSOopClosure  _cl;
+  int _worker_i;
+public:
+  UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
+    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
+    _mr_bs(g1->mr_bs()),
+    _worker_i(worker_i),
+    _g1h(g1)
+    {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set() && !r->continuesHumongous()) {
+      _cl.set_from(r);
+      r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind);
+      _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true);
+    }
+    return false;
+  }
+};
+
+class VerifyRSCleanCardOopClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+public:
+  VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    oop obj = *p;
+    HeapRegion* to = _g1->heap_region_containing(obj);
+    guarantee(to == NULL || !to->in_collection_set(),
+              "Missed a rem set member.");
+  }
+};
+
+HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
+  : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
+    _cg1r(g1->concurrent_g1_refine()),
+    _par_traversal_in_progress(false), _new_refs(NULL),
+    _cards_scanned(NULL), _total_cards_scanned(0)
+{
+  _seq_task = new SubTasksDone(NumSeqTasks);
+  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, ParallelGCThreads);
+}
+
+HRInto_G1RemSet::~HRInto_G1RemSet() {
+  delete _seq_task;
+}
+
+void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
+  if (_g1->is_in_g1_reserved(mr.start())) {
+    _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
+    if (_start_first == NULL) _start_first = mr.start();
+  }
+}
+
+class ScanRSClosure : public HeapRegionClosure {
+  size_t _cards_done, _cards;
+  G1CollectedHeap* _g1h;
+  OopsInHeapRegionClosure* _oc;
+  G1BlockOffsetSharedArray* _bot_shared;
+  CardTableModRefBS *_ct_bs;
+  int _worker_i;
+  bool _try_claimed;
+public:
+  ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
+    _oc(oc),
+    _cards(0),
+    _cards_done(0),
+    _worker_i(worker_i),
+    _try_claimed(false)
+  {
+    _g1h = G1CollectedHeap::heap();
+    _bot_shared = _g1h->bot_shared();
+    _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
+  }
+
+  void set_try_claimed() { _try_claimed = true; }
+
+  void scanCard(size_t index, HeapRegion *r) {
+    _cards_done++;
+    DirtyCardToOopClosure* cl =
+      r->new_dcto_closure(_oc,
+                         CardTableModRefBS::Precise,
+                         HeapRegionDCTOC::IntoCSFilterKind);
+
+    // Set the "from" region in the closure.
+    _oc->set_region(r);
+    HeapWord* card_start = _bot_shared->address_for_index(index);
+    HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
+    Space *sp = SharedHeap::heap()->space_containing(card_start);
+    MemRegion sm_region;
+    if (ParallelGCThreads > 0) {
+      // first find the used area
+      sm_region = sp->used_region_at_save_marks();
+    } else {
+      // The closure is not idempotent.  We shouldn't look at objects
+      // allocated during the GC.
+      sm_region = sp->used_region_at_save_marks();
+    }
+    MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
+    if (!mr.is_empty()) {
+      cl->do_MemRegion(mr);
+    }
+  }
+
+  void printCard(HeapRegion* card_region, size_t card_index,
+                 HeapWord* card_start) {
+    gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
+                           "RS names card %p: "
+                           "[" PTR_FORMAT ", " PTR_FORMAT ")",
+                           _worker_i,
+                           card_region->bottom(), card_region->end(),
+                           card_index,
+                           card_start, card_start + G1BlockOffsetSharedArray::N_words);
+  }
+
+  bool doHeapRegion(HeapRegion* r) {
+    assert(r->in_collection_set(), "should only be called on elements of CS.");
+    HeapRegionRemSet* hrrs = r->rem_set();
+    if (hrrs->iter_is_complete()) return false; // All done.
+    if (!_try_claimed && !hrrs->claim_iter()) return false;
+    // If we didn't return above, then
+    //   _try_claimed || r->claim_iter()
+    // is true: either we're supposed to work on claimed-but-not-complete
+    // regions, or we successfully claimed the region.
+    HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
+    hrrs->init_iterator(iter);
+    size_t card_index;
+    while (iter->has_next(card_index)) {
+      HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
+
+#if 0
+      gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
+                          card_start, card_start + CardTableModRefBS::card_size_in_words);
+#endif
+
+      HeapRegion* card_region = _g1h->heap_region_containing(card_start);
+      assert(card_region != NULL, "Yielding cards not in the heap?");
+      _cards++;
+
+      if (!card_region->in_collection_set()) {
+        // If the card is dirty, then we will scan it during updateRS.
+        if (!_ct_bs->is_card_claimed(card_index) &&
+            !_ct_bs->is_card_dirty(card_index)) {
+          assert(_ct_bs->is_card_clean(card_index) ||
+                 _ct_bs->is_card_claimed(card_index),
+                 "Card is either dirty, clean, or claimed");
+          if (_ct_bs->claim_card(card_index))
+            scanCard(card_index, card_region);
+        }
+      }
+    }
+    hrrs->set_iter_complete();
+    return false;
+  }
+  // Set all cards back to clean.
+  void cleanup() {_g1h->cleanUpCardTable();}
+  size_t cards_done() { return _cards_done;}
+  size_t cards_looked_up() { return _cards;}
+};
+
+// We want the parallel threads to start their scanning at
+// different collection set regions to avoid contention.
+// If we have:
+//          n collection set regions
+//          p threads
+// Then thread t will start at region t * floor (n/p)
+
+HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) {
+  HeapRegion* result = _g1p->collection_set();
+  if (ParallelGCThreads > 0) {
+    size_t cs_size = _g1p->collection_set_size();
+    int n_workers = _g1->workers()->total_workers();
+    size_t cs_spans = cs_size / n_workers;
+    size_t ind      = cs_spans * worker_i;
+    for (size_t i = 0; i < ind; i++)
+      result = result->next_in_collection_set();
+  }
+  return result;
+}
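A quick standalone check of the offsets the comment above describes (thread t starts at region t * floor(n/p)), using made-up counts:

    #include <cstdio>

    int main() {
      int n = 10;   // collection set regions (made up)
      int p = 4;    // parallel GC threads (made up)
      for (int t = 0; t < p; t++)
        printf("thread %d starts at region %d\n", t, t * (n / p));   // 0, 2, 4, 6
      return 0;
    }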
+
+void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
+  double rs_time_start = os::elapsedTime();
+  HeapRegion *startRegion = calculateStartRegion(worker_i);
+
+  BufferingOopsInHeapRegionClosure boc(oc);
+  ScanRSClosure scanRScl(&boc, worker_i);
+  _g1->collection_set_iterate_from(startRegion, &scanRScl);
+  scanRScl.set_try_claimed();
+  _g1->collection_set_iterate_from(startRegion, &scanRScl);
+
+  boc.done();
+  double closure_app_time_sec = boc.closure_app_seconds();
+  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
+    closure_app_time_sec;
+  double closure_app_time_ms = closure_app_time_sec * 1000.0;
+
+  assert( _cards_scanned != NULL, "invariant" );
+  _cards_scanned[worker_i] = scanRScl.cards_done();
+
+  _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
+  _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
+  if (ParallelGCThreads > 0) {
+    // In this case, we called scanNewRefsRS and recorded the corresponding
+    // time.
+    double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
+    if (scan_new_refs_time_ms > 0.0) {
+      closure_app_time_ms += scan_new_refs_time_ms;
+    }
+  }
+  _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
+}
+
+void HRInto_G1RemSet::updateRS(int worker_i) {
+  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+
+  double start = os::elapsedTime();
+  _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
+
+  if (G1RSBarrierUseQueue && !cg1r->do_traversal()) {
+    // Apply the appropriate closure to all remaining log entries.
+    _g1->iterate_dirty_card_closure(false, worker_i);
+    // Now there should be no dirty cards.
+    if (G1RSLogCheckCardTable) {
+      CountNonCleanMemRegionClosure cl(_g1);
+      _ct_bs->mod_card_iterate(&cl);
+      // XXX This isn't true any more: keeping cards of young regions
+      // marked dirty broke it.  Need some reasonable fix.
+      guarantee(cl.n() == 0, "Card table should be clean.");
+    }
+  } else {
+    UpdateRSOutOfRegionClosure update_rs(_g1, worker_i);
+    _g1->heap_region_iterate(&update_rs);
+    // We did a traversal; no further one is necessary.
+    if (G1RSBarrierUseQueue) {
+      assert(cg1r->do_traversal(), "Or we shouldn't have gotten here.");
+      cg1r->set_pya_cancel();
+    }
+    if (_cg1r->use_cache()) {
+      _cg1r->clear_and_record_card_counts();
+      _cg1r->clear_hot_cache();
+    }
+  }
+  _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
+}
+
+#ifndef PRODUCT
+class PrintRSClosure : public HeapRegionClosure {
+  int _count;
+public:
+  PrintRSClosure() : _count(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+    _count += (int) hrrs->occupied();
+    if (hrrs->occupied() == 0) {
+      gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") "
+                          "has no remset entries\n",
+                          r->bottom(), r->end());
+    } else {
+      gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n",
+                          r->bottom(), r->end());
+      r->print();
+      hrrs->print();
+      gclog_or_tty->print("\nDone printing rem set\n");
+    }
+    return false;
+  }
+  int occupied() {return _count;}
+};
+#endif
+
+class CountRSSizeClosure: public HeapRegionClosure {
+  size_t _n;
+  size_t _tot;
+  size_t _max;
+  HeapRegion* _max_r;
+  enum {
+    N = 20,
+    MIN = 6
+  };
+  int _histo[N];
+public:
+  CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
+    for (int i = 0; i < N; i++) _histo[i] = 0;
+  }
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      size_t occ = r->rem_set()->occupied();
+      _n++;
+      _tot += occ;
+      if (occ > _max) {
+        _max = occ;
+        _max_r = r;
+      }
+      // Fit it into a histo bin.
+      int s = 1 << MIN;
+      int i = 0;
+      while (occ > (size_t) s && i < (N-1)) {
+        s = s << 1;
+        i++;
+      }
+      _histo[i]++;
+    }
+    return false;
+  }
+  size_t n() { return _n; }
+  size_t tot() { return _tot; }
+  size_t mx() { return _max; }
+  HeapRegion* mxr() { return _max_r; }
+  void print_histo() {
+    int mx = N;
+    while (mx > 0) {
+      if (_histo[mx-1] > 0) break;
+      mx--;
+    }
+    gclog_or_tty->print_cr("Number of regions with given RS sizes:");
+    gclog_or_tty->print_cr("           <= %8d   %8d", 1 << MIN, _histo[0]);
+    for (int i = 1; i < mx-1; i++) {
+      gclog_or_tty->print_cr("  %8d  - %8d   %8d",
+                    (1 << (MIN + i - 1)) + 1,
+                    1 << (MIN + i),
+                    _histo[i]);
+    }
+    gclog_or_tty->print_cr("            > %8d   %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
+  }
+};
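The binning loop in doHeapRegion above places a remembered-set occupancy into exponentially sized buckets: bucket 0 holds values up to 2^MIN, each later bucket doubles the bound, and the last bucket catches everything else. The same rule stand-alone, with hypothetical occupancies:

    #include <cstdio>

    // Same bucketing rule as CountRSSizeClosure above (MIN = 6, N = 20).
    int bucket_for(unsigned long occ, int min_shift, int n_buckets) {
      int s = 1 << min_shift;
      int i = 0;
      while (occ > (unsigned long) s && i < n_buckets - 1) { s <<= 1; i++; }
      return i;
    }

    int main() {
      unsigned long samples[] = {10, 64, 65, 1000, 100000};
      for (unsigned long v : samples)
        printf("occupancy %lu -> bucket %d\n", v, bucket_for(v, 6, 20));
      return 0;
    }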
+
+void
+HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc,
+                                             int worker_i) {
+  double scan_new_refs_start_sec = os::elapsedTime();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
+  while (_new_refs[worker_i]->is_nonempty()) {
+    oop* p = _new_refs[worker_i]->pop();
+    oop obj = *p;
+    // *p was in the collection set when p was pushed on "_new_refs", but
+    // another thread may have processed this location from an RS, so it
+    // might not point into the CS any longer.  If so, it's obviously been
+    // processed, and we don't need to do anything further.
+    if (g1h->obj_in_cs(obj)) {
+      HeapRegion* r = g1h->heap_region_containing(p);
+
+      DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
+      assert(ParallelGCThreads > 1
+             || to->rem_set()->contains_reference(p),
+             "Invariant: pushed after being added."
+             "(Not reliable in parallel code.)");
+      oc->set_region(r);
+      // If "p" has already been processed concurrently, this is
+      // idempotent.
+      oc->do_oop(p);
+    }
+  }
+  _g1p->record_scan_new_refs_time(worker_i,
+                                  (os::elapsedTime() - scan_new_refs_start_sec)
+                                  * 1000.0);
+}
+
+void HRInto_G1RemSet::set_par_traversal(bool b) {
+  _par_traversal_in_progress = b;
+  HeapRegionRemSet::set_par_traversal(b);
+}
+
+void HRInto_G1RemSet::cleanupHRRS() {
+  HeapRegionRemSet::cleanup();
+}
+
+void
+HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
+                                             int worker_i) {
+#if CARD_REPEAT_HISTO
+  ct_freq_update_histo_and_reset();
+#endif
+  if (worker_i == 0) {
+    _cg1r->clear_and_record_card_counts();
+  }
+
+  // Make this into a command-line flag...
+  if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
+    CountRSSizeClosure count_cl;
+    _g1->heap_region_iterate(&count_cl);
+    gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
+                  "max region is " PTR_FORMAT,
+                  count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
+                  count_cl.mx(), count_cl.mxr());
+    count_cl.print_histo();
+  }
+
+  if (ParallelGCThreads > 0) {
+    // This is a temporary change to serialize the update and scanning
+    // of remembered sets. There are some race conditions when this is
+    // done in parallel and they are causing failures. When we resolve
+    // said race conditions, we'll revert to parallel remembered
+    // set updating and scanning. See CRs 6677707 and 6677708.
+    if (worker_i == 0) {
+      updateRS(worker_i);
+      scanNewRefsRS(oc, worker_i);
+      scanRS(oc, worker_i);
+    }
+  } else {
+    assert(worker_i == 0, "invariant");
+
+    updateRS(0);
+    scanRS(oc, 0);
+  }
+}
+
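+// Sequential set-up before any worker calls oops_into_collection_set_do:
+// flush the per-thread dirty card logs, disable concurrent processing by
+// the card-table refinement closure, allocate the per-worker
+// "_cards_scanned" counters, and, when running with parallel GC threads,
+// switch on parallel traversal mode and allocate the "_new_refs" arrays.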
+void HRInto_G1RemSet::
+prepare_for_oops_into_collection_set_do() {
+#if G1_REM_SET_LOGGING
+  PrintRSClosure cl;
+  _g1->collection_set_iterate(&cl);
+#endif
+  cleanupHRRS();
+  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+  _g1->set_refine_cte_cl_concurrency(false);
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  dcqs.concatenate_logs();
+
+  assert(!_par_traversal_in_progress, "Invariant between iterations.");
+  if (ParallelGCThreads > 0) {
+    set_par_traversal(true);
+    int n_workers = _g1->workers()->total_workers();
+    _seq_task->set_par_threads(n_workers);
+    for (uint i = 0; i < ParallelGCThreads; i++)
+      _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192,true);
+
+    if (cg1r->do_traversal()) {
+      updateRS(0);
+      // Have to do this again after updaters
+      cleanupHRRS();
+    }
+  }
+  guarantee( _cards_scanned == NULL, "invariant" );
+  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
+  _total_cards_scanned = 0;
+}
+
+
+class cleanUpIteratorsClosure : public HeapRegionClosure {
+  bool doHeapRegion(HeapRegion *r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+    hrrs->init_for_par_iteration();
+    return false;
+  }
+};
+
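+// Sequential tear-down after the workers are done: total up the cards
+// scanned, free the per-worker structures, re-enable concurrent card
+// refinement, reset the per-region remembered-set iterators for the
+// collection set, and clean the card table.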
+void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
+  guarantee( _cards_scanned != NULL, "invariant" );
+  _total_cards_scanned = 0;
+  for (uint i = 0; i < n_workers(); ++i)
+    _total_cards_scanned += _cards_scanned[i];
+  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
+  _cards_scanned = NULL;
+  // Cleanup after copy
+#if G1_REM_SET_LOGGING
+  PrintRSClosure cl;
+  _g1->heap_region_iterate(&cl);
+#endif
+  _g1->set_refine_cte_cl_concurrency(true);
+  cleanUpIteratorsClosure iterClosure;
+  _g1->collection_set_iterate(&iterClosure);
+  // Set all cards back to clean.
+  _g1->cleanUpCardTable();
+  if (ParallelGCThreads > 0) {
+    ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+    if (cg1r->do_traversal()) {
+      cg1r->cg1rThread()->set_do_traversal(false);
+    }
+    for (uint i = 0; i < ParallelGCThreads; i++) {
+      delete _new_refs[i];
+    }
+    set_par_traversal(false);
+  }
+  assert(!_par_traversal_in_progress, "Invariant between iterations.");
+}
+
+class UpdateRSObjectClosure: public ObjectClosure {
+  UpdateRSOopClosure* _update_rs_oop_cl;
+public:
+  UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) :
+    _update_rs_oop_cl(update_rs_oop_cl) {}
+  void do_object(oop obj) {
+    obj->oop_iterate(_update_rs_oop_cl);
+  }
+
+};
+
+class ScrubRSClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+  CardTableModRefBS* _ctbs;
+public:
+  ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
+    _g1h(G1CollectedHeap::heap()),
+    _region_bm(region_bm), _card_bm(card_bm),
+    _ctbs(NULL)
+  {
+    ModRefBarrierSet* bs = _g1h->mr_bs();
+    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
+    _ctbs = (CardTableModRefBS*)bs;
+  }
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
+    }
+    return false;
+  }
+};
+
+void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
+  ScrubRSClosure scrub_cl(region_bm, card_bm);
+  _g1->heap_region_iterate(&scrub_cl);
+}
+
+void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
+                                int worker_num, int claim_val) {
+  ScrubRSClosure scrub_cl(region_bm, card_bm);
+  _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
+}
+
+
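+// Closure used by the concurrent refinement pass: for each region that is
+// not in the collection set, not a humongous continuation, and not young,
+// walk the runs of dirty cards, clearing them and updating remembered
+// sets for the references found, and yield to the concurrent GC thread at
+// regular intervals.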
+class ConcRefineRegionClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _ctbs;
+  ConcurrentGCThread* _cgc_thrd;
+  ConcurrentG1Refine* _cg1r;
+  unsigned _cards_processed;
+  UpdateRSOopClosure _update_rs_oop_cl;
+public:
+  ConcRefineRegionClosure(CardTableModRefBS* ctbs,
+                          ConcurrentG1Refine* cg1r,
+                          HRInto_G1RemSet* g1rs) :
+    _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()),
+    _update_rs_oop_cl(g1rs), _cards_processed(0),
+    _g1h(G1CollectedHeap::heap())
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->in_collection_set() &&
+        !r->continuesHumongous() &&
+        !r->is_young()) {
+      _update_rs_oop_cl.set_from(r);
+      UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
+
+      // For each run of dirty card in the region:
+      //   1) Clear the cards.
+      //   2) Process the range corresponding to the run, adding any
+      //      necessary RS entries.
+      // 1 must precede 2, so that a concurrent modification redirties the
+      // card.  If a processing attempt does not succeed, because it runs
+      // into an unparseable region, we will do binary search to find the
+      // beginning of the next parseable region.
+      HeapWord* startAddr = r->bottom();
+      HeapWord* endAddr = r->used_region().end();
+      HeapWord* lastAddr;
+      HeapWord* nextAddr;
+
+      for (nextAddr = lastAddr = startAddr;
+           nextAddr < endAddr;
+           nextAddr = lastAddr) {
+        MemRegion dirtyRegion;
+
+        // Get and clear dirty region from card table
+        MemRegion next_mr(nextAddr, endAddr);
+        dirtyRegion =
+          _ctbs->dirty_card_range_after_reset(
+                           next_mr,
+                           true, CardTableModRefBS::clean_card_val());
+        assert(dirtyRegion.start() >= nextAddr,
+               "returned region inconsistent?");
+
+        if (!dirtyRegion.is_empty()) {
+          HeapWord* stop_point =
+            r->object_iterate_mem_careful(dirtyRegion,
+                                          &update_rs_obj_cl);
+          if (stop_point == NULL) {
+            lastAddr = dirtyRegion.end();
+            _cards_processed +=
+              (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words);
+          } else {
+            // We're going to skip one or more cards that we can't parse.
+            HeapWord* next_parseable_card =
+              r->next_block_start_careful(stop_point);
+            // Round this up to a card boundary.
+            next_parseable_card =
+              _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card));
+            // Now we invalidate the intervening cards so we'll see them
+            // again.
+            MemRegion remaining_dirty =
+              MemRegion(stop_point, dirtyRegion.end());
+            MemRegion skipped =
+              MemRegion(stop_point, next_parseable_card);
+            _ctbs->invalidate(skipped.intersection(remaining_dirty));
+
+            // Now start up again where we can parse.
+            lastAddr = next_parseable_card;
+
+            // Count how many we did completely.
+            _cards_processed +=
+              (stop_point - dirtyRegion.start()) /
+              CardTableModRefBS::card_size_in_words;
+          }
+          // Allow interruption at regular intervals.
+          // (Might need to make them more regular, if we get big
+          // dirty regions.)
+          if (_cgc_thrd != NULL) {
+            if (_cgc_thrd->should_yield()) {
+              _cgc_thrd->yield();
+              switch (_cg1r->get_pya()) {
+              case PYA_continue:
+                // This may have changed: re-read.
+                endAddr = r->used_region().end();
+                continue;
+              case PYA_restart: case PYA_cancel:
+                return true;
+              }
+            }
+          }
+        } else {
+          break;
+        }
+      }
+    }
+    // A good yield opportunity.
+    if (_cgc_thrd != NULL) {
+      if (_cgc_thrd->should_yield()) {
+        _cgc_thrd->yield();
+        switch (_cg1r->get_pya()) {
+        case PYA_restart: case PYA_cancel:
+          return true;
+        default:
+          break;
+        }
+
+      }
+    }
+    return false;
+  }
+
+  unsigned cards_processed() { return _cards_processed; }
+};
+
+
+void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) {
+  ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this);
+  _g1->heap_region_iterate(&cr_cl);
+  _conc_refine_traversals++;
+  _conc_refine_cards += cr_cl.cards_processed();
+}
+
+static IntHistogram out_of_histo(50, 50);
+
+
+
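+// Refine a single card: cards outside the G1 heap and cards in young or
+// collection-set regions are filtered out; the card may then be deferred
+// through the hot-card cache.  Otherwise the card is cleaned and the
+// corresponding heap range is scanned, updating remembered sets for the
+// references found.  If the scan hits an unallocated portion (e.g. the
+// unfilled part of a TLAB), the card is re-dirtied and re-enqueued.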
+void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
+  // If the card is no longer dirty, nothing to do.
+  if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
+
+  // Construct the region representing the card.
+  HeapWord* start = _ct_bs->addr_for(card_ptr);
+  // And find the region containing it.
+  HeapRegion* r = _g1->heap_region_containing(start);
+  if (r == NULL) {
+    guarantee(_g1->is_in_permanent(start), "Or else where?");
+    return;  // Not in the G1 heap (might be in perm, for example.)
+  }
+  // Why do we have to check here whether a card is on a young region,
+  // given that we dirty young regions and, as a result, the
+  // post-barrier is supposed to filter them out and never to enqueue
+  // them? When we allocate a new region as the "allocation region" we
+  // actually dirty its cards after we release the lock, since card
+  // dirtying while holding the lock was a performance bottleneck. So,
+  // as a result, it is possible for other threads to actually
+  // allocate objects in the region (after they acquire the lock)
+  // before all the cards on the region are dirtied. This is unlikely,
+  // and it doesn't happen often, but it can happen. So, the extra
+  // check below filters out those cards.
+  if (r->is_young()) {
+    return;
+  }
+  // While we are processing RSet buffers during the collection, we
+  // actually don't want to scan any cards on the collection set,
+  // since we don't want to update remembered sets with entries that
+  // point into the collection set, given that live objects from the
+  // collection set are about to move and such entries will be stale
+  // very soon. This change also deals with a reliability issue which
+  // involves scanning a card in the collection set and coming across
+  // an array that was being chunked and looking malformed. Note,
+  // however, that if evacuation fails, we have to scan any objects
+  // that were not moved and create any missing entries.
+  if (r->in_collection_set()) {
+    return;
+  }
+
+  // Should we defer it?
+  if (_cg1r->use_cache()) {
+    card_ptr = _cg1r->cache_insert(card_ptr);
+    // If it was not an eviction, nothing to do.
+    if (card_ptr == NULL) return;
+
+    // OK, we have to reset the card start, region, etc.
+    start = _ct_bs->addr_for(card_ptr);
+    r = _g1->heap_region_containing(start);
+    if (r == NULL) {
+      guarantee(_g1->is_in_permanent(start), "Or else where?");
+      return;  // Not in the G1 heap (might be in perm, for example.)
+    }
+    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
+  }
+
+  HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
+  MemRegion dirtyRegion(start, end);
+
+#if CARD_REPEAT_HISTO
+  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
+  ct_freq_note_card(_ct_bs->index_for(start));
+#endif
+
+  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
+  update_rs_oop_cl.set_from(r);
+  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
+
+  // Undirty the card.
+  *card_ptr = CardTableModRefBS::clean_card_val();
+  // We must complete this write before we do any of the reads below.
+  OrderAccess::storeload();
+  // And process it, being careful of unallocated portions of TLAB's.
+  HeapWord* stop_point =
+    r->oops_on_card_seq_iterate_careful(dirtyRegion,
+                                        &filter_then_update_rs_oop_cl);
+  // If stop_point is non-null, then we encountered an unallocated region
+  // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
+  // card and re-enqueue: if we put off the card until a GC pause, then the
+  // unallocated portion will be filled in.  Alternatively, we might try
+  // the full complexity of the technique used in "regular" precleaning.
+  if (stop_point != NULL) {
+    // The card might have gotten re-dirtied and re-enqueued while we
+    // worked.  (In fact, it's pretty likely.)
+    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+      *card_ptr = CardTableModRefBS::dirty_card_val();
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      DirtyCardQueue* sdcq =
+        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
+      sdcq->enqueue(card_ptr);
+    }
+  } else {
+    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
+    _conc_refine_cards++;
+  }
+}
+
+class HRRSStatsIter: public HeapRegionClosure {
+  size_t _occupied;
+  size_t _total_mem_sz;
+  size_t _max_mem_sz;
+  HeapRegion* _max_mem_sz_region;
+public:
+  HRRSStatsIter() :
+    _occupied(0),
+    _total_mem_sz(0),
+    _max_mem_sz(0),
+    _max_mem_sz_region(NULL)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) return false;
+    size_t mem_sz = r->rem_set()->mem_size();
+    if (mem_sz > _max_mem_sz) {
+      _max_mem_sz = mem_sz;
+      _max_mem_sz_region = r;
+    }
+    _total_mem_sz += mem_sz;
+    size_t occ = r->rem_set()->occupied();
+    _occupied += occ;
+    return false;
+  }
+  size_t total_mem_sz() { return _total_mem_sz; }
+  size_t max_mem_sz() { return _max_mem_sz; }
+  size_t occupied() { return _occupied; }
+  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
+};
+
+void HRInto_G1RemSet::print_summary_info() {
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  ConcurrentG1RefineThread* cg1r_thrd =
+    g1->concurrent_g1_refine()->cg1rThread();
+
+#if CARD_REPEAT_HISTO
+  gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
+  gclog_or_tty->print_cr("  # of repeats --> # of cards with that number.");
+  card_repeat_count.print_on(gclog_or_tty);
+#endif
+
+  if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
+    gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
+    gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
+    out_of_histo.print_on(gclog_or_tty);
+  }
+  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in "
+                "%5.2fs.",
+                _conc_refine_cards, cg1r_thrd->vtime_accum());
+
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  jint tot_processed_buffers =
+    dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
+  gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
+  gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS thread.",
+                dcqs.processed_buffers_rs_thread(),
+                100.0*(float)dcqs.processed_buffers_rs_thread()/
+                (float)tot_processed_buffers);
+  gclog_or_tty->print_cr("     %8d (%5.1f%%) by mutator threads.",
+                dcqs.processed_buffers_mut(),
+                100.0*(float)dcqs.processed_buffers_mut()/
+                (float)tot_processed_buffers);
+  gclog_or_tty->print_cr("   Did %d concurrent refinement traversals.",
+                _conc_refine_traversals);
+  if (!G1RSBarrierUseQueue) {
+    gclog_or_tty->print_cr("   Scanned %8.2f cards/traversal.",
+                  _conc_refine_traversals > 0 ?
+                  (float)_conc_refine_cards/(float)_conc_refine_traversals :
+                  0);
+  }
+  gclog_or_tty->print_cr("");
+  if (G1UseHRIntoRS) {
+    HRRSStatsIter blk;
+    g1->heap_region_iterate(&blk);
+    gclog_or_tty->print_cr("  Total heap region rem set sizes = " SIZE_FORMAT "K."
+                           "  Max = " SIZE_FORMAT "K.",
+                           blk.total_mem_sz()/K, blk.max_mem_sz()/K);
+    gclog_or_tty->print_cr("  Static structures = " SIZE_FORMAT "K,"
+                           " free_lists = " SIZE_FORMAT "K.",
+                           HeapRegionRemSet::static_mem_size()/K,
+                           HeapRegionRemSet::fl_mem_size()/K);
+    gclog_or_tty->print_cr("    %d occupied cards represented.",
+                           blk.occupied());
+    gclog_or_tty->print_cr("    Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
+                           " %s, cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
+                           blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
+                           (blk.max_mem_sz_region()->popular() ? "POP" : ""),
+                           (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
+                           (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
+    gclog_or_tty->print_cr("    Did %d coarsenings.",
+                  HeapRegionRemSet::n_coarsenings());
+
+  }
+}
+void HRInto_G1RemSet::prepare_for_verify() {
+  if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) {
+    cleanupHRRS();
+    _g1->set_refine_cte_cl_concurrency(false);
+    if (SafepointSynchronize::is_at_safepoint()) {
+      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+      dcqs.concatenate_logs();
+    }
+    bool cg1r_use_cache = _cg1r->use_cache();
+    _cg1r->set_use_cache(false);
+    updateRS(0);
+    _cg1r->set_use_cache(cg1r_use_cache);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A G1RemSet provides ways of iterating over pointers into a selected
+// collection set.
+
+class G1CollectedHeap;
+class CardTableModRefBarrierSet;
+class HRInto_G1RemSet;
+class ConcurrentG1Refine;
+
+class G1RemSet {
+protected:
+  G1CollectedHeap* _g1;
+
+  unsigned _conc_refine_traversals;
+  unsigned _conc_refine_cards;
+
+  size_t n_workers();
+
+public:
+  G1RemSet(G1CollectedHeap* g1) :
+    _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
+  {}
+
+  // Invoke "blk->do_oop" on all pointers into the CS in objects in regions
+  // outside the CS (having invoked "blk->set_region" to set the "from"
+  // region correctly beforehand.) The "worker_i" param is for the
+  // parallel case, where the number of the worker thread calling this
+  // function can be helpful in partitioning the work to be done. It
+  // should be the same as the "i" passed to the calling thread's
+  // work(i) function. In the sequential case this param will be ignored.
+  virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                           int worker_i) = 0;
+
+  // Prepare for and clean up after an oops_into_collection_set_do
+  // call.  Must call each of these once before and after (in sequential
+  // code) any threads call oops_into_collection_set_do.  (This offers an
+  // opportunity for sequential setup and teardown of structures needed by
+  // a parallel iteration over the CS's RS.)
+  virtual void prepare_for_oops_into_collection_set_do() = 0;
+  virtual void cleanup_after_oops_into_collection_set_do() = 0;
+
+  // If "this" is of the given subtype, return "this", else "NULL".
+  virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from")
+  // has changed to its new value.
+  virtual void write_ref(HeapRegion* from, oop* p) = 0;
+  virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
+
+  // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
+  // or card, respectively, such that a region or card with a corresponding
+  // 0 bit contains no part of any live object.  Eliminates any remembered
+  // set entries that correspond to dead heap ranges.
+  virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
+  // Like the above, but assumes it is called in parallel: "worker_num" is the
+  // parallel thread id of the current thread, and "claim_val" is the
+  // value that should be used to claim heap regions.
+  virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                         int worker_num, int claim_val) = 0;
+
+  // Do any "refinement" activity that might be appropriate to the given
+  // G1RemSet.  If "refinement" has iterative "passes", do one pass.
+  // If "t" is non-NULL, it is the thread performing the refinement.
+  // Default implementation does nothing.
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
+
+  // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
+  // join and leave around parts that must be atomic wrt GC.  (NULL means
+  // being done at a safepoint.)
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
+
+  unsigned conc_refine_cards() { return _conc_refine_cards; }
+
+  // Print any relevant summary info.
+  virtual void print_summary_info() {}
+
+  // Prepare remembered set for verification.
+  virtual void prepare_for_verify() {};
+};
+
+
+// The simplest possible G1RemSet: iterates over all objects in non-CS
+// regions, searching for pointers into the CS.
+class StupidG1RemSet: public G1RemSet {
+public:
+  StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do() {}
+  void cleanup_after_oops_into_collection_set_do() {}
+
+  // Nothing is necessary in the version below.
+  void write_ref(HeapRegion* from, oop* p) {}
+  void par_write_ref(HeapRegion* from, oop* p, int tid) {}
+
+  void scrub(BitMap* region_bm, BitMap* card_bm) {}
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val) {}
+
+};
+
+// A G1RemSet in which each heap region has a rem set that records the
+// external heap references into it.  Uses a mod ref bs to track updates,
+// so that they can be used to update the individual region remsets.
+
+class HRInto_G1RemSet: public G1RemSet {
+protected:
+  enum SomePrivateConstants {
+    UpdateRStoMergeSync  = 0,
+    MergeRStoDoDirtySync = 1,
+    DoDirtySync          = 2,
+    LastSync             = 3,
+
+    SeqTask              = 0,
+    NumSeqTasks          = 1
+  };
+
+  CardTableModRefBS*             _ct_bs;
+  SubTasksDone*                  _seq_task;
+  G1CollectorPolicy* _g1p;
+
+  ConcurrentG1Refine* _cg1r;
+
+  size_t*             _cards_scanned;
+  size_t              _total_cards_scanned;
+
+  // _par_traversal_in_progress is "true" iff a parallel traversal is in
+  // progress.  If so, then cards added to remembered sets should also have
+  // their references into the collection set summarized in "_new_refs".
+  bool _par_traversal_in_progress;
+  void set_par_traversal(bool b);
+  GrowableArray<oop*>** _new_refs;
+
+public:
+  // This is called to reset dual hash tables after the gc pause
+  // is finished and the initial hash table is no longer being
+  // scanned.
+  void cleanupHRRS();
+
+  HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
+  ~HRInto_G1RemSet();
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do();
+  void cleanup_after_oops_into_collection_set_do();
+  void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void updateRS(int worker_i);
+  HeapRegion* calculateStartRegion(int i);
+
+  HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
+
+  CardTableModRefBS* ct_bs() { return _ct_bs; }
+  size_t cardsScanned() { return _total_cards_scanned; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from",
+  // which is required to be non-NULL) has changed to a new non-NULL value.
+  inline void write_ref(HeapRegion* from, oop* p);
+  // The "_nv" version is the same; it exists just so that it is not virtual.
+  inline void write_ref_nv(HeapRegion* from, oop* p);
+
+  inline bool self_forwarded(oop obj);
+  inline void par_write_ref(HeapRegion* from, oop* p, int tid);
+
+  void scrub(BitMap* region_bm, BitMap* card_bm);
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val);
+
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
+
+  virtual void print_summary_info();
+  virtual void prepare_for_verify();
+};
+
+#define G1_REM_SET_LOGGING 0
+
+class CountNonCleanMemRegionClosure: public MemRegionClosure {
+  G1CollectedHeap* _g1;
+  int _n;
+  HeapWord* _start_first;
+public:
+  CountNonCleanMemRegionClosure(G1CollectedHeap* g1) :
+    _g1(g1), _n(0), _start_first(NULL)
+  {}
+  void do_MemRegion(MemRegion mr);
+  int n() { return _n; };
+  HeapWord* start_first() { return _start_first; }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline size_t G1RemSet::n_workers() {
+  if (_g1->workers() != NULL) {
+    return _g1->workers()->total_workers();
+  } else {
+    return 1;
+  }
+}
+
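+// Record the reference at "p" in the remembered set of the region
+// containing the referenced object, unless the referenced object is
+// outside the G1 heap, the reference stays within a single region, the
+// target region is popular, or the source region is a survivor.  If
+// "add_reference" returns true, the target region is scheduled for
+// popular-region evacuation.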
+inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {
+  oop obj = *p;
+  assert(from != NULL && from->is_in_reserved(p),
+         "p is not in a from");
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  if (from != to && to != NULL) {
+    if (!to->popular() && !from->is_survivor()) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
+                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
+                             p, obj,
+                             to->bottom(), to->end());
+#endif
+      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+      if (to->rem_set()->add_reference(p)) {
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+  }
+}
+
+inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {
+  write_ref_nv(from, p);
+}
+
+inline bool HRInto_G1RemSet::self_forwarded(oop obj) {
+  bool result =  (obj->is_forwarded() && (obj->forwardee()== obj));
+  return result;
+}
+
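+// Parallel variant of write_ref: records the reference in the target
+// region's remembered set using the worker id "tid", and, while a
+// parallel traversal is in progress, also pushes locations that point
+// into the collection set onto the per-thread "_new_refs" list (skipping
+// self-forwarded objects, as noted below).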
+inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {
+  oop obj = *p;
+#ifdef ASSERT
+  // can't do because of races
+  // assert(obj == NULL || obj->is_oop(), "expected an oop");
+
+  // Do the safe subset of is_oop
+  if (obj != NULL) {
+#ifdef CHECK_UNHANDLED_OOPS
+    oopDesc* o = obj.obj();
+#else
+    oopDesc* o = obj;
+#endif // CHECK_UNHANDLED_OOPS
+    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
+  }
+#endif // ASSERT
+  assert(from == NULL || from->is_in_reserved(p),
+         "p is not in from");
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  // The test below could be optimized by applying a bit op to "to" and "from".
+  if (to != NULL && from != NULL && from != to) {
+    if (!to->popular() && !from->is_survivor()) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
+                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
+                             p, obj,
+                             to->bottom(), to->end());
+#endif
+      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+      if (to->rem_set()->add_reference(p, tid)) {
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+    // There is a tricky infinite loop if we keep pushing
+    // self forwarding pointers onto our _new_refs list.
+    if (_par_traversal_in_progress &&
+        to->in_collection_set() && !self_forwarded(obj)) {
+      _new_refs[tid]->push(p);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1SATBCardTableModRefBS.cpp.incl"
+
+G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap,
+                                                 int max_covered_regions) :
+    CardTableModRefBSForCTRS(whole_heap, max_covered_regions)
+{
+  _kind = G1SATBCT;
+}
+
+
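+// SATB pre-barrier support: enqueue "pre_val" (the value about to be
+// overwritten) on the current Java thread's SATB mark queue, or on the
+// shared SATB queue (under Shared_SATB_Q_lock) when called from a
+// non-Java thread.  Does nothing while the SATB queue set is inactive.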
+void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  Thread* thr = Thread::current();
+  if (thr->is_Java_thread()) {
+    JavaThread* jt = (JavaThread*)thr;
+    jt->satb_mark_queue().enqueue(pre_val);
+  } else {
+    MutexLocker x(Shared_SATB_Q_lock);
+    JavaThread::satb_mark_queue_set().shared_satb_queue()->enqueue(pre_val);
+  }
+}
+
+// When we know the current java thread:
+void
+G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field,
+                                                    oop newVal,
+                                                    JavaThread* jt) {
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
+  oop preVal = *(oop*)field;
+  if (preVal != NULL) {
+    jt->satb_mark_queue().enqueue(preVal);
+  }
+}
+
+void
+G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) {
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
+  oop* elem_ptr = (oop*)mr.start();
+  while ((HeapWord*)elem_ptr < mr.end()) {
+    oop elem = *elem_ptr;
+    if (elem != NULL) enqueue(elem);
+    elem_ptr++;
+  }
+}
+
+
+
+G1SATBCardTableLoggingModRefBS::
+G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
+                               int max_covered_regions) :
+  G1SATBCardTableModRefBS(whole_heap, max_covered_regions),
+  _dcqs(JavaThread::dirty_card_queue_set())
+{
+  _kind = G1SATBCTLogging;
+}
+
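+// Post-barrier slow path: dirty the card covering "field" and log it on
+// the current Java thread's dirty card queue, or on the shared dirty card
+// queue (under Shared_DirtyCardQ_lock) for non-Java threads.  Cards that
+// are already dirty are not re-enqueued.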
+void
+G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
+                                                     oop new_val) {
+  jbyte* byte = byte_for(field);
+  if (*byte != dirty_card) {
+    *byte = dirty_card;
+    Thread* thr = Thread::current();
+    if (thr->is_Java_thread()) {
+      JavaThread* jt = (JavaThread*)thr;
+      jt->dirty_card_queue().enqueue(byte);
+    } else {
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      _dcqs.shared_dirty_card_queue()->enqueue(byte);
+    }
+  }
+}
+
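+// Static entry point for the post-barrier.  The xor of the field and
+// new-value addresses, shifted right by the log of the heap-region size,
+// is zero exactly when both lie in the same region; such stores, and
+// stores of NULL, need no remembered-set entry and are filtered out
+// before delegating to write_ref_field_work on the heap's barrier set.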
+void
+G1SATBCardTableLoggingModRefBS::write_ref_field_static(void* field,
+                                                       oop new_val) {
+  uintptr_t field_uint = (uintptr_t)field;
+  uintptr_t new_val_uint = (uintptr_t)new_val;
+  uintptr_t comb = field_uint ^ new_val_uint;
+  comb = comb >> HeapRegion::LogOfHRGrainBytes;
+  if (comb == 0) return;
+  if (new_val == NULL) return;
+  // Otherwise, log it.
+  G1SATBCardTableLoggingModRefBS* g1_bs =
+    (G1SATBCardTableLoggingModRefBS*)Universe::heap()->barrier_set();
+  g1_bs->write_ref_field_work(field, new_val);
+}
+
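+// Invalidate the cards covering "mr".  For a whole-heap invalidation the
+// cards are simply dirtied (so the "usual invariant" that all dirty cards
+// are logged no longer holds); otherwise each card that becomes dirty is
+// also enqueued on the appropriate dirty card queue.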
+void
+G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) {
+  jbyte* byte = byte_for(mr.start());
+  jbyte* last_byte = byte_for(mr.last());
+  Thread* thr = Thread::current();
+  if (whole_heap) {
+    while (byte <= last_byte) {
+      *byte = dirty_card;
+      byte++;
+    }
+  } else {
+    // Enqueue if necessary.
+    if (thr->is_Java_thread()) {
+      JavaThread* jt = (JavaThread*)thr;
+      while (byte <= last_byte) {
+        if (*byte != dirty_card) {
+          *byte = dirty_card;
+          jt->dirty_card_queue().enqueue(byte);
+        }
+        byte++;
+      }
+    } else {
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      while (byte <= last_byte) {
+        if (*byte != dirty_card) {
+          *byte = dirty_card;
+          _dcqs.shared_dirty_card_queue()->enqueue(byte);
+        }
+        byte++;
+      }
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef SERIALGC
+
+class DirtyCardQueueSet;
+
+// This barrier is specialized to use a logging barrier to support
+// snapshot-at-the-beginning marking.
+
+class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
+private:
+  // Add "pre_val" to a set of objects that may have been disconnected from the
+  // pre-marking object graph.
+  static void enqueue(oop pre_val);
+
+public:
+  G1SATBCardTableModRefBS(MemRegion whole_heap,
+                          int max_covered_regions);
+
+  bool is_a(BarrierSet::Name bsn) {
+    return bsn == BarrierSet::G1SATBCT || CardTableModRefBS::is_a(bsn);
+  }
+
+  virtual bool has_write_ref_pre_barrier() { return true; }
+
+  // This notes that we don't need to access any BarrierSet data
+  // structures, so this can be called from a static context.
+  static void write_ref_field_pre_static(void* field, oop newVal) {
+    assert(!UseCompressedOops, "Else needs to be templatized");
+    oop preVal = *((oop*)field);
+    if (preVal != NULL) {
+      enqueue(preVal);
+    }
+  }
+
+  // When we know the current java thread:
+  static void write_ref_field_pre_static(void* field, oop newVal,
+                                         JavaThread* jt);
+
+  // We export this to make it available in cases where the static
+  // type of the barrier set is known.  Note that it is non-virtual.
+  inline void inline_write_ref_field_pre(void* field, oop newVal) {
+    write_ref_field_pre_static(field, newVal);
+  }
+
+  // This is the more general virtual version.
+  void write_ref_field_pre_work(void* field, oop new_val) {
+    inline_write_ref_field_pre(field, new_val);
+  }
+
+  virtual void write_ref_array_pre(MemRegion mr);
+
+};
+
+// Adds card-table logging to the post-barrier.
+// Usual invariant: all dirty cards are logged in the DirtyCardQueueSet.
+class G1SATBCardTableLoggingModRefBS: public G1SATBCardTableModRefBS {
+ private:
+  DirtyCardQueueSet& _dcqs;
+ public:
+  G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
+                                 int max_covered_regions);
+
+  bool is_a(BarrierSet::Name bsn) {
+    return bsn == BarrierSet::G1SATBCTLogging ||
+      G1SATBCardTableModRefBS::is_a(bsn);
+  }
+
+  void write_ref_field_work(void* field, oop new_val);
+
+  // Can be called from static contexts.
+  static void write_ref_field_static(void* field, oop new_val);
+
+  // NB: if you do a whole-heap invalidation, the "usual invariant" defined
+  // above no longer applies.
+  void invalidate(MemRegion mr, bool whole_heap = false);
+
+  void write_region_work(MemRegion mr)    { invalidate(mr); }
+  void write_ref_array_work(MemRegion mr) { invalidate(mr); }
+
+
+};
+
+
+#endif // SERIALGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1_globals.cpp.incl"
+
+G1_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \
+         MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG,     \
+         MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
+         MATERIALIZE_NOTPRODUCT_FLAG,  \
+         MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Defines all globals flags used by the garbage-first compiler.
+//
+
+#define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \
+                                                                            \
+  product(intx, ParallelGCG1AllocBufferSize, 4*K,                           \
+          "Size of parallel G1 allocation buffers in to-space.")            \
+                                                                            \
+  product(intx, G1TimeSliceMS, 500,                                         \
+          "Time slice for MMU specification")                               \
+                                                                            \
+  product(intx, G1MaxPauseTimeMS, 200,                                      \
+          "Max GC time per MMU time slice")                                 \
+                                                                            \
+  product(intx, G1ConfidencePerc, 50,                                       \
+          "Confidence level for MMU/pause predictions")                     \
+                                                                            \
+  product(intx, G1MarkingOverheadPerc, 0,                                   \
+          "Overhead of concurrent marking")                                 \
+                                                                            \
+  product(bool, G1AccountConcurrentOverhead, false,                         \
+          "Whether soft real-time compliance in G1 will take into account"  \
+          "concurrent overhead")                                            \
+                                                                            \
+  product(intx, G1YoungGenSize, 0,                                          \
+          "Size of the G1 young generation, 0 is the adaptive policy")      \
+                                                                            \
+  product(bool, G1Gen, true,                                                \
+          "If true, it will enable the generational G1")                    \
+                                                                            \
+  develop(intx, G1GCPct, 10,                                                \
+          "The desired percent time spent on GC")                           \
+                                                                            \
+  product(intx, G1PolicyVerbose, 0,                                         \
+          "The verbosity level on G1 policy decisions")                     \
+                                                                            \
+  develop(bool, G1UseHRIntoRS, true,                                        \
+          "Determines whether the 'advanced' HR Into rem set is used.")     \
+                                                                            \
+  product(bool, G1VerifyRemSet, false,                                      \
+          "If true, verify the rem set functioning at each GC")             \
+                                                                            \
+  product(bool, G1VerifyConcMark, false,                                    \
+          "If true, verify the conc marking code at full GC time")          \
+                                                                            \
+  develop(intx, G1MarkingVerboseLevel, 0,                                   \
+          "Level (0-4) of verboseness of the marking code")                 \
+                                                                            \
+  develop(bool, G1VerifyConcMarkPrintReachable, true,                       \
+          "If conc mark verification fails, print reachable objects")       \
+                                                                            \
+  develop(bool, G1TraceMarkStackOverflow, false,                            \
+          "If true, extra debugging code for CM restart for ovflw.")        \
+                                                                            \
+  product(bool, G1VerifyMarkingInEvac, false,                               \
+          "If true, verify marking info during evacuation")                 \
+                                                                            \
+  develop(intx, G1PausesBtwnConcMark, -1,                                   \
+          "If positive, fixed number of pauses between conc markings")      \
+                                                                            \
+  product(intx, G1EfficiencyPctCausesMark, 80,                              \
+          "The cum gc efficiency since mark fall-off that causes "          \
+          "new marking")                                                    \
+                                                                            \
+  product(bool, TraceConcurrentMark, false,                                 \
+          "Trace concurrent mark")                                          \
+                                                                            \
+  product(bool, SummarizeG1ConcMark, false,                                 \
+          "Summarize concurrent mark info")                                 \
+                                                                            \
+  product(bool, SummarizeG1RSStats, false,                                  \
+          "Summarize remembered set processing info")                       \
+                                                                            \
+  product(bool, SummarizeG1ZFStats, false,                                  \
+          "Summarize zero-filling info")                                    \
+                                                                            \
+  product(bool, TraceG1Refine, false,                                       \
+          "Trace G1 concurrent refinement")                                 \
+                                                                            \
+  develop(bool, G1ConcMark, true,                                           \
+          "If true, run concurrent marking for G1")                         \
+                                                                            \
+  product(intx, G1CMStackSize, 2 * 1024 * 1024,                             \
+          "Size of the mark stack for concurrent marking.")                 \
+                                                                            \
+  product(intx, G1CMRegionStackSize, 1024 * 1024,                           \
+          "Size of the region stack for concurrent marking.")               \
+                                                                            \
+  develop(bool, G1ConcRefine, true,                                         \
+          "If true, run concurrent rem set refinement for G1")              \
+                                                                            \
+  develop(intx, G1ConcRefineTargTraversals, 4,                              \
+          "Number of concurrent refinement we try to achieve")              \
+                                                                            \
+  develop(intx, G1ConcRefineInitialDelta, 4,                                \
+          "Number of heap regions of alloc ahead of starting collection "   \
+          "pause to start concurrent refinement (initially)")               \
+                                                                            \
+  product(bool, G1SmoothConcRefine, true,                                   \
+          "Attempts to smooth out the overhead of concurrent refinement")   \
+                                                                            \
+  develop(bool, G1ConcZeroFill, true,                                       \
+          "If true, run concurrent zero-filling thread")                    \
+                                                                            \
+  develop(intx, G1ConcZFMaxRegions, 1,                                      \
+          "Stop zero-filling when # of zf'd regions reaches")               \
+                                                                            \
+  product(intx, G1SteadyStateUsed, 90,                                      \
+          "If non-0, try to maintain 'used' at this pct (of max)")          \
+                                                                            \
+  product(intx, G1SteadyStateUsedDelta, 30,                                 \
+          "If G1SteadyStateUsed is non-0, then do pause this number of "    \
+          "of percentage points earlier if no marking is in progress.")     \
+                                                                            \
+  develop(bool, G1SATBBarrierPrintNullPreVals, false,                       \
+          "If true, count frac of ptr writes with null pre-vals.")          \
+                                                                            \
+  product(intx, G1SATBLogBufferSize, 1*K,                                   \
+          "Number of entries in an SATB log buffer.")                       \
+                                                                            \
+  product(intx, G1SATBProcessCompletedThreshold, 20,                        \
+          "Number of completed buffers that triggers log processing.")      \
+                                                                            \
+  develop(intx, G1ExtraRegionSurvRate, 33,                                  \
+          "If the young survival rate is S, and there's room left in "      \
+          "to-space, we will allow regions whose survival rate is up to "   \
+          "S + (1 - S)*X, where X is this parameter (as a fraction.)")      \
+                                                                            \
+  develop(intx, G1InitYoungSurvRatio, 50,                                   \
+          "Expected Survival Rate for newly allocated bytes")               \
+                                                                            \
+  develop(bool, G1SATBPrintStubs, false,                                    \
+          "If true, print generated stubs for the SATB barrier")            \
+                                                                            \
+  product(intx, G1ExpandByPctOfAvail, 20,                                   \
+          "When expanding, % of uncommitted space to claim.")               \
+                                                                            \
+  develop(bool, G1RSBarrierRegionFilter, true,                              \
+          "If true, generate region filtering code in RS barrier")          \
+                                                                            \
+  develop(bool, G1RSBarrierNullFilter, true,                                \
+          "If true, generate null-pointer filtering code in RS barrier")    \
+                                                                            \
+  develop(bool, G1PrintCTFilterStats, false,                                \
+          "If true, print stats on RS filtering effectiveness")             \
+                                                                            \
+  develop(bool, G1RSBarrierUseQueue, true,                                  \
+          "If true, use queueing RS barrier")                               \
+                                                                            \
+  develop(bool, G1RSLogCheckCardTable, false,                               \
+          "If true, verify that no dirty cards remain after RS log "        \
+          "processing.")                                                    \
+                                                                            \
+  product(intx, G1MinPausesBetweenMarks, 2,                                 \
+          "Number of inefficient pauses necessary to trigger marking.")     \
+                                                                            \
+  product(intx, G1InefficientPausePct, 80,                                  \
+          "Threshold of an 'inefficient' pauses (as % of cum efficiency.")  \
+                                                                            \
+  product(intx, G1RSPopLimit, 32768,                                        \
+          "Limit that defines popularity.  Should go away! XXX")            \
+                                                                            \
+  develop(bool, G1RSCountHisto, false,                                      \
+          "If true, print a histogram of RS occupancies after each pause")  \
+                                                                            \
+  product(intx, G1ObjPopLimit, 256,                                         \
+          "Limit that defines popularity for an object.")                   \
+                                                                            \
+  product(bool, G1TraceFileOverwrite, false,                                \
+          "Allow the trace file to be overwritten")                         \
+                                                                            \
+  develop(intx, G1PrintRegionLivenessInfo, 0,                               \
+          "When > 0, print the occupancies of the <n> best and worst"       \
+          "regions.")                                                       \
+                                                                            \
+  develop(bool, G1TracePopularity, false,                                   \
+          "When true, provide detailed tracing of popularity.")             \
+                                                                            \
+  product(bool, G1SummarizePopularity, false,                               \
+          "When true, provide end-of-run-summarization of popularity.")     \
+                                                                            \
+  product(intx, G1NumPopularRegions, 1,                                     \
+          "Number of regions reserved to hold popular objects.  "           \
+          "Should go away later.")                                          \
+                                                                            \
+  develop(bool, G1PrintParCleanupStats, false,                              \
+          "When true, print extra stats about parallel cleanup.")           \
+                                                                            \
+  product(bool, G1DoAgeCohortChecks, false,                                 \
+          "When true, check well-formedness of age cohort structures.")     \
+                                                                            \
+  develop(bool, G1DisablePreBarrier, false,                                 \
+          "Disable generation of pre-barrier (i.e., marking barrier)   ")   \
+                                                                            \
+  develop(bool, G1DisablePostBarrier, false,                                \
+          "Disable generation of post-barrier (i.e., RS barrier)   ")       \
+                                                                            \
+  product(intx, G1DirtyCardQueueMax, 30,                                    \
+          "Maximum number of completed RS buffers before mutator threads "  \
+          "start processing them.")                                         \
+                                                                            \
+  develop(intx, G1ConcRSLogCacheSize, 10,                                   \
+          "Log base 2 of the length of conc RS hot-card cache.")            \
+                                                                            \
+  product(bool, G1ConcRSCountTraversals, false,                             \
+          "If true, gather data about the number of times CR traverses "    \
+          "cards ")                                                         \
+                                                                            \
+  product(intx, G1ConcRSHotCardLimit, 4,                                    \
+          "The threshold that defines (>=) a hot card.")                    \
+                                                                            \
+  develop(bool, G1PrintOopAppls, false,                                     \
+          "When true, print applications of closures to external locs.")    \
+                                                                            \
+  product(intx, G1LogRSRegionEntries, 7,                                    \
+          "Log_2 of max number of regions for which we keep bitmaps.")      \
+                                                                            \
+  develop(bool, G1RecordHRRSOops, false,                                    \
+          "When true, record recent calls to rem set operations.")          \
+                                                                            \
+  develop(bool, G1RecordHRRSEvents, false,                                  \
+          "When true, record recent calls to rem set operations.")          \
+                                                                            \
+  develop(intx, G1MaxVerifyFailures, -1,                                    \
+          "The maximum number of verification failrues to print.  "         \
+          "-1 means print all.")                                            \
+                                                                            \
+  develop(bool, G1ScrubRemSets, true,                                       \
+          "When true, do RS scrubbing after cleanup.")                      \
+                                                                            \
+  develop(bool, G1RSScrubVerbose, false,                                    \
+          "When true, do RS scrubbing with verbose output.")                \
+                                                                            \
+  develop(bool, G1YoungSurvRateVerbose, false,                              \
+          "print out the survival rate of young regions according to age.") \
+                                                                            \
+  develop(intx, G1YoungSurvRateNumRegionsSummary, 0,                        \
+          "the number of regions for which we'll print a surv rate "        \
+          "summary.")                                                       \
+                                                                            \
+  product(bool, G1UseScanOnlyPrefix, false,                                 \
+          "It determines whether the system will calculate an optimum "     \
+          "scan-only set.")                                                 \
+                                                                            \
+  product(intx, G1MinReservePerc, 10,                                       \
+          "It determines the minimum reserve we should have in the heap "   \
+          "to minimize the probability of promotion failure.")              \
+                                                                            \
+  product(bool, G1TraceRegions, false,                                      \
+          "If set G1 will print information on which regions are being "    \
+          "allocated and which are reclaimed.")                             \
+                                                                            \
+  develop(bool, G1HRRSUseSparseTable, true,                                 \
+          "When true, use sparse table to save space.")                     \
+                                                                            \
+  develop(bool, G1HRRSFlushLogBuffersOnVerify, false,                       \
+          "Forces flushing of log buffers before verification.")            \
+                                                                            \
+  product(intx, G1MaxSurvivorRegions, 0,                                    \
+          "The maximum number of survivor regions")
+
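+// Expanding G1_FLAGS with the DECLARE_* macros below produces a declaration
+// for each flag; the matching definitions are presumably materialized by a
+// second expansion elsewhere, following the usual HotSpot globals pattern.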
+G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The following OopClosure types get specialized versions of
+// "oop_oop_iterate" that invoke the closures' do_oop methods
+// non-virtually, using a mechanism defined in this file.  Extend these
+// macros in the obvious way to add specializations for new closures.
+
+// Forward declarations.
+enum G1Barrier {
+  G1BarrierNone, G1BarrierRS, G1BarrierEvac
+};
+
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+class G1ParCopyClosure;
+class G1ParScanClosure;
+
+typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
+
+class FilterIntoCSClosure;
+class FilterOutOfRegionClosure;
+class FilterInHeapRegionAndIntoCSClosure;
+class FilterAndMarkInHeapRegionAndIntoCSClosure;
+class G1ScanAndBalanceClosure;
+
+#ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
+#error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
+#endif
+
+#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
+      f(G1ParScanHeapEvacClosure,_nv)                   \
+      f(G1ParScanClosure,_nv)                           \
+      f(FilterIntoCSClosure,_nv)                        \
+      f(FilterOutOfRegionClosure,_nv)                   \
+      f(FilterInHeapRegionAndIntoCSClosure,_nv)         \
+      f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv)  \
+      f(G1ScanAndBalanceClosure,_nv)
+
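+// For example, a specialization for a hypothetical NewClosureType closure
+// would be added by appending an "f(NewClosureType,_nv)" entry to the
+// macro above.
+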
+#ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
+#error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
+#endif
+
+#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,873 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_heapRegion.cpp.incl"
+
+HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1,
+                                 HeapRegion* hr, OopClosure* cl,
+                                 CardTableModRefBS::PrecisionStyle precision,
+                                 FilterKind fk) :
+  ContiguousSpaceDCTOC(hr, cl, precision, NULL),
+  _hr(hr), _fk(fk), _g1(g1)
+{}
+
+FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
+                                                   OopClosure* oc) :
+  _r_bottom(r->bottom()), _r_end(r->end()),
+  _oc(oc), _out_of_region(0)
+{}
+
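+// Applied to each reference field of a live object during verification;
+// reports (on gclog_or_tty) fields that point outside the heap, at dead
+// objects, or at objects whose remembered-set entry is missing, and keeps
+// a count of the failures seen.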
+class VerifyLiveClosure: public OopClosure {
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _bs;
+  oop _containing_obj;
+  bool _failures;
+  int _n_failures;
+public:
+  VerifyLiveClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h), _bs(NULL), _containing_obj(NULL),
+    _failures(false), _n_failures(0)
+  {
+    BarrierSet* bs = _g1h->barrier_set();
+    if (bs->is_a(BarrierSet::CardTableModRef))
+      _bs = (CardTableModRefBS*)bs;
+  }
+
+  void set_containing_obj(oop obj) {
+    _containing_obj = obj;
+  }
+
+  bool failures() { return _failures; }
+  int n_failures() { return _n_failures; }
+
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  void do_oop(oop* p) {
+    assert(_containing_obj != NULL, "Precondition");
+    assert(!_g1h->is_obj_dead(_containing_obj), "Precondition");
+    oop obj = *p;
+    if (obj != NULL) {
+      bool failed = false;
+      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) {
+        if (!_failures) {
+          gclog_or_tty->print_cr("");
+          gclog_or_tty->print_cr("----------");
+        }
+        if (!_g1h->is_in_closed_subset(obj)) {
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                        " of live obj "PTR_FORMAT
+                        " points to obj "PTR_FORMAT
+                        " not in the heap.",
+                        p, (void*) _containing_obj, (void*) obj);
+        } else {
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                        " of live obj "PTR_FORMAT
+                        " points to dead obj "PTR_FORMAT".",
+                        p, (void*) _containing_obj, (void*) obj);
+        }
+        gclog_or_tty->print_cr("Live obj:");
+        _containing_obj->print_on(gclog_or_tty);
+        gclog_or_tty->print_cr("Bad referent:");
+        obj->print_on(gclog_or_tty);
+        gclog_or_tty->print_cr("----------");
+        _failures = true;
+        failed = true;
+        _n_failures++;
+      }
+
+      if (!_g1h->full_collection()) {
+        HeapRegion* from = _g1h->heap_region_containing(p);
+        HeapRegion* to   = _g1h->heap_region_containing(*p);
+        if (from != NULL && to != NULL &&
+            from != to &&
+            !to->popular() &&
+            !to->isHumongous()) {
+          jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
+          jbyte cv_field = *_bs->byte_for_const(p);
+          const jbyte dirty = CardTableModRefBS::dirty_card_val();
+
+          bool is_bad = !(from->is_young()
+                          || to->rem_set()->contains_reference(p)
+                          || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
+                              (_containing_obj->is_objArray() ?
+                                  cv_field == dirty
+                               : cv_obj == dirty || cv_field == dirty));
+          if (is_bad) {
+            if (!_failures) {
+              gclog_or_tty->print_cr("");
+              gclog_or_tty->print_cr("----------");
+            }
+            gclog_or_tty->print_cr("Missing rem set entry:");
+            gclog_or_tty->print_cr("Field "PTR_FORMAT
+                          " of obj "PTR_FORMAT
+                          ", in region %d ["PTR_FORMAT
+                          ", "PTR_FORMAT"),",
+                          p, (void*) _containing_obj,
+                          from->hrs_index(),
+                          from->bottom(),
+                          from->end());
+            _containing_obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("points to obj "PTR_FORMAT
+                          " in region %d ["PTR_FORMAT
+                          ", "PTR_FORMAT").",
+                          (void*) obj, to->hrs_index(),
+                          to->bottom(), to->end());
+            obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
+                          cv_obj, cv_field);
+            gclog_or_tty->print_cr("----------");
+            _failures = true;
+            if (!failed) _n_failures++;
+          }
+        }
+      }
+    }
+  }
+};
+
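+// Applies "cl" to each live object that starts at or after "cur" and ends
+// strictly below "top"; returns the address of the first object that extends
+// to or beyond "top" (that last object is handled separately by the caller).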
+template<class ClosureType>
+HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
+                               HeapRegion* hr,
+                               HeapWord* cur, HeapWord* top) {
+  oop cur_oop = oop(cur);
+  int oop_size = cur_oop->size();
+  HeapWord* next_obj = cur + oop_size;
+  while (next_obj < top) {
+    // Keep filtering the remembered set.
+    if (!g1h->is_obj_dead(cur_oop, hr)) {
+      // Bottom lies entirely below top, so we can call the
+      // non-memRegion version of oop_iterate below.
+#ifndef PRODUCT
+      if (G1VerifyMarkingInEvac) {
+        VerifyLiveClosure vl_cl(g1h);
+        cur_oop->oop_iterate(&vl_cl);
+      }
+#endif
+      cur_oop->oop_iterate(cl);
+    }
+    cur = next_obj;
+    cur_oop = oop(cur);
+    oop_size = cur_oop->size();
+    next_obj = cur + oop_size;
+  }
+  return cur;
+}
+
+void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr,
+                                              HeapWord* bottom,
+                                              HeapWord* top,
+                                              OopClosure* cl) {
+  G1CollectedHeap* g1h = _g1;
+
+  int oop_size;
+
+  OopClosure* cl2 = cl;
+  FilterIntoCSClosure intoCSFilt(this, g1h, cl);
+  FilterOutOfRegionClosure outOfRegionFilt(_hr, cl);
+  switch (_fk) {
+  case IntoCSFilterKind:      cl2 = &intoCSFilt; break;
+  case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
+  }
+
+  // Start filtering what we add to the remembered set. If the object is
+  // not considered dead, either because it is marked (in the mark bitmap)
+  // or it was allocated after marking finished, then we add it. Otherwise
+  // we can safely ignore the object.
+  if (!g1h->is_obj_dead(oop(bottom), _hr)) {
+#ifndef PRODUCT
+    if (G1VerifyMarkingInEvac) {
+      VerifyLiveClosure vl_cl(g1h);
+      oop(bottom)->oop_iterate(&vl_cl, mr);
+    }
+#endif
+    oop_size = oop(bottom)->oop_iterate(cl2, mr);
+  } else {
+    oop_size = oop(bottom)->size();
+  }
+
+  bottom += oop_size;
+
+  if (bottom < top) {
+    // We replicate the loop below for several kinds of possible filters.
+    switch (_fk) {
+    case NoFilterKind:
+      bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top);
+      break;
+    case IntoCSFilterKind: {
+      FilterIntoCSClosure filt(this, g1h, cl);
+      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
+      break;
+    }
+    case OutOfRegionFilterKind: {
+      FilterOutOfRegionClosure filt(_hr, cl);
+      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+    }
+
+    // Last object. Need to do dead-obj filtering here too.
+    if (!g1h->is_obj_dead(oop(bottom), _hr)) {
+#ifndef PRODUCT
+      if (G1VerifyMarkingInEvac) {
+        VerifyLiveClosure vl_cl(g1h);
+        oop(bottom)->oop_iterate(&vl_cl, mr);
+      }
+#endif
+      oop(bottom)->oop_iterate(cl2, mr);
+    }
+  }
+}
+
+void HeapRegion::reset_after_compaction() {
+  G1OffsetTableContigSpace::reset_after_compaction();
+  // After a compaction the mark bitmap is invalid, so we must
+  // treat all objects as being inside the unmarked area.
+  zero_marked_bytes();
+  init_top_at_mark_start();
+}
+
+DirtyCardToOopClosure*
+HeapRegion::new_dcto_closure(OopClosure* cl,
+                             CardTableModRefBS::PrecisionStyle precision,
+                             HeapRegionDCTOC::FilterKind fk) {
+  return new HeapRegionDCTOC(G1CollectedHeap::heap(),
+                             this, cl, precision, fk);
+}
+
+void HeapRegion::hr_clear(bool par, bool clear_space) {
+  _humongous_type = NotHumongous;
+  _humongous_start_region = NULL;
+  _in_collection_set = false;
+  _is_gc_alloc_region = false;
+
+  // Age stuff (if parallel, this will be done separately, since it needs
+  // to be sequential).
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  set_young_index_in_cset(-1);
+  uninstall_surv_rate_group();
+  set_young_type(NotYoung);
+
+  // In case it had been the start of a humongous sequence, reset its end.
+  set_end(_orig_end);
+
+  if (!par) {
+    // If this is parallel, this will be done later.
+    HeapRegionRemSet* hrrs = rem_set();
+    if (hrrs != NULL) hrrs->clear();
+    _claimed = InitialClaimValue;
+  }
+  zero_marked_bytes();
+  set_sort_index(-1);
+  if ((uintptr_t)bottom() >= (uintptr_t)g1h->popular_object_boundary())
+    set_popular(false);
+
+  _offsets.resize(HeapRegion::GrainWords);
+  init_top_at_mark_start();
+  if (clear_space) clear(SpaceDecorator::Mangle);
+}
+
+// <PREDICTION>
+void HeapRegion::calc_gc_efficiency() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  _gc_efficiency = (double) garbage_bytes() /
+                            g1h->predict_region_elapsed_time_ms(this, false);
+}
+// </PREDICTION>
+
+void HeapRegion::set_startsHumongous() {
+  _humongous_type = StartsHumongous;
+  _humongous_start_region = this;
+  assert(end() == _orig_end, "Should be normal before alloc.");
+}
+
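+// Attempt to claim this region for parallel processing: a single
+// compare-and-swap of _claimed from its current value to "claimValue".
+// Returns true only if this thread's CAS installed the new value; a region
+// already holding "claimValue" is not re-claimed.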
+bool HeapRegion::claimHeapRegion(jint claimValue) {
+  jint current = _claimed;
+  if (current != claimValue) {
+    jint res = Atomic::cmpxchg(claimValue, &_claimed, current);
+    if (res == current) {
+      return true;
+    }
+  }
+  return false;
+}
+
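+// Binary search for a block boundary at or above "addr" (returning end() if
+// the search reaches it), probing candidate midpoints with
+// block_start_careful().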
+HeapWord* HeapRegion::next_block_start_careful(HeapWord* addr) {
+  HeapWord* low = addr;
+  HeapWord* high = end();
+  while (low < high) {
+    size_t diff = pointer_delta(high, low);
+    // Must add one below to bias toward the high amount.  Otherwise, if
+  // "high" were at the desired value, and "low" were one less, we
+    // would not converge on "high".  This is not symmetric, because
+    // we set "high" to a block start, which might be the right one,
+    // which we don't do for "low".
+    HeapWord* middle = low + (diff+1)/2;
+    if (middle == high) return high;
+    HeapWord* mid_bs = block_start_careful(middle);
+    if (mid_bs < addr) {
+      low = middle;
+    } else {
+      high = mid_bs;
+    }
+  }
+  assert(low == high && low >= addr, "Didn't work.");
+  return low;
+}
+
+void HeapRegion::set_next_on_unclean_list(HeapRegion* r) {
+  assert(r == NULL || r->is_on_unclean_list(), "Malformed unclean list.");
+  _next_in_special_set = r;
+}
+
+void HeapRegion::set_on_unclean_list(bool b) {
+  _is_on_unclean_list = b;
+}
+
+void HeapRegion::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
+  G1OffsetTableContigSpace::initialize(mr, false, mangle_space);
+  hr_clear(false/*par*/, clear_space);
+}
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+HeapRegion::
+HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray,
+                     MemRegion mr, bool is_zeroed)
+  : G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed),
+    _next_fk(HeapRegionDCTOC::NoFilterKind),
+    _hrs_index(-1),
+    _humongous_type(NotHumongous), _humongous_start_region(NULL),
+    _in_collection_set(false), _is_gc_alloc_region(false),
+    _is_on_free_list(false), _is_on_unclean_list(false),
+    _next_in_special_set(NULL), _orig_end(NULL),
+    _claimed(InitialClaimValue), _evacuation_failed(false),
+    _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1),
+    _popularity(NotPopular),
+    _young_type(NotYoung), _next_young_region(NULL),
+    _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1),
+    _rem_set(NULL), _zfs(NotZeroFilled)
+{
+  _orig_end = mr.end();
+  // Note that initialize() will set the start of the unmarked area of the
+  // region.
+  this->initialize(mr, !is_zeroed, SpaceDecorator::Mangle);
+  set_top(bottom());
+  set_saved_mark();
+
+  _rem_set = new HeapRegionRemSet(sharedOffsetArray, this);
+
+  assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
+  // In case the region is allocated during a pause, note the top.
+  // We haven't done any counting on a brand new region.
+  _top_at_conc_mark_count = bottom();
+}
+
+class NextCompactionHeapRegionClosure: public HeapRegionClosure {
+  const HeapRegion* _target;
+  bool _target_seen;
+  HeapRegion* _last;
+  CompactibleSpace* _res;
+public:
+  NextCompactionHeapRegionClosure(const HeapRegion* target) :
+    _target(target), _target_seen(false), _res(NULL) {}
+  bool doHeapRegion(HeapRegion* cur) {
+    if (_target_seen) {
+      if (!cur->isHumongous()) {
+        _res = cur;
+        return true;
+      }
+    } else if (cur == _target) {
+      _target_seen = true;
+    }
+    return false;
+  }
+  CompactibleSpace* result() { return _res; }
+};
+
+CompactibleSpace* HeapRegion::next_compaction_space() const {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  // cast away const-ness
+  HeapRegion* r = (HeapRegion*) this;
+  NextCompactionHeapRegionClosure blk(r);
+  g1h->heap_region_iterate_from(r, &blk);
+  return blk.result();
+}
+
+void HeapRegion::set_continuesHumongous(HeapRegion* start) {
+  // The order is important here.
+  start->add_continuingHumongousRegion(this);
+  _humongous_type = ContinuesHumongous;
+  _humongous_start_region = start;
+}
+
+void HeapRegion::add_continuingHumongousRegion(HeapRegion* cont) {
+  // Must join the blocks of the current H region seq with the block of the
+  // added region.
+  offsets()->join_blocks(bottom(), cont->bottom());
+  arrayOop obj = (arrayOop)(bottom());
+  obj->set_length((int) (obj->length() + cont->capacity()/jintSize));
+  set_end(cont->end());
+  set_top(cont->end());
+}
+
+void HeapRegion::save_marks() {
+  set_saved_mark();
+}
+
+void HeapRegion::oops_in_mr_iterate(MemRegion mr, OopClosure* cl) {
+  HeapWord* p = mr.start();
+  HeapWord* e = mr.end();
+  oop obj;
+  while (p < e) {
+    obj = oop(p);
+    p += obj->oop_iterate(cl);
+  }
+  assert(p == e, "bad memregion: doesn't end on obj boundary");
+}
+
+#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \
+void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \
+  ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl);              \
+}
+SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN)
+
+
+void HeapRegion::oop_before_save_marks_iterate(OopClosure* cl) {
+  oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
+}
+
+#ifdef DEBUG
+HeapWord* HeapRegion::allocate(size_t size) {
+  jint state = zero_fill_state();
+  assert(!G1CollectedHeap::heap()->allocs_are_zero_filled() ||
+         zero_fill_is_allocated(),
+         "When ZF is on, only alloc in ZF'd regions");
+  return G1OffsetTableContigSpace::allocate(size);
+}
+#endif
+
+void HeapRegion::set_zero_fill_state_work(ZeroFillState zfs) {
+  assert(top() == bottom() || zfs == Allocated,
+         "Region must be empty, or we must be setting it to allocated.");
+  assert(ZF_mon->owned_by_self() ||
+         Universe::heap()->is_gc_active(),
+         "Must hold the lock or be a full GC to modify.");
+  _zfs = zfs;
+}
+
+void HeapRegion::set_zero_fill_complete() {
+  set_zero_fill_state_work(ZeroFilled);
+  if (ZF_mon->owned_by_self()) {
+    ZF_mon->notify_all();
+  }
+}
+
+
+void HeapRegion::ensure_zero_filled() {
+  MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+  ensure_zero_filled_locked();
+}
+
+void HeapRegion::ensure_zero_filled_locked() {
+  assert(ZF_mon->owned_by_self(), "Precondition");
+  bool should_ignore_zf = SafepointSynchronize::is_at_safepoint();
+  assert(should_ignore_zf || Heap_lock->is_locked(),
+         "Either we're in a GC or we're allocating a region.");
+  switch (zero_fill_state()) {
+  case HeapRegion::NotZeroFilled:
+    set_zero_fill_in_progress(Thread::current());
+    {
+      ZF_mon->unlock();
+      Copy::fill_to_words(bottom(), capacity()/HeapWordSize);
+      ZF_mon->lock_without_safepoint_check();
+    }
+    // A trap.
+    guarantee(zero_fill_state() == HeapRegion::ZeroFilling
+              && zero_filler() == Thread::current(),
+              "AHA!  Tell Dave D if you see this...");
+    set_zero_fill_complete();
+    // gclog_or_tty->print_cr("Did sync ZF.");
+    ConcurrentZFThread::note_sync_zfs();
+    break;
+  case HeapRegion::ZeroFilling:
+    if (should_ignore_zf) {
+      // We can "break" the lock and take over the work.
+      Copy::fill_to_words(bottom(), capacity()/HeapWordSize);
+      set_zero_fill_complete();
+      ConcurrentZFThread::note_sync_zfs();
+      break;
+    } else {
+      ConcurrentZFThread::wait_for_ZF_completed(this);
+    }
+  case HeapRegion::ZeroFilled:
+    // Nothing to do.
+    break;
+  case HeapRegion::Allocated:
+    guarantee(false, "Should not call on allocated regions.");
+  }
+  assert(zero_fill_state() == HeapRegion::ZeroFilled, "Post");
+}
+
+HeapWord*
+HeapRegion::object_iterate_mem_careful(MemRegion mr,
+                                                 ObjectClosure* cl) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  // We used to use "block_start_careful" here.  But we're actually happy
+  // to update the BOT while we do this...
+  HeapWord* cur = block_start(mr.start());
+  mr = mr.intersection(used_region());
+  if (mr.is_empty()) return NULL;
+  // Otherwise, find the obj that extends onto mr.start().
+
+  assert(cur <= mr.start()
+         && (oop(cur)->klass() == NULL ||
+             cur + oop(cur)->size() > mr.start()),
+         "postcondition of block_start");
+  oop obj;
+  while (cur < mr.end()) {
+    obj = oop(cur);
+    if (obj->klass() == NULL) {
+      // Ran into an unparseable point.
+      return cur;
+    } else if (!g1h->is_obj_dead(obj)) {
+      cl->do_object(obj);
+    }
+    if (cl->abort()) return cur;
+    // The check above must occur before the operation below, since an
+    // abort might invalidate the "size" operation.
+    cur += obj->size();
+  }
+  return NULL;
+}
+
+HeapWord*
+HeapRegion::
+oops_on_card_seq_iterate_careful(MemRegion mr,
+                                     FilterOutOfRegionClosure* cl) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If we're within a stop-world GC, then we might look at a card in a
+  // GC alloc region that extends onto a GC LAB, which may not be
+  // parseable.  Stop such at the "saved_mark" of the region.
+  if (G1CollectedHeap::heap()->is_gc_active()) {
+    mr = mr.intersection(used_region_at_save_marks());
+  } else {
+    mr = mr.intersection(used_region());
+  }
+  if (mr.is_empty()) return NULL;
+  // Otherwise, find the obj that extends onto mr.start().
+
+  // We used to use "block_start_careful" here.  But we're actually happy
+  // to update the BOT while we do this...
+  HeapWord* cur = block_start(mr.start());
+  assert(cur <= mr.start(), "Postcondition");
+
+  while (cur <= mr.start()) {
+    if (oop(cur)->klass() == NULL) {
+      // Ran into an unparseable point.
+      return cur;
+    }
+    // Otherwise...
+    int sz = oop(cur)->size();
+    if (cur + sz > mr.start()) break;
+    // Otherwise, go on.
+    cur = cur + sz;
+  }
+  oop obj;
+  obj = oop(cur);
+  // If we finish this loop...
+  assert(cur <= mr.start()
+         && obj->klass() != NULL
+         && cur + obj->size() > mr.start(),
+         "Loop postcondition");
+  if (!g1h->is_obj_dead(obj)) {
+    obj->oop_iterate(cl, mr);
+  }
+
+  HeapWord* next;
+  while (cur < mr.end()) {
+    obj = oop(cur);
+    if (obj->klass() == NULL) {
+      // Ran into an unparseable point.
+      return cur;
+    }
+    // Otherwise:
+    next = (cur + obj->size());
+    if (!g1h->is_obj_dead(obj)) {
+      if (next < mr.end()) {
+        obj->oop_iterate(cl);
+      } else {
+        // this obj spans the boundary.  If it's an array, stop at the
+        // boundary.
+        if (obj->is_objArray()) {
+          obj->oop_iterate(cl, mr);
+        } else {
+          obj->oop_iterate(cl);
+        }
+      }
+    }
+    cur = next;
+  }
+  return NULL;
+}
+
+void HeapRegion::print() const { print_on(gclog_or_tty); }
+void HeapRegion::print_on(outputStream* st) const {
+  if (isHumongous()) {
+    if (startsHumongous())
+      st->print(" HS");
+    else
+      st->print(" HC");
+  } else {
+    st->print("   ");
+  }
+  if (in_collection_set())
+    st->print(" CS");
+  else if (is_gc_alloc_region())
+    st->print(" A ");
+  else
+    st->print("   ");
+  if (is_young())
+    st->print(is_scan_only() ? " SO" : (is_survivor() ? " SU" : " Y "));
+  else
+    st->print("   ");
+  if (is_empty())
+    st->print(" F");
+  else
+    st->print("  ");
+  st->print(" %d", _gc_time_stamp);
+  G1OffsetTableContigSpace::print_on(st);
+}
+
+#define OBJ_SAMPLE_INTERVAL 0
+#define BLOCK_SAMPLE_INTERVAL 100
+
+// This really ought to be commoned up into OffsetTableContigSpace somehow.
+// We would need a mechanism to make that code skip dead objects.
+
+void HeapRegion::verify(bool allow_dirty) const {
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  HeapWord* p = bottom();
+  HeapWord* prev_p = NULL;
+  int objs = 0;
+  int blocks = 0;
+  VerifyLiveClosure vl_cl(g1);
+  while (p < top()) {
+    size_t size = oop(p)->size();
+    if (blocks == BLOCK_SAMPLE_INTERVAL) {
+      guarantee(p == block_start_const(p + (size/2)),
+                "check offset computation");
+      blocks = 0;
+    } else {
+      blocks++;
+    }
+    if (objs == OBJ_SAMPLE_INTERVAL) {
+      oop obj = oop(p);
+      if (!g1->is_obj_dead(obj, this)) {
+        obj->verify();
+        vl_cl.set_containing_obj(obj);
+        obj->oop_iterate(&vl_cl);
+        if (G1MaxVerifyFailures >= 0
+            && vl_cl.n_failures() >= G1MaxVerifyFailures) break;
+      }
+      objs = 0;
+    } else {
+      objs++;
+    }
+    prev_p = p;
+    p += size;
+  }
+  HeapWord* rend = end();
+  HeapWord* rtop = top();
+  if (rtop < rend) {
+    guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop,
+              "check offset computation");
+  }
+  if (vl_cl.failures()) {
+    gclog_or_tty->print_cr("Heap:");
+    G1CollectedHeap::heap()->print();
+    gclog_or_tty->print_cr("");
+  }
+  if (G1VerifyConcMark &&
+      G1VerifyConcMarkPrintReachable &&
+      vl_cl.failures()) {
+    g1->concurrent_mark()->print_prev_bitmap_reachable();
+  }
+  guarantee(!vl_cl.failures(), "should not have had any failures");
+  guarantee(p == top(), "end of last object must match end of space");
+}
+
+// G1OffsetTableContigSpace code; copied from space.cpp.  Hope this can go
+// away eventually.
+
+void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
+  // false ==> we'll do the clearing if there's clearing to be done.
+  ContiguousSpace::initialize(mr, false, mangle_space);
+  _offsets.zero_bottom_entry();
+  _offsets.initialize_threshold();
+  if (clear_space) clear(mangle_space);
+}
+
+void G1OffsetTableContigSpace::clear(bool mangle_space) {
+  ContiguousSpace::clear(mangle_space);
+  _offsets.zero_bottom_entry();
+  _offsets.initialize_threshold();
+}
+
+void G1OffsetTableContigSpace::set_bottom(HeapWord* new_bottom) {
+  Space::set_bottom(new_bottom);
+  _offsets.set_bottom(new_bottom);
+}
+
+void G1OffsetTableContigSpace::set_end(HeapWord* new_end) {
+  Space::set_end(new_end);
+  _offsets.resize(new_end - bottom());
+}
+
+void G1OffsetTableContigSpace::print() const {
+  print_short();
+  gclog_or_tty->print_cr(" [" INTPTR_FORMAT ", " INTPTR_FORMAT ", "
+                INTPTR_FORMAT ", " INTPTR_FORMAT ")",
+                bottom(), top(), _offsets.threshold(), end());
+}
+
+HeapWord* G1OffsetTableContigSpace::initialize_threshold() {
+  return _offsets.initialize_threshold();
+}
+
+HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start,
+                                                    HeapWord* end) {
+  _offsets.alloc_block(start, end);
+  return _offsets.threshold();
+}
+
+HeapWord* G1OffsetTableContigSpace::saved_mark_word() const {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" );
+  if (_gc_time_stamp < g1h->get_gc_time_stamp())
+    return top();
+  else
+    return ContiguousSpace::saved_mark_word();
+}
+
+void G1OffsetTableContigSpace::set_saved_mark() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();
+
+  if (_gc_time_stamp < curr_gc_time_stamp) {
+    // The order of these is important, as another thread might be
+    // about to start scanning this region. If it does so after
+    // set_saved_mark and before _gc_time_stamp = ..., then the latter
+    // will be false, and it will pick up top() as the high water mark
+    // of region. If it does so after _gc_time_stamp = ..., then it
+    // will pick up the right saved_mark_word() as the high water mark
+    // of the region. Either way, the behaviour will be correct.
+    ContiguousSpace::set_saved_mark();
+    _gc_time_stamp = curr_gc_time_stamp;
+    OrderAccess::fence();
+  }
+}
+
+G1OffsetTableContigSpace::
+G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
+                         MemRegion mr, bool is_zeroed) :
+  _offsets(sharedOffsetArray, mr),
+  _par_alloc_lock(Mutex::leaf, "OffsetTableContigSpace par alloc lock", true),
+  _gc_time_stamp(0)
+{
+  _offsets.set_space(this);
+  initialize(mr, !is_zeroed, SpaceDecorator::Mangle);
+}
+
+size_t RegionList::length() {
+  size_t len = 0;
+  HeapRegion* cur = hd();
+  DEBUG_ONLY(HeapRegion* last = NULL);
+  while (cur != NULL) {
+    len++;
+    DEBUG_ONLY(last = cur);
+    cur = get_next(cur);
+  }
+  assert(last == tl(), "Invariant");
+  return len;
+}
+
+void RegionList::insert_before_head(HeapRegion* r) {
+  assert(well_formed(), "Inv");
+  set_next(r, hd());
+  _hd = r;
+  _sz++;
+  if (tl() == NULL) _tl = r;
+  assert(well_formed(), "Inv");
+}
+
+void RegionList::prepend_list(RegionList* new_list) {
+  assert(well_formed(), "Precondition");
+  assert(new_list->well_formed(), "Precondition");
+  HeapRegion* new_tl = new_list->tl();
+  if (new_tl != NULL) {
+    set_next(new_tl, hd());
+    _hd = new_list->hd();
+    _sz += new_list->sz();
+    if (tl() == NULL) _tl = new_list->tl();
+  } else {
+    assert(new_list->hd() == NULL && new_list->sz() == 0, "Inv");
+  }
+  assert(well_formed(), "Inv");
+}
+
+void RegionList::delete_after(HeapRegion* r) {
+  assert(well_formed(), "Precondition");
+  assert(r != NULL, "Precondition");
+  HeapRegion* next = get_next(r);
+  HeapRegion* next_tl = get_next(next);
+  set_next(r, next_tl);
+  dec_sz();
+  if (next == tl()) {
+    assert(next_tl == NULL, "Inv");
+    _tl = r;
+  }
+  assert(well_formed(), "Inv");
+}
+
+HeapRegion* RegionList::pop() {
+  assert(well_formed(), "Inv");
+  HeapRegion* res = hd();
+  if (res != NULL) {
+    _hd = get_next(res);
+    _sz--;
+    set_next(res, NULL);
+    if (sz() == 0) _tl = NULL;
+  }
+  assert(well_formed(), "Inv");
+  return res;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,937 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef SERIALGC
+
+// A HeapRegion is the smallest piece of a G1CollectedHeap that
+// can be collected independently.
+
+// NOTE: Although a HeapRegion is a Space, its
+// Space::initDirtyCardClosure method must not be called.
+// The problem is that the existence of this method breaks
+// the independence of barrier sets from remembered sets.
+// The solution is to remove this method from the definition
+// of a Space.
+
+class CompactibleSpace;
+class ContiguousSpace;
+class HeapRegionRemSet;
+class HeapRegionRemSetIterator;
+class HeapRegion;
+
+// A dirty card to oop closure for heap regions. It
+// knows how to get the G1 heap and how to use the bitmap
+// in the concurrent marker used by G1 to filter remembered
+// sets.
+
+class HeapRegionDCTOC : public ContiguousSpaceDCTOC {
+public:
+  // Specification of possible DirtyCardToOopClosure filtering.
+  enum FilterKind {
+    NoFilterKind,
+    IntoCSFilterKind,
+    OutOfRegionFilterKind
+  };
+
+protected:
+  HeapRegion* _hr;
+  FilterKind _fk;
+  G1CollectedHeap* _g1;
+
+  void walk_mem_region_with_cl(MemRegion mr,
+                               HeapWord* bottom, HeapWord* top,
+                               OopClosure* cl);
+
+  // We don't specialize this for FilteringClosure; filtering is handled by
+  // the "FilterKind" mechanism.  But we provide this to avoid a compiler
+  // warning.
+  void walk_mem_region_with_cl(MemRegion mr,
+                               HeapWord* bottom, HeapWord* top,
+                               FilteringClosure* cl) {
+    HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top,
+                                                       (OopClosure*)cl);
+  }
+
+  // Get the actual top of the area on which the closure will
+  // operate, given where the top is assumed to be (the end of the
+  // memory region passed to do_MemRegion) and where the object
+  // at the top is assumed to start. For example, an object may
+  // start at the top but actually extend past the assumed top,
+  // in which case the top becomes the end of the object.
+  HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) {
+    return ContiguousSpaceDCTOC::get_actual_top(top, top_obj);
+  }
+
+  // Walk the given memory region from bottom to (actual) top
+  // looking for objects and applying the oop closure (_cl) to
+  // them. The base implementation of this treats the area as
+  // blocks, where a block may or may not be an object. Sub-
+  // classes should override this to provide more accurate
+  // or possibly more efficient walking.
+  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) {
+    Filtering_DCTOC::walk_mem_region(mr, bottom, top);
+  }
+
+public:
+  HeapRegionDCTOC(G1CollectedHeap* g1,
+                  HeapRegion* hr, OopClosure* cl,
+                  CardTableModRefBS::PrecisionStyle precision,
+                  FilterKind fk);
+};
+
+
+// The complicating factor is that BlockOffsetTable diverged
+// significantly, and we need functionality that is only in the G1 version.
+// So I copied that code, which led to an alternate G1 version of
+// OffsetTableContigSpace.  If the two versions of BlockOffsetTable could
+// be reconciled, then G1OffsetTableContigSpace could go away.
+
+// The idea behind time stamps is the following. Doing a save_marks on
+// all regions at every GC pause is time consuming (if I remember
+// well, 10ms or so). So, we would like to do that only for regions
+// that are GC alloc regions. To achieve this, we use time
+// stamps. For every evacuation pause, G1CollectedHeap generates a
+// unique time stamp (essentially a counter that gets
+// incremented). Every time we want to call save_marks on a region,
+// we set the saved_mark_word to top and also copy the current GC
+// time stamp to the time stamp field of the space. Reading the
+// saved_mark_word involves checking the time stamp of the
+// region. If it is the same as the current GC time stamp, then we
+// can safely read the saved_mark_word field, as it is valid. If the
+// time stamp of the region is not the same as the current GC time
+// stamp, then we instead read top, as the saved_mark_word field is
+// invalid. Time stamps (on the regions and also on the
+// G1CollectedHeap) are reset at every cleanup (we iterate over
+// the regions anyway) and at the end of a Full GC. The current scheme
+// that uses sequential unsigned ints will fail only if we have 4b
+// evacuation pauses between two cleanups, which is _highly_ unlikely.
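+//
+// Concretely, saved_mark_word() returns ContiguousSpace::saved_mark_word()
+// only when the region's _gc_time_stamp matches the heap's current time
+// stamp, and returns top() otherwise; set_saved_mark() records the mark and
+// then publishes the new time stamp (see heapRegion.cpp).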
+
+class G1OffsetTableContigSpace: public ContiguousSpace {
+  friend class VMStructs;
+ protected:
+  G1BlockOffsetArrayContigSpace _offsets;
+  Mutex _par_alloc_lock;
+  volatile unsigned _gc_time_stamp;
+
+ public:
+  // Constructor.  If "is_zeroed" is true, the MemRegion "mr" may be
+  // assumed to contain zeros.
+  G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
+                           MemRegion mr, bool is_zeroed = false);
+
+  void set_bottom(HeapWord* value);
+  void set_end(HeapWord* value);
+
+  virtual HeapWord* saved_mark_word() const;
+  virtual void set_saved_mark();
+  void reset_gc_time_stamp() { _gc_time_stamp = 0; }
+
+  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void clear(bool mangle_space);
+
+  HeapWord* block_start(const void* p);
+  HeapWord* block_start_const(const void* p) const;
+
+  // Add offset table update.
+  virtual HeapWord* allocate(size_t word_size);
+  HeapWord* par_allocate(size_t word_size);
+
+  // MarkSweep support phase3
+  virtual HeapWord* initialize_threshold();
+  virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end);
+
+  virtual void print() const;
+};
+
+class HeapRegion: public G1OffsetTableContigSpace {
+  friend class VMStructs;
+ private:
+
+  enum HumongousType {
+    NotHumongous = 0,
+    StartsHumongous,
+    ContinuesHumongous
+  };
+
+  // The next filter kind that should be used for a "new_dcto_cl" call with
+  // the "traditional" signature.
+  HeapRegionDCTOC::FilterKind _next_fk;
+
+  // Requires that the region "mr" be dense with objects, and begin and end
+  // with an object.
+  void oops_in_mr_iterate(MemRegion mr, OopClosure* cl);
+
+  // The remembered set for this region.
+  // (Might want to make this "inline" later, to avoid some alloc failure
+  // issues.)
+  HeapRegionRemSet* _rem_set;
+
+  G1BlockOffsetArrayContigSpace* offsets() { return &_offsets; }
+
+ protected:
+  // If this region is a member of a HeapRegionSeq, the index in that
+  // sequence, otherwise -1.
+  int  _hrs_index;
+
+  HumongousType _humongous_type;
+  // For a humongous region, region in which it starts.
+  HeapRegion* _humongous_start_region;
+  // For the start region of a humongous sequence, its original end().
+  HeapWord* _orig_end;
+
+  // True iff the region is in current collection_set.
+  bool _in_collection_set;
+
+  // True iff the region is on the unclean list, waiting to be zero filled.
+  bool _is_on_unclean_list;
+
+  // True iff the region is on the free list, ready for allocation.
+  bool _is_on_free_list;
+
+  // Is this or has it been an allocation region in the current collection
+  // pause.
+  bool _is_gc_alloc_region;
+
+  // True iff an attempt to evacuate an object in the region failed.
+  bool _evacuation_failed;
+
+  // A heap region may be a member of one of a number of special subsets, each
+  // represented as linked lists through the field below.  Currently, these
+  // sets include:
+  //   The collection set.
+  //   The set of allocation regions used in a collection pause.
+  //   Spaces that may contain gray objects.
+  HeapRegion* _next_in_special_set;
+
+  // next region in the young "generation" region set
+  HeapRegion* _next_young_region;
+
+  // For parallel heapRegion traversal.
+  jint _claimed;
+
+  // We use concurrent marking to determine the amount of live data
+  // in each heap region.
+  size_t _prev_marked_bytes;    // Bytes known to be live via last completed marking.
+  size_t _next_marked_bytes;    // Bytes known to be live via in-progress marking.
+
+  // See "sort_index" method.  -1 means is not in the array.
+  int _sort_index;
+
+  // Means it has (or at least had) a very large RS, and should not be
+  // considered for membership in a collection set.
+  enum PopularityState {
+    NotPopular,
+    PopularPending,
+    Popular
+  };
+  PopularityState _popularity;
+
+  // <PREDICTION>
+  double _gc_efficiency;
+  // </PREDICTION>
+
+  enum YoungType {
+    NotYoung,                   // a region is not young
+    ScanOnly,                   // a region is young and scan-only
+    Young,                      // a region is young
+    Survivor                    // a region is young and it contains
+                                // survivor
+  };
+
+  YoungType _young_type;
+  int  _young_index_in_cset;
+  SurvRateGroup* _surv_rate_group;
+  int  _age_index;
+
+  // The start of the unmarked area. The unmarked area extends from this
+  // word until the top and/or end of the region, and is the part
+  // of the region for which no marking was done, i.e. objects may
+  // have been allocated in this part since the last mark phase.
+  // "prev" is the top at the start of the last completed marking.
+  // "next" is the top at the start of the in-progress marking (if any.)
+  HeapWord* _prev_top_at_mark_start;
+  HeapWord* _next_top_at_mark_start;
+  // If a collection pause is in progress, this is the top at the start
+  // of that pause.
+
+  // We've counted the marked bytes of objects below here.
+  HeapWord* _top_at_conc_mark_count;
+
+  void init_top_at_mark_start() {
+    assert(_prev_marked_bytes == 0 &&
+           _next_marked_bytes == 0,
+           "Must be called after zero_marked_bytes.");
+    HeapWord* bot = bottom();
+    _prev_top_at_mark_start = bot;
+    _next_top_at_mark_start = bot;
+    _top_at_conc_mark_count = bot;
+  }
+
+  jint _zfs;  // A member of ZeroFillState.  Protected by ZF_lock.
+  Thread* _zero_filler; // If _zfs is ZeroFilling, the thread that (last)
+                        // made it so.
+
+  void set_young_type(YoungType new_type) {
+    //assert(_young_type != new_type, "setting the same type" );
+    // TODO: add more assertions here
+    _young_type = new_type;
+  }
+
+ public:
+  // If "is_zeroed" is "true", the region "mr" can be assumed to contain zeros.
+  HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray,
+             MemRegion mr, bool is_zeroed);
+
+  enum SomePublicConstants {
+    // HeapRegions are GrainBytes-aligned
+    // and have sizes that are multiples of GrainBytes.
+    LogOfHRGrainBytes = 20,
+    LogOfHRGrainWords = LogOfHRGrainBytes - LogHeapWordSize,
+    GrainBytes = 1 << LogOfHRGrainBytes,
+    GrainWords = 1 << LogOfHRGrainWords,
+    MaxAge = 2, NoOfAges = MaxAge+1
+  };
+
+  enum ClaimValues {
+    InitialClaimValue     = 0,
+    FinalCountClaimValue  = 1,
+    NoteEndClaimValue     = 2,
+    ScrubRemSetClaimValue = 3,
+    ParVerifyClaimValue   = 4
+  };
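+  // Claim values are installed via claimHeapRegion() during parallel region
+  // iteration; InitialClaimValue denotes a region not yet claimed by any
+  // worker.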
+
+  // Concurrent refinement requires contiguous heap regions (in which TLABs
+  // might be allocated) to be zero-filled.  Each region therefore has a
+  // zero-fill-state.
+  enum ZeroFillState {
+    NotZeroFilled,
+    ZeroFilling,
+    ZeroFilled,
+    Allocated
+  };
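+  // The usual progression is NotZeroFilled -> ZeroFilling -> ZeroFilled ->
+  // Allocated; see set_zero_fill_state_work() and ensure_zero_filled_locked()
+  // in heapRegion.cpp for how the transitions are made.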
+
+  // If this region is a member of a HeapRegionSeq, the index in that
+  // sequence, otherwise -1.
+  int hrs_index() const { return _hrs_index; }
+  void set_hrs_index(int index) { _hrs_index = index; }
+
+  // The number of bytes marked live in the region in the last marking phase.
+  size_t marked_bytes()    { return _prev_marked_bytes; }
+  // The number of bytes counted in the next marking.
+  size_t next_marked_bytes() { return _next_marked_bytes; }
+  // The number of bytes live wrt the next marking.
+  size_t next_live_bytes() {
+    return (top() - next_top_at_mark_start())
+      * HeapWordSize
+      + next_marked_bytes();
+  }
+
+  // A lower bound on the amount of garbage bytes in the region.
+  size_t garbage_bytes() {
+    size_t used_at_mark_start_bytes =
+      (prev_top_at_mark_start() - bottom()) * HeapWordSize;
+    assert(used_at_mark_start_bytes >= marked_bytes(),
+           "Can't mark more than we have.");
+    return used_at_mark_start_bytes - marked_bytes();
+  }
+
+  // An upper bound on the number of live bytes in the region.
+  size_t max_live_bytes() { return used() - garbage_bytes(); }
+
+  void add_to_marked_bytes(size_t incr_bytes) {
+    _next_marked_bytes = _next_marked_bytes + incr_bytes;
+    guarantee( _next_marked_bytes <= used(), "invariant" );
+  }
+
+  void zero_marked_bytes()      {
+    _prev_marked_bytes = _next_marked_bytes = 0;
+  }
+
+  bool isHumongous() const { return _humongous_type != NotHumongous; }
+  bool startsHumongous() const { return _humongous_type == StartsHumongous; }
+  bool continuesHumongous() const { return _humongous_type == ContinuesHumongous; }
+  // For a humongous region, region in which it starts.
+  HeapRegion* humongous_start_region() const {
+    return _humongous_start_region;
+  }
+
+  // Causes the current region to represent a humongous object spanning "n"
+  // regions.
+  virtual void set_startsHumongous();
+
+  // The regions that continue a humongous sequence should be added using
+  // this method, in increasing address order.
+  void set_continuesHumongous(HeapRegion* start);
+
+  void add_continuingHumongousRegion(HeapRegion* cont);
+
+  // If the region has a remembered set, return a pointer to it.
+  HeapRegionRemSet* rem_set() const {
+    return _rem_set;
+  }
+
+  // True iff the region is in current collection_set.
+  bool in_collection_set() const {
+    return _in_collection_set;
+  }
+  void set_in_collection_set(bool b) {
+    _in_collection_set = b;
+  }
+  HeapRegion* next_in_collection_set() {
+    assert(in_collection_set(), "should only invoke on member of CS.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->in_collection_set(),
+           "Malformed CS.");
+    return _next_in_special_set;
+  }
+  void set_next_in_collection_set(HeapRegion* r) {
+    assert(in_collection_set(), "should only invoke on member of CS.");
+    assert(r == NULL || r->in_collection_set(), "Malformed CS.");
+    _next_in_special_set = r;
+  }
+
+  // True iff it is or has been an allocation region in the current
+  // collection pause.
+  bool is_gc_alloc_region() const {
+    return _is_gc_alloc_region;
+  }
+  void set_is_gc_alloc_region(bool b) {
+    _is_gc_alloc_region = b;
+  }
+  HeapRegion* next_gc_alloc_region() {
+    assert(is_gc_alloc_region(), "should only invoke on member of CS.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_gc_alloc_region(),
+           "Malformed CS.");
+    return _next_in_special_set;
+  }
+  void set_next_gc_alloc_region(HeapRegion* r) {
+    assert(is_gc_alloc_region(), "should only invoke on member of CS.");
+    assert(r == NULL || r->is_gc_alloc_region(), "Malformed CS.");
+    _next_in_special_set = r;
+  }
+
+  bool is_reserved() {
+    return popular();
+  }
+
+  bool is_on_free_list() {
+    return _is_on_free_list;
+  }
+
+  void set_on_free_list(bool b) {
+    _is_on_free_list = b;
+  }
+
+  HeapRegion* next_from_free_list() {
+    assert(is_on_free_list(),
+           "Should only invoke on free space.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_on_free_list(),
+           "Malformed Free List.");
+    return _next_in_special_set;
+  }
+
+  void set_next_on_free_list(HeapRegion* r) {
+    assert(r == NULL || r->is_on_free_list(), "Malformed free list.");
+    _next_in_special_set = r;
+  }
+
+  bool is_on_unclean_list() {
+    return _is_on_unclean_list;
+  }
+
+  void set_on_unclean_list(bool b);
+
+  HeapRegion* next_from_unclean_list() {
+    assert(is_on_unclean_list(),
+           "Should only invoke on unclean space.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_on_unclean_list(),
+           "Malformed unclean List.");
+    return _next_in_special_set;
+  }
+
+  void set_next_on_unclean_list(HeapRegion* r);
+
+  HeapRegion* get_next_young_region() { return _next_young_region; }
+  void set_next_young_region(HeapRegion* hr) {
+    _next_young_region = hr;
+  }
+
+  // Allows logical separation between objects allocated before and after.
+  void save_marks();
+
+  // Reset HR stuff to default values.
+  void hr_clear(bool par, bool clear_space);
+
+  void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+
+  // Ensure that "this" is zero-filled.
+  void ensure_zero_filled();
+  // This one requires that the calling thread holds ZF_mon.
+  void ensure_zero_filled_locked();
+
+  // Get the start of the unmarked area in this region.
+  HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; }
+  HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; }
+
+  // Apply "cl->do_oop" to (the addresses of) all reference fields in objects
+  // allocated in the current region before the last call to "save_mark".
+  void oop_before_save_marks_iterate(OopClosure* cl);
+
+  // This call determines the "filter kind" argument that will be used for
+  // the next call to "new_dcto_cl" on this region with the "traditional"
+  // signature (i.e., the call below.)  The default, in the absence of a
+  // preceding call to this method, is "NoFilterKind", and a call to this
+  // method is necessary for each such call, or else it reverts to the
+  // default.
+  // (This is really ugly, but every alternative I could think of required
+  // changing a lot of main-line code for G1.)
+  void set_next_filter_kind(HeapRegionDCTOC::FilterKind nfk) {
+    _next_fk = nfk;
+  }
+
+  DirtyCardToOopClosure*
+  new_dcto_closure(OopClosure* cl,
+                   CardTableModRefBS::PrecisionStyle precision,
+                   HeapRegionDCTOC::FilterKind fk);
+
+#if WHASSUP
+  DirtyCardToOopClosure*
+  new_dcto_closure(OopClosure* cl,
+                   CardTableModRefBS::PrecisionStyle precision,
+                   HeapWord* boundary) {
+    assert(boundary == NULL, "This arg doesn't make sense here.");
+    DirtyCardToOopClosure* res = new_dcto_closure(cl, precision, _next_fk);
+    _next_fk = HeapRegionDCTOC::NoFilterKind;
+    return res;
+  }
+#endif
+
+  //
+  // Note the start or end of marking. This tells the heap region
+  // that the collector is about to start or has finished (concurrently)
+  // marking the heap.
+  //
+
+  // Note the start of a marking phase. Record the
+  // start of the unmarked area of the region here.
+  void note_start_of_marking(bool during_initial_mark) {
+    init_top_at_conc_mark_count();
+    _next_marked_bytes = 0;
+    if (during_initial_mark && is_young() && !is_survivor())
+      _next_top_at_mark_start = bottom();
+    else
+      _next_top_at_mark_start = top();
+  }
+
+  // Note the end of a marking phase. Install the start of
+  // the unmarked area that was captured at start of marking.
+  void note_end_of_marking() {
+    _prev_top_at_mark_start = _next_top_at_mark_start;
+    _prev_marked_bytes = _next_marked_bytes;
+    _next_marked_bytes = 0;
+
+    guarantee(_prev_marked_bytes <=
+              (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
+              "invariant");
+  }
+
+  // After an evacuation, we need to update _next_top_at_mark_start
+  // to be the current top.  Note this is only valid if we have only
+  // ever evacuated into this region.  If we evacuate, allocate, and
+  // then evacuate again, we are in deep trouble.
+  void note_end_of_copying() {
+    assert(top() >= _next_top_at_mark_start,
+           "Increase only");
+    _next_top_at_mark_start = top();
+  }
+
+  // Returns "false" iff no object in the region was allocated when the
+  // last mark phase ended.
+  bool is_marked() { return _prev_top_at_mark_start != bottom(); }
+
+  // If "is_marked()" is true, then this is the index of the region in
+  // an array constructed at the end of marking of the regions in a
+  // "desirability" order.
+  int sort_index() {
+    return _sort_index;
+  }
+  void set_sort_index(int i) {
+    _sort_index = i;
+  }
+
+  void init_top_at_conc_mark_count() {
+    _top_at_conc_mark_count = bottom();
+  }
+
+  void set_top_at_conc_mark_count(HeapWord *cur) {
+    assert(bottom() <= cur && cur <= end(), "Sanity.");
+    _top_at_conc_mark_count = cur;
+  }
+
+  HeapWord* top_at_conc_mark_count() {
+    return _top_at_conc_mark_count;
+  }
+
+  void reset_during_compaction() {
+    guarantee( isHumongous() && startsHumongous(),
+               "should only be called for humongous regions");
+
+    zero_marked_bytes();
+    init_top_at_mark_start();
+  }
+
+  bool popular() { return _popularity == Popular; }
+  void set_popular(bool b) {
+    if (b) {
+      _popularity = Popular;
+    } else {
+      _popularity = NotPopular;
+    }
+  }
+  bool popular_pending() { return _popularity == PopularPending; }
+  void set_popular_pending(bool b) {
+    if (b) {
+      _popularity = PopularPending;
+    } else {
+      _popularity = NotPopular;
+    }
+  }
+
+  // <PREDICTION>
+  void calc_gc_efficiency(void);
+  double gc_efficiency() { return _gc_efficiency;}
+  // </PREDICTION>
+
+  bool is_young() const     { return _young_type != NotYoung; }
+  bool is_scan_only() const { return _young_type == ScanOnly; }
+  bool is_survivor() const  { return _young_type == Survivor; }
+
+  int  young_index_in_cset() const { return _young_index_in_cset; }
+  void set_young_index_in_cset(int index) {
+    assert( (index == -1) || is_young(), "pre-condition" );
+    _young_index_in_cset = index;
+  }
+
+  int age_in_surv_rate_group() {
+    assert( _surv_rate_group != NULL, "pre-condition" );
+    assert( _age_index > -1, "pre-condition" );
+    return _surv_rate_group->age_in_group(_age_index);
+  }
+
+  void recalculate_age_in_surv_rate_group() {
+    assert( _surv_rate_group != NULL, "pre-condition" );
+    assert( _age_index > -1, "pre-condition" );
+    _age_index = _surv_rate_group->recalculate_age_index(_age_index);
+  }
+
+  void record_surv_words_in_group(size_t words_survived) {
+    assert( _surv_rate_group != NULL, "pre-condition" );
+    assert( _age_index > -1, "pre-condition" );
+    int age_in_group = age_in_surv_rate_group();
+    _surv_rate_group->record_surviving_words(age_in_group, words_survived);
+  }
+
+  int age_in_surv_rate_group_cond() {
+    if (_surv_rate_group != NULL)
+      return age_in_surv_rate_group();
+    else
+      return -1;
+  }
+
+  SurvRateGroup* surv_rate_group() {
+    return _surv_rate_group;
+  }
+
+  void install_surv_rate_group(SurvRateGroup* surv_rate_group) {
+    assert( surv_rate_group != NULL, "pre-condition" );
+    assert( _surv_rate_group == NULL, "pre-condition" );
+    assert( is_young(), "pre-condition" );
+
+    _surv_rate_group = surv_rate_group;
+    _age_index = surv_rate_group->next_age_index();
+  }
+
+  void uninstall_surv_rate_group() {
+    if (_surv_rate_group != NULL) {
+      assert( _age_index > -1, "pre-condition" );
+      assert( is_young(), "pre-condition" );
+
+      _surv_rate_group = NULL;
+      _age_index = -1;
+    } else {
+      assert( _age_index == -1, "pre-condition" );
+    }
+  }
+
+  void set_young() { set_young_type(Young); }
+
+  void set_scan_only() { set_young_type(ScanOnly); }
+
+  void set_survivor() { set_young_type(Survivor); }
+
+  void set_not_young() { set_young_type(NotYoung); }
+
+  // Determine if an object has been allocated since the last
+  // mark performed by the collector. This returns true iff the object
+  // is within the unmarked area of the region.
+  bool obj_allocated_since_prev_marking(oop obj) const {
+    return (HeapWord *) obj >= prev_top_at_mark_start();
+  }
+  bool obj_allocated_since_next_marking(oop obj) const {
+    return (HeapWord *) obj >= next_top_at_mark_start();
+  }
+
+  // For parallel heapRegion traversal.
+  bool claimHeapRegion(int claimValue);
+  jint claim_value() { return _claimed; }
+  // Use this carefully: only when you're sure no one is claiming...
+  void set_claim_value(int claimValue) { _claimed = claimValue; }
+
+  // Returns the "evacuation_failed" property of the region.
+  bool evacuation_failed() { return _evacuation_failed; }
+
+  // Sets the "evacuation_failed" property of the region.
+  void set_evacuation_failed(bool b) {
+    _evacuation_failed = b;
+
+    if (b) {
+      init_top_at_conc_mark_count();
+      _next_marked_bytes = 0;
+    }
+  }
+
+  // Requires that "mr" be entirely within the region.
+  // Apply "cl->do_object" to all objects that intersect with "mr".
+  // If the iteration encounters an unparseable portion of the region,
+  // or if "cl->abort()" is true after a closure application,
+  // terminate the iteration and return the address of the start of the
+  // subregion that isn't done.  (The two can be distinguished by querying
+  // "cl->abort()".)  Return of "NULL" indicates that the iteration
+  // completed.
+  HeapWord*
+  object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl);
+
+  HeapWord*
+  oops_on_card_seq_iterate_careful(MemRegion mr,
+                                   FilterOutOfRegionClosure* cl);
+
+  // The region "mr" is entirely in "this", and starts and ends at block
+  // boundaries. The caller declares that all the contained blocks are
+  // coalesced into one.
+  void declare_filled_region_to_BOT(MemRegion mr) {
+    _offsets.single_block(mr.start(), mr.end());
+  }
+
+  // A version of block start that is guaranteed to find *some* block
+  // boundary at or before "p", but does not do object iteration, and may
+  // therefore be used safely when the heap is unparseable.
+  HeapWord* block_start_careful(const void* p) const {
+    return _offsets.block_start_careful(p);
+  }
+
+  // Requires that "addr" is within the region.  Returns the start of the
+  // first ("careful") block that starts at or after "addr", or else the
+  // "end" of the region if there is no such block.
+  HeapWord* next_block_start_careful(HeapWord* addr);
+
+  // Returns the zero-fill-state of the current region.
+  ZeroFillState zero_fill_state() { return (ZeroFillState)_zfs; }
+  bool zero_fill_is_allocated() { return _zfs == Allocated; }
+  Thread* zero_filler() { return _zero_filler; }
+
+  // Indicate that the contents of the region are unknown, and therefore
+  // might require zero-filling.
+  void set_zero_fill_needed() {
+    set_zero_fill_state_work(NotZeroFilled);
+  }
+  void set_zero_fill_in_progress(Thread* t) {
+    set_zero_fill_state_work(ZeroFilling);
+    _zero_filler = t;
+  }
+  void set_zero_fill_complete();
+  void set_zero_fill_allocated() {
+    set_zero_fill_state_work(Allocated);
+  }
+
+  void set_zero_fill_state_work(ZeroFillState zfs);
+
+  // This is called when a full collection shrinks the heap.
+  // We want to set the heap region to a value which says
+  // it is no longer part of the heap.  For now, we'll let "NotZeroFilled"
+  // fill that role.
+  void reset_zero_fill() {
+    set_zero_fill_state_work(NotZeroFilled);
+    _zero_filler = NULL;
+  }
+
+#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix)  \
+  virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl);
+  SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL)
+
+  CompactibleSpace* next_compaction_space() const;
+
+  virtual void reset_after_compaction();
+
+  void print() const;
+  void print_on(outputStream* st) const;
+
+  // Override
+  virtual void verify(bool allow_dirty) const;
+
+#ifdef DEBUG
+  HeapWord* allocate(size_t size);
+#endif
+};
+
+// HeapRegionClosure is used for iterating over regions.
+// Terminates the iteration when the "doHeapRegion" method returns "true".
+class HeapRegionClosure : public StackObj {
+  friend class HeapRegionSeq;
+  friend class G1CollectedHeap;
+
+  bool _complete;
+  void incomplete() { _complete = false; }
+
+ public:
+  HeapRegionClosure(): _complete(true) {}
+
+  // Typically called on each region until it returns true.
+  virtual bool doHeapRegion(HeapRegion* r) = 0;
+
+  // True after iteration if the closure was applied to all heap regions
+  // and returned "false" in all cases.
+  bool complete() { return _complete; }
+};
+
+// A linked list of heap regions.  It leaves the "next" field
+// unspecified; that's up to subtypes.
+class RegionList {
+protected:
+  virtual HeapRegion* get_next(HeapRegion* chr) = 0;
+  virtual void set_next(HeapRegion* chr,
+                        HeapRegion* new_next) = 0;
+
+  HeapRegion* _hd;
+  HeapRegion* _tl;
+  size_t _sz;
+
+  // Protected constructor because this type is only meaningful
+  // when the get_next/set_next functions are defined.
+  RegionList() : _hd(NULL), _tl(NULL), _sz(0) {}
+public:
+  void reset() {
+    _hd = NULL;
+    _tl = NULL;
+    _sz = 0;
+  }
+  HeapRegion* hd() { return _hd; }
+  HeapRegion* tl() { return _tl; }
+  size_t sz() { return _sz; }
+  size_t length();
+
+  bool well_formed() {
+    return
+      ((hd() == NULL && tl() == NULL && sz() == 0)
+       || (hd() != NULL && tl() != NULL && sz() > 0))
+      && (sz() == length());
+  }
+  virtual void insert_before_head(HeapRegion* r);
+  void prepend_list(RegionList* new_list);
+  virtual HeapRegion* pop();
+  void dec_sz() { _sz--; }
+  // Requires that "r" is an element of the list, and is not the tail.
+  void delete_after(HeapRegion* r);
+};
+
+class EmptyNonHRegionList: public RegionList {
+protected:
+  // Protected constructor because this type is only meaningful
+  // when the get_next/set_next functions are defined.
+  EmptyNonHRegionList() : RegionList() {}
+
+public:
+  void insert_before_head(HeapRegion* r) {
+    //    assert(r->is_empty(), "Better be empty");
+    assert(!r->isHumongous(), "Better not be humongous.");
+    RegionList::insert_before_head(r);
+  }
+  void prepend_list(EmptyNonHRegionList* new_list) {
+    //    assert(new_list->hd() == NULL || new_list->hd()->is_empty(),
+    //     "Better be empty");
+    assert(new_list->hd() == NULL || !new_list->hd()->isHumongous(),
+           "Better not be humongous.");
+    //    assert(new_list->tl() == NULL || new_list->tl()->is_empty(),
+    //     "Better be empty");
+    assert(new_list->tl() == NULL || !new_list->tl()->isHumongous(),
+           "Better not be humongous.");
+    RegionList::prepend_list(new_list);
+  }
+};
+
+class UncleanRegionList: public EmptyNonHRegionList {
+public:
+  HeapRegion* get_next(HeapRegion* hr) {
+    return hr->next_from_unclean_list();
+  }
+  void set_next(HeapRegion* hr, HeapRegion* new_next) {
+    hr->set_next_on_unclean_list(new_next);
+  }
+
+  UncleanRegionList() : EmptyNonHRegionList() {}
+
+  void insert_before_head(HeapRegion* r) {
+    assert(!r->is_on_free_list(),
+           "Better not already be on free list");
+    assert(!r->is_on_unclean_list(),
+           "Better not already be on unclean list");
+    r->set_zero_fill_needed();
+    r->set_on_unclean_list(true);
+    EmptyNonHRegionList::insert_before_head(r);
+  }
+  void prepend_list(UncleanRegionList* new_list) {
+    assert(new_list->tl() == NULL || !new_list->tl()->is_on_free_list(),
+           "Better not already be on free list");
+    assert(new_list->tl() == NULL || new_list->tl()->is_on_unclean_list(),
+           "Better already be marked as on unclean list");
+    assert(new_list->hd() == NULL || !new_list->hd()->is_on_free_list(),
+           "Better not already be on free list");
+    assert(new_list->hd() == NULL || new_list->hd()->is_on_unclean_list(),
+           "Better already be marked as on unclean list");
+    EmptyNonHRegionList::prepend_list(new_list);
+  }
+  HeapRegion* pop() {
+    HeapRegion* res = RegionList::pop();
+    if (res != NULL) res->set_on_unclean_list(false);
+    return res;
+  }
+};
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
+
+#endif // SERIALGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
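+// Allocate "size" words in this space; on success, record the new block in
+// the block offset table.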
+inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
+  HeapWord* res = ContiguousSpace::allocate(size);
+  if (res != NULL) {
+    _offsets.alloc_block(res, size);
+  }
+  return res;
+}
+
+// Because of the requirement of keeping "_offsets" up to date with the
+// allocations, we sequentialize these with a lock.  Therefore, best if
+// this is used for larger LAB allocations only.
+inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
+  MutexLocker x(&_par_alloc_lock);
+  // This ought to be just "allocate", because of the lock above, but
+  // ContiguousSpace::allocate asserts that either the allocating thread
+  // holds the heap lock or it is the VM thread and we're at a safepoint.
+  // The best I (dld) could figure was to put a field in ContiguousSpace
+  // meaning "locking at safepoint taken care of", and set/reset that
+  // here.  But this will do for now, especially in light of the comment
+  // above.  Perhaps in the future we can find some lock-free way of
+  // keeping the coordination.
+  HeapWord* res = ContiguousSpace::par_allocate(size);
+  if (res != NULL) {
+    _offsets.alloc_block(res, size);
+  }
+  return res;
+}
+
+inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
+  return _offsets.block_start(p);
+}
+
+inline HeapWord*
+G1OffsetTableContigSpace::block_start_const(const void* p) const {
+  return _offsets.block_start_const(p);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,1443 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_heapRegionRemSet.cpp.incl"
+
+#define HRRS_VERBOSE 0
+
+#define PRT_COUNT_OCCUPIED 1
+
+// OtherRegionsTable
+
+class PerRegionTable: public CHeapObj {
+  friend class OtherRegionsTable;
+  friend class HeapRegionRemSetIterator;
+
+  HeapRegion*     _hr;
+  BitMap          _bm;
+#if PRT_COUNT_OCCUPIED
+  jint            _occupied;
+#endif
+  PerRegionTable* _next_free;
+
+  PerRegionTable* next_free() { return _next_free; }
+  void set_next_free(PerRegionTable* prt) { _next_free = prt; }
+
+
+  static PerRegionTable* _free_list;
+
+#ifdef _MSC_VER
+  // For some reason, even though the classes are marked as friends they
+  // are unable to access CardsPerRegion when it is private/protected.
+  // Only the Windows C++ compiler complains about this; Sun CC and Linux
+  // gcc don't have a problem with the access when it is private.
+
+  public:
+
+#endif // _MSC_VER
+
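+  // The number of card-table cards spanned by a single heap region.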
+  enum SomePrivateConstants {
+    CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+  };
+
+protected:
+  // We need access in order to union things into the base table.
+  BitMap* bm() { return &_bm; }
+
+  void recount_occupied() {
+    _occupied = (jint) bm()->count_one_bits();
+  }
+
+  PerRegionTable(HeapRegion* hr) :
+    _hr(hr),
+#if PRT_COUNT_OCCUPIED
+    _occupied(0),
+#endif
+    _bm(CardsPerRegion, false /* in-resource-area */)
+  {}
+
+  static void free(PerRegionTable* prt) {
+    while (true) {
+      PerRegionTable* fl = _free_list;
+      prt->set_next_free(fl);
+      PerRegionTable* res =
+        (PerRegionTable*)
+        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
+      if (res == fl) return;
+    }
+    ShouldNotReachHere();
+  }
+
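+  // Take a table from the global free list via CAS, or allocate a new one
+  // if the list is empty; either way it is (re)initialized for "hr".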
+  static PerRegionTable* alloc(HeapRegion* hr) {
+    PerRegionTable* fl = _free_list;
+    while (fl != NULL) {
+      PerRegionTable* nxt = fl->next_free();
+      PerRegionTable* res =
+        (PerRegionTable*)
+        Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
+      if (res == fl) {
+        fl->init(hr);
+        return fl;
+      } else {
+        fl = _free_list;
+      }
+    }
+    assert(fl == NULL, "Loop condition.");
+    return new PerRegionTable(hr);
+  }
+
+  void add_card_work(short from_card, bool par) {
+    if (!_bm.at(from_card)) {
+      if (par) {
+        if (_bm.par_at_put(from_card, 1)) {
+#if PRT_COUNT_OCCUPIED
+          Atomic::inc(&_occupied);
+#endif
+        }
+      } else {
+        _bm.at_put(from_card, 1);
+#if PRT_COUNT_OCCUPIED
+        _occupied++;
+#endif
+      }
+    }
+  }
+
+  void add_reference_work(oop* from, bool par) {
+    // Must make this robust in case "from" is not in "_hr", because of
+    // concurrency.
+
+#if HRRS_VERBOSE
+    gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
+                           from, *from);
+#endif
+
+    HeapRegion* loc_hr = hr();
+    // If the test below fails, then this table was reused concurrently
+    // with this operation.  This is OK, since the old table was coarsened,
+    // and adding a bit to the new table is never incorrect.
+    if (loc_hr->is_in_reserved(from)) {
+      size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom());
+      size_t from_card =
+        hw_offset >>
+        (CardTableModRefBS::card_shift - LogHeapWordSize);
+
+      add_card_work((short) from_card, par);
+    }
+  }
+
+public:
+
+  HeapRegion* hr() const { return _hr; }
+
+#if PRT_COUNT_OCCUPIED
+  jint occupied() const {
+    // Overkill, but if we ever need it...
+    // guarantee(_occupied == _bm.count_one_bits(), "Check");
+    return _occupied;
+  }
+#else
+  jint occupied() const {
+    return _bm.count_one_bits();
+  }
+#endif
+
+  void init(HeapRegion* hr) {
+    _hr = hr;
+#if PRT_COUNT_OCCUPIED
+    _occupied = 0;
+#endif
+    _bm.clear();
+  }
+
+  void add_reference(oop* from) {
+    add_reference_work(from, /*parallel*/ true);
+  }
+
+  void seq_add_reference(oop* from) {
+    add_reference_work(from, /*parallel*/ false);
+  }
+
+  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
+    HeapWord* hr_bot = hr()->bottom();
+    int hr_first_card_index = ctbs->index_for(hr_bot);
+    bm()->set_intersection_at_offset(*card_bm, hr_first_card_index);
+#if PRT_COUNT_OCCUPIED
+    recount_occupied();
+#endif
+  }
+
+  void add_card(short from_card_index) {
+    add_card_work(from_card_index, /*parallel*/ true);
+  }
+
+  void seq_add_card(short from_card_index) {
+    add_card_work(from_card_index, /*parallel*/ false);
+  }
+
+  // (Destructively) union the bitmap of the current table into the given
+  // bitmap (which is assumed to be of the same size.)
+  void union_bitmap_into(BitMap* bm) {
+    bm->set_union(_bm);
+  }
+
+  // Mem size in bytes.
+  size_t mem_size() const {
+    return sizeof(*this) + _bm.size_in_words() * HeapWordSize;
+  }
+
+  static size_t fl_mem_size() {
+    PerRegionTable* cur = _free_list;
+    size_t res = 0;
+    while (cur != NULL) {
+      res += sizeof(PerRegionTable);
+      cur = cur->next_free();
+    }
+    return res;
+  }
+
+  // Requires "from" to be in "hr()".
+  bool contains_reference(oop* from) const {
+    assert(hr()->is_in_reserved(from), "Precondition.");
+    size_t card_ind = pointer_delta(from, hr()->bottom(),
+                                    CardTableModRefBS::card_size);
+    return _bm.at(card_ind);
+  }
+};
+
+PerRegionTable* PerRegionTable::_free_list = NULL;
+
+
+#define COUNT_PAR_EXPANDS 0
+
+#if COUNT_PAR_EXPANDS
+static jint n_par_expands = 0;
+static jint n_par_contracts = 0;
+static jint par_expand_list_len = 0;
+static jint max_par_expand_list_len = 0;
+
+static void print_par_expand() {
+  Atomic::inc(&n_par_expands);
+  Atomic::inc(&par_expand_list_len);
+  if (par_expand_list_len > max_par_expand_list_len) {
+    max_par_expand_list_len = par_expand_list_len;
+  }
+  if ((n_par_expands % 10) == 0) {
+    gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, "
+                  "len = %d, max_len = %d\n.",
+                  n_par_expands, n_par_contracts, par_expand_list_len,
+                  max_par_expand_list_len);
+  }
+}
+#endif
+
+class PosParPRT: public PerRegionTable {
+  PerRegionTable** _par_tables;
+
+  enum SomePrivateConstants {
+    ReserveParTableExpansion = 1
+  };
+
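+  // Reserve the right to expand with a CAS on _par_tables, then give each
+  // parallel GC worker beyond worker 0 its own PerRegionTable so those
+  // workers can add cards without atomics; the expanded table is linked
+  // onto _par_expanded_list so it can be contracted later.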
+  void par_expand() {
+    int n = HeapRegionRemSet::num_par_rem_sets()-1;
+    if (n <= 0) return;
+    if (_par_tables == NULL) {
+      PerRegionTable* res =
+        (PerRegionTable*)
+        Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion,
+                            &_par_tables, NULL);
+      if (res != NULL) return;
+      // Otherwise, we reserved the right to do the expansion.
+
+      PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n);
+      for (int i = 0; i < n; i++) {
+        PerRegionTable* ptable = PerRegionTable::alloc(hr());
+        ptables[i] = ptable;
+      }
+      // Here we do not need an atomic.
+      _par_tables = ptables;
+#if COUNT_PAR_EXPANDS
+      print_par_expand();
+#endif
+      // We must put this table on the expanded list.
+      PosParPRT* exp_head = _par_expanded_list;
+      while (true) {
+        set_next_par_expanded(exp_head);
+        PosParPRT* res =
+          (PosParPRT*)
+          Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head);
+        if (res == exp_head) return;
+        // Otherwise.
+        exp_head = res;
+      }
+      ShouldNotReachHere();
+    }
+  }
+
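+  // Fold each per-worker table back into the base bitmap, free the worker
+  // tables, and recount the occupancy.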
+  void par_contract() {
+    assert(_par_tables != NULL, "Precondition.");
+    int n = HeapRegionRemSet::num_par_rem_sets()-1;
+    for (int i = 0; i < n; i++) {
+      _par_tables[i]->union_bitmap_into(bm());
+      PerRegionTable::free(_par_tables[i]);
+      _par_tables[i] = NULL;
+    }
+#if PRT_COUNT_OCCUPIED
+    // We must recount the "occupied."
+    recount_occupied();
+#endif
+    FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables);
+    _par_tables = NULL;
+#if COUNT_PAR_EXPANDS
+    Atomic::inc(&n_par_contracts);
+    Atomic::dec(&par_expand_list_len);
+#endif
+  }
+
+  static PerRegionTable** _par_table_fl;
+
+  PosParPRT* _next;
+
+  static PosParPRT* _free_list;
+
+  PerRegionTable** par_tables() const {
+    assert(uintptr_t(NULL) == 0, "Assumption.");
+    if (uintptr_t(_par_tables) <= ReserveParTableExpansion)
+      return NULL;
+    else
+      return _par_tables;
+  }
+
+  PosParPRT* _next_par_expanded;
+  PosParPRT* next_par_expanded() { return _next_par_expanded; }
+  void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; }
+  static PosParPRT* _par_expanded_list;
+
+public:
+
+  PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {}
+
+  jint occupied() const {
+    jint res = PerRegionTable::occupied();
+    if (par_tables() != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        res += par_tables()[i]->occupied();
+      }
+    }
+    return res;
+  }
+
+  void init(HeapRegion* hr) {
+    PerRegionTable::init(hr);
+    _next = NULL;
+    if (par_tables() != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        par_tables()[i]->init(hr);
+      }
+    }
+  }
+
+  static void free(PosParPRT* prt) {
+    while (true) {
+      PosParPRT* fl = _free_list;
+      prt->set_next(fl);
+      PosParPRT* res =
+        (PosParPRT*)
+        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
+      if (res == fl) return;
+    }
+    ShouldNotReachHere();
+  }
+
+  static PosParPRT* alloc(HeapRegion* hr) {
+    PosParPRT* fl = _free_list;
+    while (fl != NULL) {
+      PosParPRT* nxt = fl->next();
+      PosParPRT* res =
+        (PosParPRT*)
+        Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
+      if (res == fl) {
+        fl->init(hr);
+        return fl;
+      } else {
+        fl = _free_list;
+      }
+    }
+    assert(fl == NULL, "Loop condition.");
+    return new PosParPRT(hr);
+  }
+
+  PosParPRT* next() const { return _next; }
+  void set_next(PosParPRT* nxt) { _next = nxt; }
+  PosParPRT** next_addr() { return &_next; }
+
+  void add_reference(oop* from, int tid) {
+    // Expand if necessary.
+    PerRegionTable** pt = par_tables();
+    if (par_tables() == NULL && tid > 0 && hr()->is_gc_alloc_region()) {
+      par_expand();
+      pt = par_tables();
+    }
+    if (pt != NULL) {
+      // We always have to assume that mods to table 0 are in parallel,
+      // because of the claiming scheme in parallel expansion.  A thread
+      // with tid != 0 that finds the table to be NULL, but doesn't succeed
+      // in claiming the right to expand it, will end up in the else
+      // clause of the above if test.  That thread could be delayed, and a
+      // thread 0 add reference could see the table expanded, and come
+      // here.  Both threads would be adding in parallel.  But we get to
+      // not use atomics for tids > 0.
+      if (tid == 0) {
+        PerRegionTable::add_reference(from);
+      } else {
+        pt[tid-1]->seq_add_reference(from);
+      }
+    } else {
+      // Not expanded -- add to the base table.
+      PerRegionTable::add_reference(from);
+    }
+  }
+
+  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
+    assert(_par_tables == NULL, "Precondition");
+    PerRegionTable::scrub(ctbs, card_bm);
+  }
+
+  size_t mem_size() const {
+    size_t res =
+      PerRegionTable::mem_size() + sizeof(*this) - sizeof(PerRegionTable);
+    if (_par_tables != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        res += _par_tables[i]->mem_size();
+      }
+    }
+    return res;
+  }
+
+  static size_t fl_mem_size() {
+    PosParPRT* cur = _free_list;
+    size_t res = 0;
+    while (cur != NULL) {
+      res += sizeof(PosParPRT);
+      cur = cur->next();
+    }
+    return res;
+  }
+
+  bool contains_reference(oop* from) const {
+    if (PerRegionTable::contains_reference(from)) return true;
+    if (_par_tables != NULL) {
+      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
+        if (_par_tables[i]->contains_reference(from)) return true;
+      }
+    }
+    return false;
+  }
+
+  static void par_contract_all();
+
+};
+
+void PosParPRT::par_contract_all() {
+  PosParPRT* hd = _par_expanded_list;
+  while (hd != NULL) {
+    PosParPRT* nxt = hd->next_par_expanded();
+    PosParPRT* res =
+      (PosParPRT*)
+      Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd);
+    if (res == hd) {
+      // We claimed the right to contract this table.
+      hd->set_next_par_expanded(NULL);
+      hd->par_contract();
+      hd = _par_expanded_list;
+    } else {
+      hd = res;
+    }
+  }
+}
+
+PosParPRT* PosParPRT::_free_list = NULL;
+PosParPRT* PosParPRT::_par_expanded_list = NULL;
+
+jint OtherRegionsTable::_cache_probes = 0;
+jint OtherRegionsTable::_cache_hits = 0;
+
+size_t OtherRegionsTable::_max_fine_entries = 0;
+size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0;
+#if SAMPLE_FOR_EVICTION
+size_t OtherRegionsTable::_fine_eviction_stride = 0;
+size_t OtherRegionsTable::_fine_eviction_sample_size = 0;
+#endif
+
+OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
+  _g1h(G1CollectedHeap::heap()),
+  _m(Mutex::leaf, "An OtherRegionsTable lock", true),
+  _hr(hr),
+  _coarse_map(G1CollectedHeap::heap()->max_regions(),
+              false /* in-resource-area */),
+  _fine_grain_regions(NULL),
+  _n_fine_entries(0), _n_coarse_entries(0),
+#if SAMPLE_FOR_EVICTION
+  _fine_eviction_start(0),
+#endif
+  _sparse_table(hr)
+{
+  typedef PosParPRT* PosParPRTPtr;
+  if (_max_fine_entries == 0) {
+    assert(_mod_max_fine_entries_mask == 0, "Both or none.");
+    _max_fine_entries = (1 << G1LogRSRegionEntries);
+    _mod_max_fine_entries_mask = _max_fine_entries - 1;
+#if SAMPLE_FOR_EVICTION
+    assert(_fine_eviction_sample_size == 0
+           && _fine_eviction_stride == 0, "All init at same time.");
+    _fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries);
+    _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size;
+#endif
+  }
+  _fine_grain_regions = new PosParPRTPtr[_max_fine_entries];
+  if (_fine_grain_regions == NULL)
+    vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries,
+                          "Failed to allocate _fine_grain_entries.");
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    _fine_grain_regions[i] = NULL;
+  }
+}
+
+int** OtherRegionsTable::_from_card_cache = NULL;
+size_t OtherRegionsTable::_from_card_cache_max_regions = 0;
+size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
+
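+// Allocate one from-card cache row per parallel remembered set and
+// initialize every entry to -1 (meaning no card is cached).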
+void OtherRegionsTable::init_from_card_cache(size_t max_regions) {
+  _from_card_cache_max_regions = max_regions;
+
+  int n_par_rs = HeapRegionRemSet::num_par_rem_sets();
+  _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs);
+  for (int i = 0; i < n_par_rs; i++) {
+    _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions);
+    for (size_t j = 0; j < max_regions; j++) {
+      _from_card_cache[i][j] = -1;  // An invalid value.
+    }
+  }
+  _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int);
+}
+
+void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) {
+  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max.");
+    for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) {
+      _from_card_cache[i][j] = -1;  // An invalid value.
+    }
+  }
+}
+
+#ifndef PRODUCT
+void OtherRegionsTable::print_from_card_cache() {
+  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    for (size_t j = 0; j < _from_card_cache_max_regions; j++) {
+      gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.",
+                    i, j, _from_card_cache[i][j]);
+    }
+  }
+}
+#endif
+
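+// Record the card containing "from" in this remembered set: consult the
+// per-thread from-card cache and the coarse map first, then an existing
+// fine-grain table for the source region, falling back to the sparse table
+// and promoting to a fine-grain table (possibly coarsening a victim entry)
+// when the sparse entry overflows.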
+void OtherRegionsTable::add_reference(oop* from, int tid) {
+  size_t cur_hrs_ind = hr()->hrs_index();
+
+#if HRRS_VERBOSE
+  gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
+                                                  from, *from);
+#endif
+
+  int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
+
+#if HRRS_VERBOSE
+  gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
+                hr()->bottom(), from_card,
+                _from_card_cache[tid][cur_hrs_ind]);
+#endif
+
+#define COUNT_CACHE 0
+#if COUNT_CACHE
+  jint p = Atomic::add(1, &_cache_probes);
+  if ((p % 10000) == 0) {
+    jint hits = _cache_hits;
+    gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.",
+                  _cache_hits, p, 100.0* (float)hits/(float)p);
+  }
+#endif
+  if (from_card == _from_card_cache[tid][cur_hrs_ind]) {
+#if HRRS_VERBOSE
+    gclog_or_tty->print_cr("  from-card cache hit.");
+#endif
+#if COUNT_CACHE
+    Atomic::inc(&_cache_hits);
+#endif
+    assert(contains_reference(from), "We just added it!");
+    return;
+  } else {
+    _from_card_cache[tid][cur_hrs_ind] = from_card;
+  }
+
+  // Note that this may be a continued H region.
+  HeapRegion* from_hr = _g1h->heap_region_containing_raw(from);
+  size_t from_hrs_ind = (size_t)from_hr->hrs_index();
+
+  // If the region is already coarsened, return.
+  if (_coarse_map.at(from_hrs_ind)) {
+#if HRRS_VERBOSE
+    gclog_or_tty->print_cr("  coarse map hit.");
+#endif
+    assert(contains_reference(from), "We just added it!");
+    return;
+  }
+
+  // Otherwise find a per-region table to add it to.
+  size_t ind = from_hrs_ind & _mod_max_fine_entries_mask;
+  PosParPRT* prt = find_region_table(ind, from_hr);
+  if (prt == NULL) {
+    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+    // Confirm that it's really not there...
+    prt = find_region_table(ind, from_hr);
+    if (prt == NULL) {
+
+      uintptr_t from_hr_bot_card_index =
+        uintptr_t(from_hr->bottom())
+          >> CardTableModRefBS::card_shift;
+      int card_index = from_card - from_hr_bot_card_index;
+      assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion,
+             "Must be in range.");
+      if (G1HRRSUseSparseTable &&
+          _sparse_table.add_card((short) from_hrs_ind, card_index)) {
+        if (G1RecordHRRSOops) {
+          HeapRegionRemSet::record(hr(), from);
+#if HRRS_VERBOSE
+          gclog_or_tty->print("   Added card " PTR_FORMAT " to region "
+                              "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                              align_size_down(uintptr_t(from),
+                                              CardTableModRefBS::card_size),
+                              hr()->bottom(), from);
+#endif
+        }
+#if HRRS_VERBOSE
+        gclog_or_tty->print_cr("   added card to sparse table.");
+#endif
+        assert(contains_reference_locked(from), "We just added it!");
+        return;
+      } else {
+#if HRRS_VERBOSE
+        gclog_or_tty->print_cr("   [tid %d] sparse table entry "
+                      "overflow(f: %d, t: %d)",
+                      tid, from_hrs_ind, cur_hrs_ind);
+#endif
+      }
+
+      // Otherwise, transfer from sparse to fine-grain.
+      short cards[SparsePRTEntry::CardsPerEntry];
+      if (G1HRRSUseSparseTable) {
+        bool res = _sparse_table.get_cards((short) from_hrs_ind, &cards[0]);
+        assert(res, "There should have been an entry");
+      }
+
+      if (_n_fine_entries == _max_fine_entries) {
+        prt = delete_region_table();
+      } else {
+        prt = PosParPRT::alloc(from_hr);
+      }
+      prt->init(from_hr);
+      // Record the outgoing pointer in the from_region's outgoing bitmap.
+      from_hr->rem_set()->add_outgoing_reference(hr());
+
+      PosParPRT* first_prt = _fine_grain_regions[ind];
+      prt->set_next(first_prt);  // XXX Maybe move to init?
+      _fine_grain_regions[ind] = prt;
+      _n_fine_entries++;
+
+      // Add in the cards from the sparse table.
+      if (G1HRRSUseSparseTable) {
+        for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) {
+          short c = cards[i];
+          if (c != SparsePRTEntry::NullEntry) {
+            prt->add_card(c);
+          }
+        }
+        // Now we can delete the sparse entry.
+        bool res = _sparse_table.delete_entry((short) from_hrs_ind);
+        assert(res, "It should have been there.");
+      }
+    }
+    assert(prt != NULL && prt->hr() == from_hr, "consequence");
+  }
+  // Note that we can't assert "prt->hr() == from_hr", because of the
+  // possibility of concurrent reuse.  But see head comment of
+  // OtherRegionsTable for why this is OK.
+  assert(prt != NULL, "Inv");
+
+  prt->add_reference(from, tid);
+  if (G1RecordHRRSOops) {
+    HeapRegionRemSet::record(hr(), from);
+#if HRRS_VERBOSE
+    gclog_or_tty->print("Added card " PTR_FORMAT " to region "
+                        "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                        align_size_down(uintptr_t(from),
+                                        CardTableModRefBS::card_size),
+                        hr()->bottom(), from);
+#endif
+  }
+  assert(contains_reference(from), "We just added it!");
+}
+
+PosParPRT*
+OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const {
+  assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
+  PosParPRT* prt = _fine_grain_regions[ind];
+  while (prt != NULL && prt->hr() != hr) {
+    prt = prt->next();
+  }
+  // Loop postcondition is the method postcondition.
+  return prt;
+}
+
+
+#define DRT_CENSUS 0
+
+#if DRT_CENSUS
+static const int HistoSize = 6;
+static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
+static int coarsenings = 0;
+static int occ_sum = 0;
+#endif
+
+jint OtherRegionsTable::_n_coarsenings = 0;
+
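+// Evict the most heavily occupied fine-grain table (chosen by sampling when
+// SAMPLE_FOR_EVICTION is set), mark its region in the coarse map, unlink it,
+// and return it for reuse.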
+PosParPRT* OtherRegionsTable::delete_region_table() {
+#if DRT_CENSUS
+  int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
+  const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 };
+#endif
+
+  assert(_m.owned_by_self(), "Precondition");
+  assert(_n_fine_entries == _max_fine_entries, "Precondition");
+  PosParPRT* max = NULL;
+  jint max_occ = 0;
+  PosParPRT** max_prev;
+  size_t max_ind;
+
+#if SAMPLE_FOR_EVICTION
+  size_t i = _fine_eviction_start;
+  for (size_t k = 0; k < _fine_eviction_sample_size; k++) {
+    size_t ii = i;
+    // Make sure we get a non-NULL sample.
+    while (_fine_grain_regions[ii] == NULL) {
+      ii++;
+      if (ii == _max_fine_entries) ii = 0;
+      guarantee(ii != i, "We must find one.");
+    }
+    PosParPRT** prev = &_fine_grain_regions[ii];
+    PosParPRT* cur = *prev;
+    while (cur != NULL) {
+      jint cur_occ = cur->occupied();
+      if (max == NULL || cur_occ > max_occ) {
+        max = cur;
+        max_prev = prev;
+        max_ind = i;
+        max_occ = cur_occ;
+      }
+      prev = cur->next_addr();
+      cur = cur->next();
+    }
+    i = i + _fine_eviction_stride;
+    if (i >= _n_fine_entries) i = i - _n_fine_entries;
+  }
+  _fine_eviction_start++;
+  if (_fine_eviction_start >= _n_fine_entries)
+    _fine_eviction_start -= _n_fine_entries;
+#else
+  for (int i = 0; i < _max_fine_entries; i++) {
+    PosParPRT** prev = &_fine_grain_regions[i];
+    PosParPRT* cur = *prev;
+    while (cur != NULL) {
+      jint cur_occ = cur->occupied();
+#if DRT_CENSUS
+      for (int k = 0; k < HistoSize; k++) {
+        if (cur_occ <= histo_limits[k]) {
+          histo[k]++; global_histo[k]++; break;
+        }
+      }
+#endif
+      if (max == NULL || cur_occ > max_occ) {
+        max = cur;
+        max_prev = prev;
+        max_ind = i;
+        max_occ = cur_occ;
+      }
+      prev = cur->next_addr();
+      cur = cur->next();
+    }
+  }
+#endif
+  // XXX
+  guarantee(max != NULL, "Since _n_fine_entries > 0");
+#if DRT_CENSUS
+  gclog_or_tty->print_cr("In a coarsening: histo of occs:");
+  for (int k = 0; k < HistoSize; k++) {
+    gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], histo[k]);
+  }
+  coarsenings++;
+  occ_sum += max_occ;
+  if ((coarsenings % 100) == 0) {
+    gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings);
+    for (int k = 0; k < HistoSize; k++) {
+      gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], global_histo[k]);
+    }
+    gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.",
+                  (float)occ_sum/(float)coarsenings);
+  }
+#endif
+
+  // Set the corresponding coarse bit.
+  int max_hrs_index = max->hr()->hrs_index();
+  if (!_coarse_map.at(max_hrs_index)) {
+    _coarse_map.at_put(max_hrs_index, true);
+    _n_coarse_entries++;
+#if 0
+    gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
+               "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
+               hr()->bottom(),
+               max->hr()->bottom(),
+               _n_coarse_entries);
+#endif
+  }
+
+  // Unsplice.
+  *max_prev = max->next();
+  Atomic::inc(&_n_coarsenings);
+  _n_fine_entries--;
+  return max;
+}
+
+
+// At present, this must be called stop-world single-threaded.
+void OtherRegionsTable::scrub(CardTableModRefBS* ctbs,
+                              BitMap* region_bm, BitMap* card_bm) {
+  // First, eliminate garbage regions from the coarse map.
+  if (G1RSScrubVerbose)
+    gclog_or_tty->print_cr("Scrubbing region %d:", hr()->hrs_index());
+
+  assert(_coarse_map.size() == region_bm->size(), "Precondition");
+  if (G1RSScrubVerbose)
+    gclog_or_tty->print("   Coarse map: before = %d...", _n_coarse_entries);
+  _coarse_map.set_intersection(*region_bm);
+  _n_coarse_entries = _coarse_map.count_one_bits();
+  if (G1RSScrubVerbose)
+    gclog_or_tty->print_cr("   after = %d.", _n_coarse_entries);
+
+  // Now do the fine-grained maps.
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    PosParPRT** prev = &_fine_grain_regions[i];
+    while (cur != NULL) {
+      PosParPRT* nxt = cur->next();
+      // If the entire region is dead, eliminate.
+      if (G1RSScrubVerbose)
+        gclog_or_tty->print_cr("     For other region %d:", cur->hr()->hrs_index());
+      if (!region_bm->at(cur->hr()->hrs_index())) {
+        *prev = nxt;
+        cur->set_next(NULL);
+        _n_fine_entries--;
+        if (G1RSScrubVerbose)
+          gclog_or_tty->print_cr("          deleted via region map.");
+        PosParPRT::free(cur);
+      } else {
+        // Do fine-grain elimination.
+        if (G1RSScrubVerbose)
+          gclog_or_tty->print("          occ: before = %4d.", cur->occupied());
+        cur->scrub(ctbs, card_bm);
+        if (G1RSScrubVerbose)
+          gclog_or_tty->print_cr("          after = %4d.", cur->occupied());
+        // Did that empty the table completely?
+        if (cur->occupied() == 0) {
+          *prev = nxt;
+          cur->set_next(NULL);
+          _n_fine_entries--;
+          PosParPRT::free(cur);
+        } else {
+          prev = cur->next_addr();
+        }
+      }
+      cur = nxt;
+    }
+  }
+  // Since we may have deleted a from_card_cache entry from the RS, clear
+  // the FCC.
+  clear_fcc();
+}
+
+
+size_t OtherRegionsTable::occupied() const {
+  // Cast away const in this case.
+  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
+  size_t sum = occ_fine();
+  sum += occ_sparse();
+  sum += occ_coarse();
+  return sum;
+}
+
+size_t OtherRegionsTable::occ_fine() const {
+  size_t sum = 0;
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    while (cur != NULL) {
+      sum += cur->occupied();
+      cur = cur->next();
+    }
+  }
+  return sum;
+}
+
+size_t OtherRegionsTable::occ_coarse() const {
+  return (_n_coarse_entries * PosParPRT::CardsPerRegion);
+}
+
+size_t OtherRegionsTable::occ_sparse() const {
+  return _sparse_table.occupied();
+}
+
+size_t OtherRegionsTable::mem_size() const {
+  // Cast away const in this case.
+  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
+  size_t sum = 0;
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    while (cur != NULL) {
+      sum += cur->mem_size();
+      cur = cur->next();
+    }
+  }
+  sum += (sizeof(PosParPRT*) * _max_fine_entries);
+  sum += (_coarse_map.size_in_words() * HeapWordSize);
+  sum += (_sparse_table.mem_size());
+  sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above.
+  return sum;
+}
+
+size_t OtherRegionsTable::static_mem_size() {
+  return _from_card_cache_mem_size;
+}
+
+size_t OtherRegionsTable::fl_mem_size() {
+  return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size();
+}
+
+void OtherRegionsTable::clear_fcc() {
+  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    _from_card_cache[i][hr()->hrs_index()] = -1;
+  }
+}
+
+void OtherRegionsTable::clear() {
+  MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+  for (size_t i = 0; i < _max_fine_entries; i++) {
+    PosParPRT* cur = _fine_grain_regions[i];
+    while (cur != NULL) {
+      PosParPRT* nxt = cur->next();
+      PosParPRT::free(cur);
+      cur = nxt;
+    }
+    _fine_grain_regions[i] = NULL;
+  }
+  _sparse_table.clear();
+  _coarse_map.clear();
+  _n_fine_entries = 0;
+  _n_coarse_entries = 0;
+
+  clear_fcc();
+}
+
+void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) {
+  MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+  size_t hrs_ind = (size_t)from_hr->hrs_index();
+  size_t ind = hrs_ind & _mod_max_fine_entries_mask;
+  if (del_single_region_table(ind, from_hr)) {
+    assert(!_coarse_map.at(hrs_ind), "Inv");
+  } else {
+    _coarse_map.par_at_put(hrs_ind, 0);
+  }
+  // Check to see if any of the fcc entries come from here.
+  int hr_ind = hr()->hrs_index();
+  for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
+    int fcc_ent = _from_card_cache[tid][hr_ind];
+    if (fcc_ent != -1) {
+      HeapWord* card_addr = (HeapWord*)
+        (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift);
+      if (hr()->is_in_reserved(card_addr)) {
+        // Clear the from card cache.
+        _from_card_cache[tid][hr_ind] = -1;
+      }
+    }
+  }
+}
+
+bool OtherRegionsTable::del_single_region_table(size_t ind,
+                                                HeapRegion* hr) {
+  assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
+  PosParPRT** prev_addr = &_fine_grain_regions[ind];
+  PosParPRT* prt = *prev_addr;
+  while (prt != NULL && prt->hr() != hr) {
+    prev_addr = prt->next_addr();
+    prt = prt->next();
+  }
+  if (prt != NULL) {
+    assert(prt->hr() == hr, "Loop postcondition.");
+    *prev_addr = prt->next();
+    PosParPRT::free(prt);
+    _n_fine_entries--;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool OtherRegionsTable::contains_reference(oop* from) const {
+  // Cast away const in this case.
+  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
+  return contains_reference_locked(from);
+}
+
+bool OtherRegionsTable::contains_reference_locked(oop* from) const {
+  HeapRegion* hr = _g1h->heap_region_containing_raw(from);
+  if (hr == NULL) return false;
+  size_t hr_ind = hr->hrs_index();
+  // Is this region in the coarse map?
+  if (_coarse_map.at(hr_ind)) return true;
+
+  PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
+                                     hr);
+  if (prt != NULL) {
+    return prt->contains_reference(from);
+
+  } else {
+    uintptr_t from_card =
+      (uintptr_t(from) >> CardTableModRefBS::card_shift);
+    uintptr_t hr_bot_card_index =
+      uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift;
+    assert(from_card >= hr_bot_card_index, "Inv");
+    int card_index = from_card - hr_bot_card_index;
+    return _sparse_table.contains_card((short)hr_ind, card_index);
+  }
+}
+
+
+bool HeapRegionRemSet::_par_traversal = false;
+
+void HeapRegionRemSet::set_par_traversal(bool b) {
+  assert(_par_traversal != b, "Proper alternation...");
+  _par_traversal = b;
+}
+
+int HeapRegionRemSet::num_par_rem_sets() {
+  // We always have at least two, so that a mutator thread can claim an
+  // id and add to a rem set.
+  return (int) MAX2(ParallelGCThreads, (size_t)2);
+}
+
+HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
+                                   HeapRegion* hr)
+    : _bosa(bosa), _other_regions(hr),
+      _outgoing_region_map(G1CollectedHeap::heap()->max_regions(),
+                           false /* in-resource-area */),
+      _iter_state(Unclaimed)
+{}
+
+
+void HeapRegionRemSet::init_for_par_iteration() {
+  _iter_state = Unclaimed;
+}
+
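+// Atomically claim this remembered set for iteration; only the single
+// thread that wins the CAS gets "true".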
+bool HeapRegionRemSet::claim_iter() {
+  if (_iter_state != Unclaimed) return false;
+  jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_state), Unclaimed);
+  return (res == Unclaimed);
+}
+
+void HeapRegionRemSet::set_iter_complete() {
+  _iter_state = Complete;
+}
+
+bool HeapRegionRemSet::iter_is_complete() {
+  return _iter_state == Complete;
+}
+
+
+void HeapRegionRemSet::init_iterator(HeapRegionRemSetIterator* iter) const {
+  iter->initialize(this);
+}
+
+#ifndef PRODUCT
+void HeapRegionRemSet::print() const {
+  HeapRegionRemSetIterator iter;
+  init_iterator(&iter);
+  size_t card_index;
+  while (iter.has_next(card_index)) {
+    HeapWord* card_start =
+      G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index);
+    gclog_or_tty->print_cr("  Card " PTR_FORMAT ".", card_start);
+  }
+  // XXX
+  if (iter.n_yielded() != occupied()) {
+    gclog_or_tty->print_cr("Yielded disagrees with occupied:");
+    gclog_or_tty->print_cr("  %6d yielded (%6d coarse, %6d fine).",
+                  iter.n_yielded(),
+                  iter.n_yielded_coarse(), iter.n_yielded_fine());
+    gclog_or_tty->print_cr("  %6d occ     (%6d coarse, %6d fine).",
+                  occupied(), occ_coarse(), occ_fine());
+  }
+  guarantee(iter.n_yielded() == occupied(),
+            "We should have yielded all the represented cards.");
+}
+#endif
+
+void HeapRegionRemSet::cleanup() {
+  SparsePRT::cleanup_all();
+}
+
+void HeapRegionRemSet::par_cleanup() {
+  PosParPRT::par_contract_all();
+}
+
+void HeapRegionRemSet::add_outgoing_reference(HeapRegion* to_hr) {
+  _outgoing_region_map.par_at_put(to_hr->hrs_index(), 1);
+}
+
+void HeapRegionRemSet::clear() {
+  clear_outgoing_entries();
+  _outgoing_region_map.clear();
+  _other_regions.clear();
+  assert(occupied() == 0, "Should be clear.");
+}
+
+void HeapRegionRemSet::clear_outgoing_entries() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  size_t i = _outgoing_region_map.get_next_one_offset(0);
+  while (i < _outgoing_region_map.size()) {
+    HeapRegion* to_region = g1h->region_at(i);
+    to_region->rem_set()->clear_incoming_entry(hr());
+    i = _outgoing_region_map.get_next_one_offset(i+1);
+  }
+}
+
+
+void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs,
+                             BitMap* region_bm, BitMap* card_bm) {
+  _other_regions.scrub(ctbs, region_bm, card_bm);
+}
+
+//-------------------- Iteration --------------------
+
+HeapRegionRemSetIterator::
+HeapRegionRemSetIterator() :
+  _hrrs(NULL),
+  _g1h(G1CollectedHeap::heap()),
+  _bosa(NULL),
+  _sparse_iter(size_t(G1CollectedHeap::heap()->reserved_region().start())
+               >> CardTableModRefBS::card_shift)
+{}
+
+void HeapRegionRemSetIterator::initialize(const HeapRegionRemSet* hrrs) {
+  _hrrs = hrrs;
+  _coarse_map = &_hrrs->_other_regions._coarse_map;
+  _fine_grain_regions = _hrrs->_other_regions._fine_grain_regions;
+  _bosa = _hrrs->bosa();
+
+  _is = Sparse;
+  // Set these values so that we increment to the first region.
+  _coarse_cur_region_index = -1;
+  _coarse_cur_region_cur_card = (PosParPRT::CardsPerRegion-1);
+
+  _cur_region_cur_card = 0;
+
+  _fine_array_index = -1;
+  _fine_cur_prt = NULL;
+
+  _n_yielded_coarse = 0;
+  _n_yielded_fine = 0;
+  _n_yielded_sparse = 0;
+
+  _sparse_iter.init(&hrrs->_other_regions._sparse_table);
+}
+
+bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) {
+  if (_hrrs->_other_regions._n_coarse_entries == 0) return false;
+  // Go to the next card.
+  _coarse_cur_region_cur_card++;
+  // Was that the last card in the current region?
+  if (_coarse_cur_region_cur_card == PosParPRT::CardsPerRegion) {
+    // Yes: find the next region.  This may leave _coarse_cur_region_index
+    // set to the last index, in which case there are no more coarse
+    // regions.
+    _coarse_cur_region_index =
+      (int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1);
+    if ((size_t)_coarse_cur_region_index < _coarse_map->size()) {
+      _coarse_cur_region_cur_card = 0;
+      HeapWord* r_bot =
+        _g1h->region_at(_coarse_cur_region_index)->bottom();
+      _cur_region_card_offset = _bosa->index_for(r_bot);
+    } else {
+      return false;
+    }
+  }
+  // If we didn't return false above, then we can yield a card.
+  card_index = _cur_region_card_offset + _coarse_cur_region_cur_card;
+  return true;
+}
+
+void HeapRegionRemSetIterator::fine_find_next_non_null_prt() {
+  // Advance to the head of the next non-NULL bucket list in the fine-grain array.
+  _fine_array_index++;
+  while (_fine_array_index < (int) OtherRegionsTable::_max_fine_entries) {
+    _fine_cur_prt = _fine_grain_regions[_fine_array_index];
+    if (_fine_cur_prt != NULL) return;
+    else _fine_array_index++;
+  }
+  assert(_fine_cur_prt == NULL, "Loop post");
+}
+
+bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) {
+  if (fine_has_next()) {
+    _cur_region_cur_card =
+      _fine_cur_prt->_bm.get_next_one_offset(_cur_region_cur_card + 1);
+  }
+  while (!fine_has_next()) {
+    if (_cur_region_cur_card == PosParPRT::CardsPerRegion) {
+      _cur_region_cur_card = 0;
+      _fine_cur_prt = _fine_cur_prt->next();
+    }
+    if (_fine_cur_prt == NULL) {
+      fine_find_next_non_null_prt();
+      if (_fine_cur_prt == NULL) return false;
+    }
+    assert(_fine_cur_prt != NULL && _cur_region_cur_card == 0,
+           "inv.");
+    HeapWord* r_bot =
+      _fine_cur_prt->hr()->bottom();
+    _cur_region_card_offset = _bosa->index_for(r_bot);
+    _cur_region_cur_card = _fine_cur_prt->_bm.get_next_one_offset(0);
+  }
+  assert(fine_has_next(), "Or else we exited the loop via the return.");
+  card_index = _cur_region_card_offset + _cur_region_cur_card;
+  return true;
+}
+
+bool HeapRegionRemSetIterator::fine_has_next() {
+  return
+    _fine_cur_prt != NULL &&
+    _cur_region_cur_card < PosParPRT::CardsPerRegion;
+}
+
+bool HeapRegionRemSetIterator::has_next(size_t& card_index) {
+  switch (_is) {
+  case Sparse:
+    if (_sparse_iter.has_next(card_index)) {
+      _n_yielded_sparse++;
+      return true;
+    }
+    // Otherwise, deliberate fall-through
+    _is = Fine;
+  case Fine:
+    if (fine_has_next(card_index)) {
+      _n_yielded_fine++;
+      return true;
+    }
+    // Otherwise, deliberate fall-through
+    _is = Coarse;
+  case Coarse:
+    if (coarse_has_next(card_index)) {
+      _n_yielded_coarse++;
+      return true;
+    }
+    // Otherwise...
+    break;
+  }
+  assert(ParallelGCThreads > 1 ||
+         n_yielded() == _hrrs->occupied(),
+         "Should have yielded all the cards in the rem set "
+         "(in the non-par case).");
+  return false;
+}
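+
+// Illustrative, standalone sketch (not HotSpot code) of the staged-iteration
+// pattern used by has_next() above: a single query drains three sources in a
+// fixed order, using deliberate switch fall-through to advance from one
+// stage to the next.  All names below (StagedIter, Stage, _a/_b/_c) are
+// hypothetical.
+#if 0
+#include <cstddef>
+
+class StagedIter {
+  enum Stage { A, B, C } _stage;
+  const size_t *_a, *_b, *_c;      // three sources of card indices
+  size_t _na, _nb, _nc;            // their lengths
+  size_t _ia, _ib, _ic;            // cursors into each source
+public:
+  StagedIter(const size_t* a, size_t na,
+             const size_t* b, size_t nb,
+             const size_t* c, size_t nc)
+    : _stage(A), _a(a), _b(b), _c(c),
+      _na(na), _nb(nb), _nc(nc), _ia(0), _ib(0), _ic(0) {}
+
+  bool has_next(size_t& out) {
+    switch (_stage) {
+    case A:
+      if (_ia < _na) { out = _a[_ia++]; return true; }
+      _stage = B;                  // deliberate fall-through
+    case B:
+      if (_ib < _nb) { out = _b[_ib++]; return true; }
+      _stage = C;                  // deliberate fall-through
+    case C:
+      if (_ic < _nc) { out = _c[_ic++]; return true; }
+      break;                       // all three sources exhausted
+    }
+    return false;
+  }
+};
+#endif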
+
+
+
+oop**        HeapRegionRemSet::_recorded_oops = NULL;
+HeapWord**   HeapRegionRemSet::_recorded_cards = NULL;
+HeapRegion** HeapRegionRemSet::_recorded_regions = NULL;
+int          HeapRegionRemSet::_n_recorded = 0;
+
+HeapRegionRemSet::Event* HeapRegionRemSet::_recorded_events = NULL;
+int*         HeapRegionRemSet::_recorded_event_index = NULL;
+int          HeapRegionRemSet::_n_recorded_events = 0;
+
+void HeapRegionRemSet::record(HeapRegion* hr, oop* f) {
+  if (_recorded_oops == NULL) {
+    assert(_n_recorded == 0
+           && _recorded_cards == NULL
+           && _recorded_regions == NULL,
+           "Inv");
+    _recorded_oops = NEW_C_HEAP_ARRAY(oop*, MaxRecorded);
+    _recorded_cards = NEW_C_HEAP_ARRAY(HeapWord*, MaxRecorded);
+    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*, MaxRecorded);
+  }
+  if (_n_recorded == MaxRecorded) {
+    gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded);
+  } else {
+    _recorded_cards[_n_recorded] =
+      (HeapWord*)align_size_down(uintptr_t(f),
+                                 CardTableModRefBS::card_size);
+    _recorded_oops[_n_recorded] = f;
+    _recorded_regions[_n_recorded] = hr;
+    _n_recorded++;
+  }
+}
+
+void HeapRegionRemSet::record_event(Event evnt) {
+  if (!G1RecordHRRSEvents) return;
+
+  if (_recorded_events == NULL) {
+    assert(_n_recorded_events == 0
+           && _recorded_event_index == NULL,
+           "Inv");
+    _recorded_events = NEW_C_HEAP_ARRAY(Event, MaxRecordedEvents);
+    _recorded_event_index = NEW_C_HEAP_ARRAY(int, MaxRecordedEvents);
+  }
+  if (_n_recorded_events == MaxRecordedEvents) {
+    gclog_or_tty->print_cr("Filled up 'recorded_events' (%d).", MaxRecordedEvents);
+  } else {
+    _recorded_events[_n_recorded_events] = evnt;
+    _recorded_event_index[_n_recorded_events] = _n_recorded;
+    _n_recorded_events++;
+  }
+}
+
+void HeapRegionRemSet::print_event(outputStream* str, Event evnt) {
+  switch (evnt) {
+  case Event_EvacStart:
+    str->print("Evac Start");
+    break;
+  case Event_EvacEnd:
+    str->print("Evac End");
+    break;
+  case Event_RSUpdateEnd:
+    str->print("RS Update End");
+    break;
+  }
+}
+
+void HeapRegionRemSet::print_recorded() {
+  int cur_evnt = 0;
+  Event cur_evnt_kind;
+  int cur_evnt_ind = 0;
+  if (_n_recorded_events > 0) {
+    cur_evnt_kind = _recorded_events[cur_evnt];
+    cur_evnt_ind = _recorded_event_index[cur_evnt];
+  }
+
+  for (int i = 0; i < _n_recorded; i++) {
+    while (cur_evnt < _n_recorded_events && i == cur_evnt_ind) {
+      gclog_or_tty->print("Event: ");
+      print_event(gclog_or_tty, cur_evnt_kind);
+      gclog_or_tty->print_cr("");
+      cur_evnt++;
+      if (cur_evnt < MaxRecordedEvents) {
+        cur_evnt_kind = _recorded_events[cur_evnt];
+        cur_evnt_ind = _recorded_event_index[cur_evnt];
+      }
+    }
+    gclog_or_tty->print("Added card " PTR_FORMAT " to region [" PTR_FORMAT "...]"
+                        " for ref " PTR_FORMAT ".\n",
+                        _recorded_cards[i], _recorded_regions[i]->bottom(),
+                        _recorded_oops[i]);
+  }
+}
+
+#ifndef PRODUCT
+void HeapRegionRemSet::test() {
+  os::sleep(Thread::current(), (jlong)5000, false);
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // Run with "-XX:G1LogRSRegionEntries=2", so that 1 and 5 end up in same
+  // hash bucket.
+  HeapRegion* hr0 = g1h->region_at(0);
+  HeapRegion* hr1 = g1h->region_at(1);
+  HeapRegion* hr2 = g1h->region_at(5);
+  HeapRegion* hr3 = g1h->region_at(6);
+  HeapRegion* hr4 = g1h->region_at(7);
+  HeapRegion* hr5 = g1h->region_at(8);
+
+  HeapWord* hr1_start = hr1->bottom();
+  HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2;
+  HeapWord* hr1_last = hr1->end() - 1;
+
+  HeapWord* hr2_start = hr2->bottom();
+  HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2;
+  HeapWord* hr2_last = hr2->end() - 1;
+
+  HeapWord* hr3_start = hr3->bottom();
+  HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2;
+  HeapWord* hr3_last = hr3->end() - 1;
+
+  HeapRegionRemSet* hrrs = hr0->rem_set();
+
+  // Make three references from region 0x101...
+  hrrs->add_reference((oop*)hr1_start);
+  hrrs->add_reference((oop*)hr1_mid);
+  hrrs->add_reference((oop*)hr1_last);
+
+  hrrs->add_reference((oop*)hr2_start);
+  hrrs->add_reference((oop*)hr2_mid);
+  hrrs->add_reference((oop*)hr2_last);
+
+  hrrs->add_reference((oop*)hr3_start);
+  hrrs->add_reference((oop*)hr3_mid);
+  hrrs->add_reference((oop*)hr3_last);
+
+  // Now cause a coarsening.
+  hrrs->add_reference((oop*)hr4->bottom());
+  hrrs->add_reference((oop*)hr5->bottom());
+
+  // Now, does iteration yield these three?
+  HeapRegionRemSetIterator iter;
+  hrrs->init_iterator(&iter);
+  size_t sum = 0;
+  size_t card_index;
+  while (iter.has_next(card_index)) {
+    HeapWord* card_start =
+      G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index);
+    gclog_or_tty->print_cr("  Card " PTR_FORMAT ".", card_start);
+    sum++;
+  }
+  guarantee(sum == 11 - 3 + 2048, "Failure");
+  guarantee(sum == hrrs->occupied(), "Failure");
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Remembered set for a heap region.  Represents a set of "cards" that
+// contain pointers into the owner heap region.  Cards are defined somewhat
+// abstractly, in terms of what the "BlockOffsetTable" in use can parse.
+
+class G1CollectedHeap;
+class G1BlockOffsetSharedArray;
+class HeapRegion;
+class HeapRegionRemSetIterator;
+class PosParPRT;
+class SparsePRT;
+
+
+// The "_coarse_map" is a bitmap with one bit for each region, where set
+// bits indicate that the corresponding region may contain some pointer
+// into the owning region.
+
+// The "_fine_grain_entries" array is an open hash table of PerRegionTables
+// (PRTs), indicating regions for which we're keeping the RS as a set of
+// cards.  The strategy is to cap the size of the fine-grain table,
+// deleting an entry and setting the corresponding coarse-grained bit when
+// we would overflow this cap.
+
+// We use a mixture of locking and lock-free techniques here.  We allow
+// threads to locate PRTs without locking, but threads attempting to alter
+// a bucket list obtain a lock.  This means that any failing attempt to
+// find a PRT must be retried with the lock.  It might seem dangerous that
+// a read can find a PRT that is concurrently deleted.  This is all right,
+// because:
+//
+//   1) We only actually free PRT's at safe points (though we reuse them at
+//      other times).
+//   2) We find PRT's in an attempt to add entries.  If a PRT is deleted,
+//      its _coarse_map bit is set, so the card that we were attempting to
+//      add is represented.  If a deleted PRT is re-used, a thread adding a
+//      bit, thinking the PRT is for a different region, does no harm.
+
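+// Illustrative, standalone sketch (not HotSpot code) of the capping strategy
+// described above: per-region fine-grained card sets live in a bounded
+// table, and when the table is full a victim entry is evicted by collapsing
+// it into a single "coarse" per-region bit.  All names and sizes below are
+// hypothetical.
+#if 0
+#include <string.h>
+
+class CoarseningTable {
+  static const int NumRegions     = 64;
+  static const int CardsPerRegion = 32;
+  static const int MaxFine        = 4;
+
+  struct FineEntry { int region; unsigned int cards; };  // one bit per card
+  FineEntry _fine[MaxFine];
+  int       _n_fine;
+  bool      _coarse[NumRegions];                         // one flag per region
+
+public:
+  CoarseningTable() : _n_fine(0) { memset(_coarse, 0, sizeof(_coarse)); }
+
+  void add(int region, int card) {
+    if (_coarse[region]) return;                 // already covered coarsely
+    for (int i = 0; i < _n_fine; i++) {
+      if (_fine[i].region == region) { _fine[i].cards |= (1u << card); return; }
+    }
+    if (_n_fine == MaxFine) {
+      // Table full: evict entry 0 by coarsening it, then reuse its slot.
+      _coarse[_fine[0].region] = true;
+      _fine[0] = _fine[--_n_fine];
+    }
+    _fine[_n_fine].region = region;
+    _fine[_n_fine].cards  = (1u << card);
+    _n_fine++;
+  }
+
+  bool contains(int region, int card) const {
+    if (_coarse[region]) return true;            // conservative: whole region
+    for (int i = 0; i < _n_fine; i++) {
+      if (_fine[i].region == region) return ((_fine[i].cards >> card) & 1) != 0;
+    }
+    return false;
+  }
+};
+#endif
+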
+class OtherRegionsTable: public CHeapObj {
+  friend class HeapRegionRemSetIterator;
+
+  G1CollectedHeap* _g1h;
+  Mutex            _m;
+  HeapRegion*      _hr;
+
+  // These are protected by "_m".
+  BitMap      _coarse_map;
+  size_t      _n_coarse_entries;
+  static jint _n_coarsenings;
+
+  PosParPRT** _fine_grain_regions;
+  size_t      _n_fine_entries;
+
+#define SAMPLE_FOR_EVICTION 1
+#if SAMPLE_FOR_EVICTION
+  size_t        _fine_eviction_start;
+  static size_t _fine_eviction_stride;
+  static size_t _fine_eviction_sample_size;
+#endif
+
+  SparsePRT   _sparse_table;
+
+  // These are static after init.
+  static size_t _max_fine_entries;
+  static size_t _mod_max_fine_entries_mask;
+
+  // Requires "prt" to be the first element of the bucket list appropriate
+  // for "hr".  If this list contains an entry for "hr", return it,
+  // otherwise return "NULL".
+  PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const;
+
+  // Find, delete, and return a candidate PosParPRT, if any exists,
+  // adding the deleted region to the coarse bitmap.  Requires the caller
+  // to hold _m, and the fine-grain table to be full.
+  PosParPRT* delete_region_table();
+
+  // If a PRT for "hr" is in the bucket list indicated by "ind" (which must
+  // be the correct index for "hr"), delete it and return true; else return
+  // false.
+  bool del_single_region_table(size_t ind, HeapRegion* hr);
+
+  static jint _cache_probes;
+  static jint _cache_hits;
+
+  // Indexed by thread X heap region, to minimize thread contention.
+  static int** _from_card_cache;
+  static size_t _from_card_cache_max_regions;
+  static size_t _from_card_cache_mem_size;
+
+public:
+  OtherRegionsTable(HeapRegion* hr);
+
+  HeapRegion* hr() const { return _hr; }
+
+  // For now.  Could "expand" some tables in the future, so that this would
+  // make sense.
+  void add_reference(oop* from, int tid);
+
+  void add_reference(oop* from) {
+    return add_reference(from, 0);
+  }
+
+  // Removes any entries shown by the given bitmaps to contain only dead
+  // objects.
+  void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
+
+  // Declared const, but may take a lock internally.
+  size_t occupied() const;
+  size_t occ_fine() const;
+  size_t occ_coarse() const;
+  size_t occ_sparse() const;
+
+  static jint n_coarsenings() { return _n_coarsenings; }
+
+  // Returns size in bytes.
+  // Declared const, but may take a lock internally.
+  size_t mem_size() const;
+  static size_t static_mem_size();
+  static size_t fl_mem_size();
+
+  bool contains_reference(oop* from) const;
+  bool contains_reference_locked(oop* from) const;
+
+  void clear();
+
+  // Specifically clear the from_card_cache.
+  void clear_fcc();
+
+  // "from_hr" is being cleared; remove any entries from it.
+  void clear_incoming_entry(HeapRegion* from_hr);
+
+  // Declare the heap size (in # of regions) to the OtherRegionsTable.
+  // (Uses it to initialize from_card_cache).
+  static void init_from_card_cache(size_t max_regions);
+
+  // Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
+  // Make sure any entries for higher regions are invalid.
+  static void shrink_from_card_cache(size_t new_n_regs);
+
+  static void print_from_card_cache();
+
+};
+
+
+class HeapRegionRemSet : public CHeapObj {
+  friend class VMStructs;
+  friend class HeapRegionRemSetIterator;
+
+public:
+  enum Event {
+    Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd
+  };
+
+private:
+  G1BlockOffsetSharedArray* _bosa;
+  G1BlockOffsetSharedArray* bosa() const { return _bosa; }
+
+  static bool _par_traversal;
+
+  OtherRegionsTable _other_regions;
+
+  // One set bit for every region that has an entry for this one.
+  BitMap _outgoing_region_map;
+
+  // Clear entries for the current region in any rem sets named in
+  // the _outgoing_region_map.
+  void clear_outgoing_entries();
+
+#if MAYBE
+  // Audit the given card index.
+  void audit_card(size_t card_num, HeapRegion* hr, u2* rc_arr,
+                  HeapRegionRemSet* empty_cards, size_t* one_obj_cards);
+
+  // Assumes that "audit_stage1" has been called for "hr", to set up
+  // "shadow" and "new_rs" appropriately.  Identifies individual popular
+  // objects; returns "true" if any are found.
+  bool audit_find_pop(HeapRegion* hr, u2* rc_arr);
+
+  // Assumes that "audit_stage1" has been called for "hr", to set up
+  // "shadow" and "new_rs" appropriately.  Identifies individual popular
+  // objects, and determines the number of entries in "new_rs" if any such
+  // popular objects are ignored.  If this is sufficiently small, returns
+  // "false" to indicate that a constraint should not be introduced.
+  // Otherwise, returns "true" to indicate that we should go ahead with
+  // adding the constraint.
+  bool audit_stag(HeapRegion* hr, u2* rc_arr);
+
+
+  u2* alloc_rc_array();
+
+  SeqHeapRegionRemSet* audit_post(u2* rc_arr, size_t multi_obj_crds,
+                                  SeqHeapRegionRemSet* empty_cards);
+#endif
+
+  enum ParIterState { Unclaimed, Claimed, Complete };
+  ParIterState _iter_state;
+
+  // Unused unless G1RecordHRRSOops is true.
+
+  static const int MaxRecorded = 1000000;
+  static oop**        _recorded_oops;
+  static HeapWord**   _recorded_cards;
+  static HeapRegion** _recorded_regions;
+  static int          _n_recorded;
+
+  static const int MaxRecordedEvents = 1000;
+  static Event*       _recorded_events;
+  static int*         _recorded_event_index;
+  static int          _n_recorded_events;
+
+  static void print_event(outputStream* str, Event evnt);
+
+public:
+  HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
+                   HeapRegion* hr);
+
+  static int num_par_rem_sets();
+  static bool par_traversal() { return _par_traversal; }
+  static void set_par_traversal(bool b);
+
+  HeapRegion* hr() const {
+    return _other_regions.hr();
+  }
+
+  size_t occupied() const {
+    return _other_regions.occupied();
+  }
+  size_t occ_fine() const {
+    return _other_regions.occ_fine();
+  }
+  size_t occ_coarse() const {
+    return _other_regions.occ_coarse();
+  }
+  size_t occ_sparse() const {
+    return _other_regions.occ_sparse();
+  }
+
+  static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); }
+
+  /* Used in the sequential case.  Returns "true" iff this addition causes
+     the size limit to be reached. */
+  bool add_reference(oop* from) {
+    _other_regions.add_reference(from);
+    return false;
+  }
+
+  /* Used in the parallel case.  Returns "true" iff this addition causes
+     the size limit to be reached. */
+  bool add_reference(oop* from, int tid) {
+    _other_regions.add_reference(from, tid);
+    return false;
+  }
+
+  // Records the fact that the current region contains an outgoing
+  // reference into "to_hr".
+  void add_outgoing_reference(HeapRegion* to_hr);
+
+  // Removes any entries shown by the given bitmaps to contain only dead
+  // objects.
+  void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
+
+  // The region is being reclaimed; clear its remset, and any mention of
+  // entries for this region in other remsets.
+  void clear();
+
+  // Forget any entries due to pointers from "from_hr".
+  void clear_incoming_entry(HeapRegion* from_hr) {
+    _other_regions.clear_incoming_entry(from_hr);
+  }
+
+#if 0
+  virtual void cleanup() = 0;
+#endif
+
+  // Should be called from single-threaded code.
+  void init_for_par_iteration();
+  // Attempt to claim the region.  Returns true iff this call caused an
+  // atomic transition from Unclaimed to Claimed.
+  bool claim_iter();
+  // Sets the iteration state to "complete".
+  void set_iter_complete();
+  // Returns "true" iff the region's iteration is complete.
+  bool iter_is_complete();
+
+  // Initialize the given iterator to iterate over this rem set.
+  void init_iterator(HeapRegionRemSetIterator* iter) const;
+
+#if 0
+  // Apply the "do_card" method to the start address of every card in the
+  // rem set.  Returns false if some application of the closure aborted.
+  virtual bool card_iterate(CardClosure* iter) = 0;
+#endif
+
+  // The actual # of bytes this hr_remset takes up.
+  size_t mem_size() {
+    return _other_regions.mem_size()
+      // This correction is necessary because the above already accounts for
+      // the embedded OtherRegionsTable; add the rest of this object's size.
+      + sizeof(HeapRegionRemSet) - sizeof(OtherRegionsTable);
+  }
+
+  // Returns the memory occupancy of all static data structures associated
+  // with remembered sets.
+  static size_t static_mem_size() {
+    return OtherRegionsTable::static_mem_size();
+  }
+
+  // Returns the memory occupancy of all free_list data structures associated
+  // with remembered sets.
+  static size_t fl_mem_size() {
+    return OtherRegionsTable::fl_mem_size();
+  }
+
+  bool contains_reference(oop* from) const {
+    return _other_regions.contains_reference(from);
+  }
+  void print() const;
+
+#if MAYBE
+  // We are about to introduce a constraint, requiring the collection time
+  // of the region owning this RS to be <= "hr", and forgetting pointers
+  // from the owning region to "hr."  Before doing so, examines this rem
+  // set for pointers to "hr", possibly identifying some popular objects.,
+  // and possibly finding some cards to no longer contain pointers to "hr",
+  //
+  // These steps may prevent the the constraint from being necessary; in
+  // which case returns a set of cards now thought to contain no pointers
+  // into HR.  In the normal (I assume) case, returns NULL, indicating that
+  // we should go ahead and add the constraint.
+  virtual SeqHeapRegionRemSet* audit(HeapRegion* hr) = 0;
+#endif
+
+  // Called during a stop-world phase to perform any deferred cleanups.
+  // The second version may be called by parallel threads after they finish
+  // collection work.
+  static void cleanup();
+  static void par_cleanup();
+
+  // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
+  // (Uses it to initialize from_card_cache).
+  static void init_heap(size_t max_regions) {
+    OtherRegionsTable::init_from_card_cache(max_regions);
+  }
+
+  // Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
+  static void shrink_heap(size_t new_n_regs) {
+    OtherRegionsTable::shrink_from_card_cache(new_n_regs);
+  }
+
+#ifndef PRODUCT
+  static void print_from_card_cache() {
+    OtherRegionsTable::print_from_card_cache();
+  }
+#endif
+
+  static void record(HeapRegion* hr, oop* f);
+  static void print_recorded();
+  static void record_event(Event evnt);
+
+  // Run unit tests.
+#ifndef PRODUCT
+  static void test();
+#endif
+
+};
+
+class HeapRegionRemSetIterator : public CHeapObj {
+
+  // The region over which we're iterating.
+  const HeapRegionRemSet* _hrrs;
+
+  // Local caching of HRRS fields.
+  const BitMap*             _coarse_map;
+  PosParPRT**               _fine_grain_regions;
+
+  G1BlockOffsetSharedArray* _bosa;
+  G1CollectedHeap*          _g1h;
+
+  // The number yielded since initialization.
+  size_t _n_yielded_fine;
+  size_t _n_yielded_coarse;
+  size_t _n_yielded_sparse;
+
+  // Indicates which of the three tables (sparse, fine, coarse) we are
+  // currently iterating over.
+  enum IterState {
+    Sparse,
+    Fine,
+    Coarse
+  };
+  IterState _is;
+
+  // In both the fine and coarse iterations, the heap offset of the first
+  // card of the current region.
+  size_t _cur_region_card_offset;
+  // Card offset within cur region.
+  size_t _cur_region_cur_card;
+
+  // Coarse table iteration fields:
+
+  // Current region index.
+  int _coarse_cur_region_index;
+  int _coarse_cur_region_cur_card;
+
+  bool coarse_has_next(size_t& card_index);
+
+  // Fine table iteration fields:
+
+  // Index of bucket-list we're working on.
+  int _fine_array_index;
+  // The Per Region Table we're working on within the current bucket list.
+  PosParPRT* _fine_cur_prt;
+
+  /* SparsePRT::*/ SparsePRTIter _sparse_iter;
+
+  void fine_find_next_non_null_prt();
+
+  bool fine_has_next();
+  bool fine_has_next(size_t& card_index);
+
+public:
+  // We require an iterator to be initialized before use, so the
+  // constructor does little.
+  HeapRegionRemSetIterator();
+
+  void initialize(const HeapRegionRemSet* hrrs);
+
+  // If one or more cards remain to be yielded, returns true and sets
+  // "card_index" to one of those cards (which is then considered yielded).
+  // Otherwise, returns false (and leaves "card_index" undefined).
+  bool has_next(size_t& card_index);
+
+  size_t n_yielded_fine() { return _n_yielded_fine; }
+  size_t n_yielded_coarse() { return _n_yielded_coarse; }
+  size_t n_yielded_sparse() { return _n_yielded_sparse; }
+  size_t n_yielded() {
+    return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse();
+  }
+};
+
+#if 0
+class CardClosure: public Closure {
+public:
+  virtual void do_card(HeapWord* card_start) = 0;
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,345 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_heapRegionSeq.cpp.incl"
+
+// Local to this file.
+
+static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
+  if ((*hr1p)->end() <= (*hr2p)->bottom()) return -1;
+  else if ((*hr2p)->end() <= (*hr1p)->bottom()) return 1;
+  else if (*hr1p == *hr2p) return 0;
+  else {
+    assert(false, "We should never compare distinct overlapping regions.");
+  }
+  return 0;
+}
+
+HeapRegionSeq::HeapRegionSeq(const size_t max_size) :
+  _alloc_search_start(0),
+  // The line below is the worst bit of C++ hackery I've ever written
+  // (Detlefs, 11/23).  You should think of it as equivalent to
+  // "_regions(100, true)": initialize the growable array and inform it
+  // that it should allocate its elem array(s) on the C heap.  The first
+  // argument, however, is actually a comma expression (new-expr, 100).
+  // The purpose of the new_expr is to inform the growable array that it
+  // is *already* allocated on the C heap: it uses the placement syntax to
+  // keep it from actually doing any allocation.
+  _regions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
+                                       (void*)&_regions,
+                                       ResourceObj::C_HEAP),
+            (int)max_size),
+           true),
+  _next_rr_candidate(0),
+  _seq_bottom(NULL)
+{}
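+
+// Illustrative, standalone sketch (not HotSpot code) of the comma-expression
+// trick used in the initializer above: the first operand performs a
+// placement-style allocation over the member's own storage, telling the
+// object its memory is already provided, and the second operand is the
+// constructor argument actually passed.  All names below are hypothetical.
+#if 0
+#include <cstddef>
+#include <cassert>
+
+struct Buf {
+  int _cap;
+  explicit Buf(int cap) : _cap(cap) {}
+  // Placement form: "allocate" by simply returning the supplied storage.
+  void* operator new(size_t, void* where) { return where; }
+};
+
+struct Owner {
+  Buf _buf;
+  explicit Owner(int cap)
+    // Equivalent to "_buf(cap)", but routed through the placement operator
+    // new so _buf is constructed in place over its own storage.
+    : _buf((Buf::operator new(sizeof(Buf), (void*)&_buf), cap)) {}
+};
+
+static void demo() {
+  Owner o(100);
+  assert(o._buf._cap == 100);
+}
+#endif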
+
+// Private methods.
+
+HeapWord*
+HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) {
+  assert(G1CollectedHeap::isHumongous(word_size),
+         "Allocation size should be humongous");
+  int cur = ind;
+  int first = cur;
+  size_t sumSizes = 0;
+  while (cur < _regions.length() && sumSizes < word_size) {
+    // Loop invariant:
+    //  For all i in [first, cur):
+    //       _regions.at(i)->is_empty()
+    //    && _regions.at(i) is contiguous with its predecessor, if any
+    //  && sumSizes is the sum of the sizes of the regions in the interval
+    //       [first, cur)
+    HeapRegion* curhr = _regions.at(cur);
+    if (curhr->is_empty()
+        && !curhr->is_reserved()
+        && (first == cur
+            || (_regions.at(cur-1)->end() ==
+                curhr->bottom()))) {
+      sumSizes += curhr->capacity() / HeapWordSize;
+    } else {
+      first = cur + 1;
+      sumSizes = 0;
+    }
+    cur++;
+  }
+  if (sumSizes >= word_size) {
+    _alloc_search_start = cur;
+    // Mark the allocated regions as allocated.
+    bool zf = G1CollectedHeap::heap()->allocs_are_zero_filled();
+    HeapRegion* first_hr = _regions.at(first);
+    for (int i = first; i < cur; i++) {
+      HeapRegion* hr = _regions.at(i);
+      if (zf)
+        hr->ensure_zero_filled();
+      {
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        hr->set_zero_fill_allocated();
+      }
+      size_t sz = hr->capacity() / HeapWordSize;
+      HeapWord* tmp = hr->allocate(sz);
+      assert(tmp != NULL, "Humongous allocation failure");
+      MemRegion mr = MemRegion(tmp, sz);
+      SharedHeap::fill_region_with_object(mr);
+      hr->declare_filled_region_to_BOT(mr);
+      if (i == first) {
+        first_hr->set_startsHumongous();
+      } else {
+        assert(i > first, "sanity");
+        hr->set_continuesHumongous(first_hr);
+      }
+    }
+    HeapWord* first_hr_bot = first_hr->bottom();
+    HeapWord* obj_end = first_hr_bot + word_size;
+    first_hr->set_top(obj_end);
+    return first_hr_bot;
+  } else {
+    // If we started from the beginning, we want to know why we can't alloc.
+    return NULL;
+  }
+}
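+
+// Illustrative, standalone sketch (not HotSpot code) of the search performed
+// above: scan a sequence of fixed-size slots for a contiguous run of free
+// ones whose combined capacity covers the request, resetting the candidate
+// run whenever a non-free slot is hit.  All names below are hypothetical.
+#if 0
+#include <cstddef>
+
+// Returns the index of the first slot of a suitable run, or -1 if none.
+static int find_free_run(const bool* is_free, int n_slots,
+                         size_t slot_words, size_t word_size) {
+  size_t sum = 0;
+  int first = 0;
+  for (int cur = 0; cur < n_slots; cur++) {
+    if (is_free[cur]) {
+      if (sum == 0) first = cur;       // starting a new candidate run
+      sum += slot_words;
+      if (sum >= word_size) return first;
+    } else {
+      sum = 0;                         // run broken; restart after this slot
+    }
+  }
+  return -1;
+}
+#endif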
+
+void HeapRegionSeq::print_empty_runs(bool reserved_are_empty) {
+  int empty_run = 0;
+  int n_empty = 0;
+  bool at_least_one_reserved = false;
+  int empty_run_start = 0;  // only read when empty_run > 0
+  for (int i = 0; i < _regions.length(); i++) {
+    HeapRegion* r = _regions.at(i);
+    if (r->continuesHumongous()) continue;
+    if (r->is_empty() && (reserved_are_empty || !r->is_reserved())) {
+      assert(!r->isHumongous(), "H regions should not be empty.");
+      if (empty_run == 0) empty_run_start = i;
+      empty_run++;
+      n_empty++;
+      if (r->is_reserved()) {
+        at_least_one_reserved = true;
+      }
+    } else {
+      if (empty_run > 0) {
+        gclog_or_tty->print("  %d:%d", empty_run_start, empty_run);
+        if (reserved_are_empty && at_least_one_reserved)
+          gclog_or_tty->print("(R)");
+        empty_run = 0;
+        at_least_one_reserved = false;
+      }
+    }
+  }
+  if (empty_run > 0) {
+    gclog_or_tty->print(" %d:%d", empty_run_start, empty_run);
+    if (reserved_are_empty && at_least_one_reserved) gclog_or_tty->print("(R)");
+  }
+  gclog_or_tty->print_cr(" [tot = %d]", n_empty);
+}
+
+int HeapRegionSeq::find(HeapRegion* hr) {
+  // FIXME: optimized for adjacent regions of fixed size.
+  int ind = hr->hrs_index();
+  if (ind != -1) {
+    assert(_regions.at(ind) == hr, "Mismatch");
+  }
+  return ind;
+}
+
+
+// Public methods.
+
+void HeapRegionSeq::insert(HeapRegion* hr) {
+  assert(!_regions.is_full(), "Too many elements in HeapRegionSeq");
+  if (_regions.length() == 0
+      || _regions.top()->end() <= hr->bottom()) {
+    hr->set_hrs_index(_regions.length());
+    _regions.append(hr);
+  } else {
+    _regions.append(hr);
+    _regions.sort(orderRegions);
+    for (int i = 0; i < _regions.length(); i++) {
+      _regions.at(i)->set_hrs_index(i);
+    }
+  }
+  char* bot = (char*)_regions.at(0)->bottom();
+  if (_seq_bottom == NULL || bot < _seq_bottom) _seq_bottom = bot;
+}
+
+size_t HeapRegionSeq::length() {
+  return _regions.length();
+}
+
+size_t HeapRegionSeq::free_suffix() {
+  size_t res = 0;
+  int first = _regions.length() - 1;
+  int cur = first;
+  while (cur >= 0 &&
+         (_regions.at(cur)->is_empty()
+          && !_regions.at(cur)->is_reserved()
+          && (first == cur
+              || (_regions.at(cur+1)->bottom() ==
+                  _regions.at(cur)->end())))) {
+      res++;
+      cur--;
+  }
+  return res;
+}
+
+HeapWord* HeapRegionSeq::obj_allocate(size_t word_size) {
+  int cur = _alloc_search_start;
+  // Make sure "cur" is a valid index.
+  assert(cur >= 0, "Invariant.");
+  HeapWord* res = alloc_obj_from_region_index(cur, word_size);
+  if (res == NULL)
+    res = alloc_obj_from_region_index(0, word_size);
+  return res;
+}
+
+void HeapRegionSeq::iterate(HeapRegionClosure* blk) {
+  iterate_from((HeapRegion*)NULL, blk);
+}
+
+// The first argument r is the heap region at which iteration begins.
+// This operation runs fastest when r is NULL, or the heap region for
+// which a HeapRegionClosure most recently returned true, or the
+// heap region immediately to its right in the sequence.  In all
+// other cases a linear search is required to find the index of r.
+
+void HeapRegionSeq::iterate_from(HeapRegion* r, HeapRegionClosure* blk) {
+
+  // :::: FIXME ::::
+  // Static cache value is bad, especially when we start doing parallel
+  // remembered set update. For now just don't cache anything (the
+  // code in the def'd out blocks).
+
+#if 0
+  static int cached_j = 0;
+#endif
+  int len = _regions.length();
+  int j = 0;
+  // Find the index of r.
+  if (r != NULL) {
+#if 0
+    assert(cached_j >= 0, "Invariant.");
+    if ((cached_j < len) && (r == _regions.at(cached_j))) {
+      j = cached_j;
+    } else if ((cached_j + 1 < len) && (r == _regions.at(cached_j + 1))) {
+      j = cached_j + 1;
+    } else {
+      j = find(r);
+#endif
+      if (j < 0) {
+        j = 0;
+      }
+#if 0
+    }
+#endif
+  }
+  int i;
+  for (i = j; i < len; i += 1) {
+    int res = blk->doHeapRegion(_regions.at(i));
+    if (res) {
+#if 0
+      cached_j = i;
+#endif
+      blk->incomplete();
+      return;
+    }
+  }
+  for (i = 0; i < j; i += 1) {
+    int res = blk->doHeapRegion(_regions.at(i));
+    if (res) {
+#if 0
+      cached_j = i;
+#endif
+      blk->incomplete();
+      return;
+    }
+  }
+}
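+
+// Illustrative, standalone sketch (not HotSpot code) of the circular
+// iteration pattern used above: start at a caller-supplied hint index, walk
+// to the end, wrap around to the beginning, and stop as soon as the visitor
+// asks to.  All names below are hypothetical.
+#if 0
+static void iterate_circular(int* items, int len, int start,
+                             bool (*visit)(int)) {
+  for (int i = start; i < len; i++) {
+    if (visit(items[i])) return;       // early termination requested
+  }
+  for (int i = 0; i < start; i++) {
+    if (visit(items[i])) return;
+  }
+}
+#endif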
+
+void HeapRegionSeq::iterate_from(int idx, HeapRegionClosure* blk) {
+  int len = _regions.length();
+  int i;
+  for (i = idx; i < len; i++) {
+    if (blk->doHeapRegion(_regions.at(i))) {
+      blk->incomplete();
+      return;
+    }
+  }
+  for (i = 0; i < idx; i++) {
+    if (blk->doHeapRegion(_regions.at(i))) {
+      blk->incomplete();
+      return;
+    }
+  }
+}
+
+MemRegion HeapRegionSeq::shrink_by(size_t shrink_bytes,
+                                   size_t& num_regions_deleted) {
+  assert(shrink_bytes % os::vm_page_size() == 0, "unaligned");
+  assert(shrink_bytes % HeapRegion::GrainBytes == 0, "unaligned");
+
+  if (_regions.length() == 0) {
+    num_regions_deleted = 0;
+    return MemRegion();
+  }
+  int j = _regions.length() - 1;
+  HeapWord* end = _regions.at(j)->end();
+  HeapWord* last_start = end;
+  while (j >= 0 && shrink_bytes > 0) {
+    HeapRegion* cur = _regions.at(j);
+    // We have to leave humongous regions where they are,
+    // and work around them.
+    if (cur->isHumongous()) {
+      return MemRegion(last_start, end);
+    }
+    cur->reset_zero_fill();
+    assert(cur == _regions.top(), "Should be top");
+    if (!cur->is_empty()) break;
+    shrink_bytes -= cur->capacity();
+    num_regions_deleted++;
+    _regions.pop();
+    last_start = cur->bottom();
+    // We need to delete these somehow, but can't currently do so here: if
+    // we do, the ZF thread may still access the deleted region.  We'll
+    // leave this here as a reminder that we have to do something about
+    // this.
+    // delete cur;
+    j--;
+  }
+  return MemRegion(last_start, end);
+}
+
+
+class PrintHeapRegionClosure : public  HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    gclog_or_tty->print(PTR_FORMAT ":", r);
+    r->print();
+    return false;
+  }
+};
+
+void HeapRegionSeq::print() {
+  PrintHeapRegionClosure cl;
+  iterate(&cl);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class HeapRegion;
+class HeapRegionClosure;
+
+class HeapRegionSeq: public CHeapObj {
+
+  // _regions is kept sorted by start address order, and no two regions are
+  // overlapping.
+  GrowableArray<HeapRegion*> _regions;
+
+  // The index in "_regions" at which to start the next allocation search.
+  // (For efficiency only; private to obj_allocate after initialization.)
+  int _alloc_search_start;
+
+  // Attempts to allocate a block of the (assumed humongous) word_size,
+  // starting at the region "ind".
+  HeapWord* alloc_obj_from_region_index(int ind, size_t word_size);
+
+  // Currently, we're choosing collection sets in a round-robin fashion,
+  // starting here.
+  int _next_rr_candidate;
+
+  // The bottom address of the bottom-most region, or else NULL if there
+  // are no regions in the sequence.
+  char* _seq_bottom;
+
+ public:
+  // Initializes "this" to the empty sequence of regions.
+  HeapRegionSeq(const size_t max_size);
+
+  // Adds "hr" to "this" sequence.  Requires "hr" not to overlap with
+  // any region already in "this".  (Will perform better if regions are
+  // inserted in ascending address order.)
+  void insert(HeapRegion* hr);
+
+  // Given a HeapRegion*, returns its index within _regions,
+  // or returns -1 if not found.
+  int find(HeapRegion* hr);
+
+  // Requires the index to be valid; returns the region at that index.
+  HeapRegion* at(size_t i) { return _regions.at((int)i); }
+
+  // Return the number of regions in the sequence.
+  size_t length();
+
+  // Returns the number of contiguous regions at the end of the sequence
+  // that are available for allocation.
+  size_t free_suffix();
+
+  // Requires "word_size" to be humongous (in the technical sense).  If
+  // possible, allocates a contiguous subsequence of the heap regions to
+  // satisfy the allocation, and returns the address of the beginning of
+  // that sequence, otherwise returns NULL.
+  HeapWord* obj_allocate(size_t word_size);
+
+  // Apply the "doHeapRegion" method of "blk" to all regions in "this",
+  // in address order, terminating the iteration early
+  // if the "doHeapRegion" method returns "true".
+  void iterate(HeapRegionClosure* blk);
+
+  // Apply the "doHeapRegion" method of "blk" to all regions in "this",
+  // starting at "r" (or first region, if "r" is NULL), in a circular
+  // manner, terminating the iteration early if the "doHeapRegion" method
+  // returns "true".
+  void iterate_from(HeapRegion* r, HeapRegionClosure* blk);
+
+  // As above, but start from a given index in the sequence
+  // instead of a given heap region.
+  void iterate_from(int idx, HeapRegionClosure* blk);
+
+  // Requires "shrink_bytes" to be a multiple of the page size and heap
+  // region granularity.  Deletes as many "rightmost" completely free heap
+  // regions from the sequence as comprise shrink_bytes bytes.  Returns the
+  // MemRegion indicating the region those regions comprised, and sets
+  // "num_regions_deleted" to the number of regions deleted.
+  MemRegion shrink_by(size_t shrink_bytes, size_t& num_regions_deleted);
+
+  // If "addr" falls within a region in the sequence, return that region,
+  // or else NULL.
+  HeapRegion* addr_to_region(const void* addr);
+
+  void print();
+
+  // Prints out runs of empty regions.  If the arg is "true", reserved
+  // (popular) regions are considered "empty".
+  void print_empty_runs(bool reserved_are_empty);
+
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline HeapRegion* HeapRegionSeq::addr_to_region(const void* addr) {
+  assert(_seq_bottom != NULL, "bad _seq_bottom in addr_to_region");
+  if ((char*) addr >= _seq_bottom) {
+    size_t diff = (size_t) pointer_delta((HeapWord*) addr,
+                                         (HeapWord*) _seq_bottom);
+    int index = (int) (diff >> HeapRegion::LogOfHRGrainWords);
+    assert(index >= 0, "invariant / paranoia");
+    if (index < _regions.length()) {
+      HeapRegion* hr = _regions.at(index);
+      assert(hr->is_in_reserved(addr),
+             "addr_to_region is wrong...");
+      return hr;
+    }
+  }
+  return NULL;
+}
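+
+// Illustrative, standalone sketch (not HotSpot code) of the address-to-index
+// computation above: with equally sized, power-of-two regions laid out
+// contiguously from a known bottom address, the region index is just the
+// byte offset shifted right by the log of the region size.  The names and
+// log_region_bytes parameter below are hypothetical.
+#if 0
+#include <stdint.h>
+
+static int addr_to_index(const void* addr, const void* bottom,
+                         int log_region_bytes) {
+  // Assumes addr >= bottom and addr lies within the covered range.
+  uintptr_t diff = (uintptr_t) addr - (uintptr_t) bottom;
+  return (int) (diff >> log_region_bytes);
+}
+#endif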
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_ptrQueue.cpp.incl"
+
+PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm) :
+  _qset(qset_), _buf(NULL), _index(0), _active(false),
+  _perm(perm), _lock(NULL)
+{}
+
+PtrQueue::~PtrQueue() {
+  if (!_perm && _buf != NULL) {
+    if (_index == _sz) {
+      // No work to do.
+      qset()->deallocate_buffer(_buf);
+    } else {
+      // We must NULL out the unused entries, then enqueue.
+      for (size_t i = 0; i < _index; i += oopSize) {
+        _buf[byte_index_to_index((int)i)] = NULL;
+      }
+      qset()->enqueue_complete_buffer(_buf);
+      _buf = NULL;
+    }
+  }
+}
+
+
+static int byte_index_to_index(int ind) {
+  assert((ind % oopSize) == 0, "Invariant.");
+  return ind / oopSize;
+}
+
+static int index_to_byte_index(int byte_ind) {
+  return byte_ind * oopSize;
+}
+
+void PtrQueue::enqueue_known_active(void* ptr) {
+  assert(0 <= _index && _index <= _sz, "Invariant.");
+  assert(_index == 0 || _buf != NULL, "invariant");
+
+  while (_index == 0) {
+    handle_zero_index();
+  }
+  assert(_index > 0, "postcondition");
+
+  _index -= oopSize;
+  _buf[byte_index_to_index((int)_index)] = ptr;
+  assert(0 <= _index && _index <= _sz, "Invariant.");
+}
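+
+// Illustrative, standalone sketch (not HotSpot code) of the buffer indexing
+// scheme used above: the index is kept in bytes, starts at the buffer size
+// (meaning "empty") and counts down toward zero as entries are added, so a
+// zero index means "full".  All names below are hypothetical.
+#if 0
+#include <cstddef>
+#include <cassert>
+
+struct DownCountingBuf {
+  void** _buf;
+  size_t _sz;      // capacity in bytes
+  size_t _index;   // next free slot, in bytes; _sz means empty, 0 means full
+
+  explicit DownCountingBuf(size_t n_entries)
+    : _buf(new void*[n_entries]),
+      _sz(n_entries * sizeof(void*)),
+      _index(_sz) {}
+  ~DownCountingBuf() { delete[] _buf; }
+
+  bool is_full() const { return _index == 0; }
+
+  void push(void* p) {
+    assert(!is_full());
+    _index -= sizeof(void*);
+    _buf[_index / sizeof(void*)] = p;  // byte index -> slot index
+  }
+};
+#endif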
+
+void PtrQueue::locking_enqueue_completed_buffer(void** buf) {
+  assert(_lock->owned_by_self(), "Required.");
+  _lock->unlock();
+  qset()->enqueue_complete_buffer(buf);
+  // We must relock only because the caller, in the normal case, will
+  // unlock it afterwards.
+  _lock->lock_without_safepoint_check();
+}
+
+
+PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
+  _max_completed_queue(0),
+  _cbl_mon(NULL), _fl_lock(NULL),
+  _notify_when_complete(notify_when_complete),
+  _sz(0),
+  _completed_buffers_head(NULL),
+  _completed_buffers_tail(NULL),
+  _n_completed_buffers(0),
+  _process_completed_threshold(0), _process_completed(false),
+  _buf_free_list(NULL), _buf_free_list_sz(0)
+{}
+
+void** PtrQueueSet::allocate_buffer() {
+  assert(_sz > 0, "Didn't set a buffer size.");
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  if (_buf_free_list != NULL) {
+    void** res = _buf_free_list;
+    _buf_free_list = (void**)_buf_free_list[0];
+    _buf_free_list_sz--;
+    // Override the next pointer with NULL, in case we later scan this part
+    // of the buffer.
+    res[0] = NULL;
+    return res;
+  } else {
+    return NEW_C_HEAP_ARRAY(void*, _sz);
+  }
+}
+
+void PtrQueueSet::deallocate_buffer(void** buf) {
+  assert(_sz > 0, "Didn't set a buffer size.");
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  buf[0] = (void*)_buf_free_list;
+  _buf_free_list = buf;
+  _buf_free_list_sz++;
+}
+
+void PtrQueueSet::reduce_free_list() {
+  // For now we'll adopt the strategy of deleting half.
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  size_t n = _buf_free_list_sz / 2;
+  while (n > 0) {
+    assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong.");
+    void** head = _buf_free_list;
+    _buf_free_list = (void**)_buf_free_list[0];
+    FREE_C_HEAP_ARRAY(void*,head);
+    n--;
+  }
+}
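+
+// Illustrative, standalone sketch (not HotSpot code) of the free-list scheme
+// used above: retired buffers are chained through their own first element,
+// so no separate list nodes are needed.  All names below are hypothetical.
+#if 0
+#include <cstddef>
+
+struct BufFreeList {
+  void** _head;
+  size_t _n;
+  BufFreeList() : _head(NULL), _n(0) {}
+
+  void push(void** buf) {
+    buf[0] = (void*) _head;        // reuse slot 0 as the "next" link
+    _head = buf;
+    _n++;
+  }
+
+  void** pop() {                   // returns NULL if the list is empty
+    if (_head == NULL) return NULL;
+    void** res = _head;
+    _head = (void**) res[0];
+    res[0] = NULL;                 // clear the link before handing it out
+    _n--;
+    return res;
+  }
+};
+#endif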
+
+void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) {
+  // I use explicit locking here because there's a bailout in the middle.
+  _cbl_mon->lock_without_safepoint_check();
+
+  Thread* thread = Thread::current();
+  assert( ignore_max_completed ||
+          thread->is_Java_thread() ||
+          SafepointSynchronize::is_at_safepoint(),
+          "invariant" );
+  ignore_max_completed = ignore_max_completed || !thread->is_Java_thread();
+
+  if (!ignore_max_completed && _max_completed_queue > 0 &&
+      _n_completed_buffers >= (size_t) _max_completed_queue) {
+    _cbl_mon->unlock();
+    bool b = mut_process_buffer(buf);
+    if (b) {
+      deallocate_buffer(buf);
+      return;
+    }
+
+    // Otherwise, go ahead and enqueue the buffer.  Must reacquire the lock.
+    _cbl_mon->lock_without_safepoint_check();
+  }
+
+  // Here we still hold the _cbl_mon.
+  CompletedBufferNode* cbn = new CompletedBufferNode;
+  cbn->buf = buf;
+  cbn->next = NULL;
+  cbn->index = index;
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = cbn;
+    _completed_buffers_tail = cbn;
+  } else {
+    _completed_buffers_tail->next = cbn;
+    _completed_buffers_tail = cbn;
+  }
+  _n_completed_buffers++;
+
+  if (!_process_completed &&
+      _n_completed_buffers == _process_completed_threshold) {
+    _process_completed = true;
+    if (_notify_when_complete)
+      _cbl_mon->notify_all();
+  }
+  debug_only(assert_completed_buffer_list_len_correct_locked());
+  _cbl_mon->unlock();
+}
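+
+// Illustrative, standalone sketch (not HotSpot code) of the threshold logic
+// above: completed buffers are appended to a singly linked list, and the
+// first time the count reaches a configured threshold a "please process"
+// flag is raised (a real version would also notify waiters).  Locking is
+// omitted; assume the caller holds the protecting monitor.  All names below
+// are hypothetical.
+#if 0
+#include <cstddef>
+
+struct BufNode { void** buf; BufNode* next; };
+
+struct CompletedList {
+  BufNode* _head;
+  BufNode* _tail;
+  size_t   _n;
+  size_t   _threshold;
+  bool     _process_flag;
+
+  explicit CompletedList(size_t threshold)
+    : _head(NULL), _tail(NULL), _n(0),
+      _threshold(threshold), _process_flag(false) {}
+
+  // Caller is assumed to hold the lock protecting these fields.
+  void append(void** buf) {
+    BufNode* nd = new BufNode();
+    nd->buf  = buf;
+    nd->next = NULL;
+    if (_tail == NULL) { _head = nd; _tail = nd; }
+    else               { _tail->next = nd; _tail = nd; }
+    _n++;
+    if (!_process_flag && _n == _threshold) {
+      _process_flag = true;
+    }
+  }
+};
+#endif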
+
+int PtrQueueSet::completed_buffers_list_length() {
+  int n = 0;
+  CompletedBufferNode* cbn = _completed_buffers_head;
+  while (cbn != NULL) {
+    n++;
+    cbn = cbn->next;
+  }
+  return n;
+}
+
+void PtrQueueSet::assert_completed_buffer_list_len_correct() {
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  assert_completed_buffer_list_len_correct_locked();
+}
+
+void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() {
+  guarantee((size_t)completed_buffers_list_length() ==  _n_completed_buffers,
+            "Completed buffer length is wrong.");
+}
+
+void PtrQueueSet::set_buffer_size(size_t sz) {
+  assert(_sz == 0 && sz > 0, "Should be called only once.");
+  _sz = sz * oopSize;
+}
+
+void PtrQueueSet::set_process_completed_threshold(size_t sz) {
+  _process_completed_threshold = sz;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// There are various techniques that require threads to be able to log
+// addresses.  For example, a generational write barrier might log
+// the addresses of modified old-generation objects.  This type supports
+// this operation.
+
+class PtrQueueSet;
+
+class PtrQueue: public CHeapObj {
+
+protected:
+  // The ptr queue set to which this queue belongs.
+  PtrQueueSet* _qset;
+
+  // Whether updates should be logged.
+  bool _active;
+
+  // The buffer.
+  void** _buf;
+  // The index at which an object was last enqueued.  Starts at "_sz"
+  // (indicating an empty buffer) and goes towards zero.
+  size_t _index;
+
+  // The size of the buffer.
+  size_t _sz;
+
+  // If true, the queue is permanent, and doesn't need to deallocate
+  // its buffer in the destructor (since that obtains a lock which may not
+  // be legally locked by then).
+  bool _perm;
+
+  // If there is a lock associated with this buffer, this is that lock.
+  Mutex* _lock;
+
+  PtrQueueSet* qset() { return _qset; }
+
+public:
+  // Initialize this queue to contain a null buffer, and be part of the
+  // given PtrQueueSet.
+  PtrQueue(PtrQueueSet*, bool perm = false);
+  // Release any contained resources.
+  ~PtrQueue();
+
+  // Associate a lock with a ptr queue.
+  void set_lock(Mutex* lock) { _lock = lock; }
+
+  void reset() { if (_buf != NULL) _index = _sz; }
+
+  // Enqueues the given "obj".
+  void enqueue(void* ptr) {
+    if (!_active) return;
+    else enqueue_known_active(ptr);
+  }
+
+  inline void handle_zero_index();
+  void locking_enqueue_completed_buffer(void** buf);
+
+  void enqueue_known_active(void* ptr);
+
+  size_t size() {
+    assert(_sz >= _index, "Invariant.");
+    return _buf == NULL ? 0 : _sz - _index;
+  }
+
+  // Set the "active" property of the queue to "b".  An enqueue to an
+  // inactive thread is a no-op.  Setting a queue to inactive resets its
+  // log to the empty state.
+  void set_active(bool b) {
+    _active = b;
+    if (!b && _buf != NULL) {
+      _index = _sz;
+    } else if (b && _buf != NULL) {
+      assert(_index == _sz, "invariant: queues are empty when activated.");
+    }
+  }
+
+  static int byte_index_to_index(int ind) {
+    assert((ind % oopSize) == 0, "Invariant.");
+    return ind / oopSize;
+  }
+
+  static int index_to_byte_index(int byte_ind) {
+    return byte_ind * oopSize;
+  }
+
+  // To support compiler.
+  static ByteSize byte_offset_of_index() {
+    return byte_offset_of(PtrQueue, _index);
+  }
+  static ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); }
+
+  static ByteSize byte_offset_of_buf() {
+    return byte_offset_of(PtrQueue, _buf);
+  }
+  static ByteSize byte_width_of_buf() { return in_ByteSize(sizeof(void*)); }
+
+  static ByteSize byte_offset_of_active() {
+    return byte_offset_of(PtrQueue, _active);
+  }
+  static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); }
+
+};
+
+// A PtrQueueSet represents resources common to a set of pointer queues.
+// In particular, the individual queues allocate buffers from this shared
+// set, and return completed buffers to the set.
+// All these variables are protected by the TLOQ_CBL_mon. XXX ???
+class PtrQueueSet: public CHeapObj {
+
+protected:
+
+  class CompletedBufferNode: public CHeapObj {
+  public:
+    void** buf;
+    size_t index;
+    CompletedBufferNode* next;
+    CompletedBufferNode() : buf(NULL),
+      index(0), next(NULL){ }
+  };
+
+  Monitor* _cbl_mon;  // Protects the fields below.
+  CompletedBufferNode* _completed_buffers_head;
+  CompletedBufferNode* _completed_buffers_tail;
+  size_t _n_completed_buffers;
+  size_t _process_completed_threshold;
+  volatile bool _process_completed;
+
+  // These fields (and the interpretation of a buffer's first element as a
+  // "next" pointer) are protected by the TLOQ_FL_lock.
+  Mutex* _fl_lock;
+  void** _buf_free_list;
+  size_t _buf_free_list_sz;
+
+  // The size of all buffers in the set.
+  size_t _sz;
+
+  bool _all_active;
+
+  // If true, notify_all on _cbl_mon when the threshold is reached.
+  bool _notify_when_complete;
+
+  // Maximum number of elements allowed on completed queue: after that,
+  // enqueuer does the work itself.  Zero indicates no maximum.
+  int _max_completed_queue;
+
+  int completed_buffers_list_length();
+  void assert_completed_buffer_list_len_correct_locked();
+  void assert_completed_buffer_list_len_correct();
+
+protected:
+  // A mutator thread does the work of processing a buffer.
+  // Returns "true" iff the work is complete (and the buffer may be
+  // deallocated).
+  virtual bool mut_process_buffer(void** buf) {
+    ShouldNotReachHere();
+    return false;
+  }
+
+public:
+  // Create an empty ptr queue set.
+  PtrQueueSet(bool notify_when_complete = false);
+
+  // Because of init-order concerns, we can't pass these as constructor
+  // arguments.
+  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                  int max_completed_queue = 0) {
+    _max_completed_queue = max_completed_queue;
+    assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?");
+    _cbl_mon = cbl_mon; _fl_lock = fl_lock;
+  }
+
+  // Return an empty buffer of size _sz (required to be non-zero).
+  void** allocate_buffer();
+
+  // Return an empty buffer to the free list.  The "buf" argument is
+  // required to be a pointer to the head of an array of length "_sz".
+  void deallocate_buffer(void** buf);
+
+  // Declares that "buf" is a complete buffer.
+  void enqueue_complete_buffer(void** buf, size_t index = 0,
+                               bool ignore_max_completed = false);
+
+  bool completed_buffers_exist_dirty() {
+    return _n_completed_buffers > 0;
+  }
+
+  bool process_completed_buffers() { return _process_completed; }
+
+  bool active() { return _all_active; }
+
+  // Set the buffer size.  Should be called before any "enqueue" operation,
+  // and should only be called once.
+  void set_buffer_size(size_t sz);
+
+  // Get the buffer size.
+  size_t buffer_size() { return _sz; }
+
+  // Set the number of completed buffers that triggers log processing.
+  void set_process_completed_threshold(size_t sz);
+
+  // Must only be called at a safe point.  Indicates that the buffer free
+  // list size may be reduced, if that is deemed desirable.
+  void reduce_free_list();
+
+  size_t completed_buffers_num() { return _n_completed_buffers; }
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+void PtrQueue::handle_zero_index() {
+  assert(0 == _index, "Precondition.");
+  // This thread records the full buffer and allocates a new one (while
+  // holding the lock if there is one).
+  void** buf = _buf;
+  _buf = qset()->allocate_buffer();
+  _sz = qset()->buffer_size();
+  _index = _sz;
+  assert(0 <= _index && _index <= _sz, "Invariant.");
+  if (buf != NULL) {
+    if (_lock) {
+      locking_enqueue_completed_buffer(buf);
+    } else {
+      qset()->enqueue_complete_buffer(buf);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_satbQueue.cpp.incl"
+
+void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
+    _index = _sz;
+  }
+}
+
+void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl,
+                                          void** buf, size_t index, size_t sz) {
+  if (cl == NULL) return;
+  for (size_t i = index; i < sz; i += oopSize) {
+    oop obj = (oop)buf[byte_index_to_index((int)i)];
+    // There can be NULL entries because of destructors.
+    if (obj != NULL) {
+      cl->do_object(obj);
+    }
+  }
+}
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+SATBMarkQueueSet::SATBMarkQueueSet() :
+  PtrQueueSet(),
+  _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/)
+{}
+
+void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                  int max_completed_queue,
+                                  Mutex* lock) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+  _shared_satb_queue.set_lock(lock);
+  if (ParallelGCThreads > 0) {
+    _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads);
+  }
+}
+
+
+void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  t->satb_mark_queue().handle_zero_index();
+}
+
+void SATBMarkQueueSet::set_active_all_threads(bool b) {
+  _all_active = b;
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().set_active(b);
+  }
+}
+
+void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
+  _closure = closure;
+}
+
+void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) {
+  assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition");
+  _par_closures[i] = par_closure;
+}
+
+void SATBMarkQueueSet::iterate_closure_all_threads() {
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(_closure);
+  }
+  shared_satb_queue()->apply_closure(_closure);
+}
+
+void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
+  SharedHeap* sh = SharedHeap::heap();
+  int parity = sh->strong_roots_parity();
+
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    if (t->claim_oops_do(true, parity)) {
+      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+    }
+  }
+  // We'll have worker 0 do this one.
+  if (worker == 0) {
+    shared_satb_queue()->apply_closure(_par_closures[0]);
+  }
+}
+
+bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
+                                                              int worker) {
+  CompletedBufferNode* nd = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    if (_completed_buffers_head != NULL) {
+      nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL;
+      _n_completed_buffers--;
+      if (_n_completed_buffers == 0) _process_completed = false;
+    }
+  }
+  ObjectClosure* cl = (par ? _par_closures[worker] : _closure);
+  if (nd != NULL) {
+    ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz);
+    deallocate_buffer(nd->buf);
+    delete nd;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void SATBMarkQueueSet::abandon_partial_marking() {
+  CompletedBufferNode* buffers_to_delete = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    while (_completed_buffers_head != NULL) {
+      CompletedBufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      nd->next = buffers_to_delete;
+      buffers_to_delete = nd;
+    }
+    _completed_buffers_tail = NULL;
+    _n_completed_buffers = 0;
+    debug_only(assert_completed_buffer_list_len_correct_locked());
+  }
+  while (buffers_to_delete != NULL) {
+    CompletedBufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next;
+    deallocate_buffer(nd->buf);
+    delete nd;
+  }
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  // So we can safely manipulate these queues.
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().reset();
+  }
+  shared_satb_queue()->reset();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class ObjectClosure;
+class JavaThread;
+
+// A ptrQueue whose elements are "oops", pointers to object heads.
+class ObjPtrQueue: public PtrQueue {
+public:
+  ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) :
+    PtrQueue(qset_, perm)
+  {}
+  // Apply the closure to all elements, and reset the index to make the
+  // buffer empty.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", down to "index" (inclusive).
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
+};
+
+
+
+class SATBMarkQueueSet: public PtrQueueSet {
+  ObjectClosure* _closure;
+  ObjectClosure** _par_closures;  // One per ParGCThread.
+
+  ObjPtrQueue _shared_satb_queue;
+
+  // Utility function to support sequential and parallel versions.  If
+  // "par" is true, then "worker" is the par thread id; if "false", worker
+  // is ignored.
+  bool apply_closure_to_completed_buffer_work(bool par, int worker);
+
+
+public:
+  SATBMarkQueueSet();
+
+  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                  int max_completed_queue = 0,
+                  Mutex* lock = NULL);
+
+  static void handle_zero_index_for_thread(JavaThread* t);
+
+  // Apply "set_active(b)" to all thread tloq's.  Should be called only
+  // with the world stopped.
+  void set_active_all_threads(bool b);
+
+  // Register "blk" as "the closure" for all queues.  Only one such closure
+  // is allowed.  The "apply_closure_to_completed_buffer" method will apply
+  // this closure to a completed buffer, and "iterate_closure_all_threads"
+  // applies it to partially-filled buffers (the latter should only be done
+  // with the world stopped).
+  void set_closure(ObjectClosure* closure);
+  // Set the parallel closures: pointer is an array of pointers to
+  // closures, one for each parallel GC thread.
+  void set_par_closure(int i, ObjectClosure* closure);
+
+  // If there is a registered closure for buffers, apply it to all entries
+  // in all currently-active buffers.  This should only be applied at a
+  // safepoint.  (Currently must not be called in parallel; this should
+  // change in the future.)
+  void iterate_closure_all_threads();
+  // Parallel version of the above.
+  void par_iterate_closure_all_threads(int worker);
+
+  // If there exists some completed buffer, pop it, then apply the
+  // registered closure to all its elements, and return true.  If no
+  // completed buffers exist, return false.
+  bool apply_closure_to_completed_buffer() {
+    return apply_closure_to_completed_buffer_work(false, 0);
+  }
+  // Parallel version of the above.
+  bool par_apply_closure_to_completed_buffer(int worker) {
+    return apply_closure_to_completed_buffer_work(true, worker);
+  }
+
+  ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
+
+  // If a marking is being abandoned, reset any unprocessed log buffers.
+  void abandon_partial_marking();
+
+};
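+
+// Illustrative usage sketch of the queue-set interface above.  The closure
+// type and the draining helper are hypothetical; the real clients live in the
+// concurrent marking code.
+#if 0
+class CountObjectsClosure: public ObjectClosure {
+  size_t _n;
+public:
+  CountObjectsClosure() : _n(0) {}
+  void do_object(oop obj) { _n++; }
+  size_t count() const { return _n; }
+};
+
+static void drain_satb_queues_sketch(SATBMarkQueueSet* qs) {
+  CountObjectsClosure cl;
+  qs->set_closure(&cl);
+  // Drain buffers that threads have already filled and handed over.
+  while (qs->apply_closure_to_completed_buffer()) {
+  }
+  // At a safepoint only: also process the partially-filled per-thread
+  // buffers and the shared queue.
+  qs->iterate_closure_all_threads();
+}
+#endif // 0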
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,530 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_sparsePRT.cpp.incl"
+
+#define SPARSE_PRT_VERBOSE 0
+
+#define UNROLL_CARD_LOOPS 1
+
+void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) {
+    sprt_iter->init(this);
+}
+
+void SparsePRTEntry::init(short region_ind) {
+  _region_ind = region_ind;
+  _next_index = NullEntry;
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  _cards[0] = NullEntry;
+  _cards[1] = NullEntry;
+  _cards[2] = NullEntry;
+  _cards[3] = NullEntry;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry;
+#endif
+}
+
+bool SparsePRTEntry::contains_card(short card_index) const {
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  if (_cards[0] == card_index) return true;
+  if (_cards[1] == card_index) return true;
+  if (_cards[2] == card_index) return true;
+  if (_cards[3] == card_index) return true;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    if (_cards[i] == card_index) return true;
+  }
+#endif
+  // Otherwise, the card is not present in this entry.
+  return false;
+}
+
+int SparsePRTEntry::num_valid_cards() const {
+  int sum = 0;
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  if (_cards[0] != NullEntry) sum++;
+  if (_cards[1] != NullEntry) sum++;
+  if (_cards[2] != NullEntry) sum++;
+  if (_cards[3] != NullEntry) sum++;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    if (_cards[i] != NullEntry) sum++;
+  }
+#endif
+  return sum;
+}
+
+SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) {
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  short c = _cards[0];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[0] = card_index; return added; }
+  c = _cards[1];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[1] = card_index; return added; }
+  c = _cards[2];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[2] = card_index; return added; }
+  c = _cards[3];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[3] = card_index; return added; }
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    short c = _cards[i];
+    if (c == card_index) return found;
+    if (c == NullEntry) { _cards[i] = card_index; return added; }
+  }
+#endif
+  // Otherwise, we're full.
+  return overflow;
+}
+
+void SparsePRTEntry::copy_cards(short* cards) const {
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
+  cards[0] = _cards[0];
+  cards[1] = _cards[1];
+  cards[2] = _cards[2];
+  cards[3] = _cards[3];
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    cards[i] = _cards[i];
+  }
+#endif
+}
+
+void SparsePRTEntry::copy_cards(SparsePRTEntry* e) const {
+  copy_cards(&e->_cards[0]);
+}
+
+// ----------------------------------------------------------------------
+
+RSHashTable::RSHashTable(size_t capacity) :
+  _capacity(capacity), _capacity_mask(capacity-1),
+  _occupied_entries(0), _occupied_cards(0),
+  _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)),
+  _buckets(NEW_C_HEAP_ARRAY(short, capacity)),
+  _next_deleted(NULL), _deleted(false),
+  _free_list(NullEntry), _free_region(0)
+{
+  clear();
+}
+
+RSHashTable::~RSHashTable() {
+  if (_entries != NULL) {
+    FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries);
+    _entries = NULL;
+  }
+  if (_buckets != NULL) {
+    FREE_C_HEAP_ARRAY(short, _buckets);
+    _buckets = NULL;
+  }
+}
+
+void RSHashTable::clear() {
+  _occupied_entries = 0;
+  _occupied_cards = 0;
+  guarantee(_entries != NULL, "INV");
+  guarantee(_buckets != NULL, "INV");
+  // This will put -1 == NullEntry in the key field of all entries.
+  memset(_entries, -1, _capacity * sizeof(SparsePRTEntry));
+  memset(_buckets, -1, _capacity * sizeof(short));
+  _free_list = NullEntry;
+  _free_region = 0;
+}
+
+bool RSHashTable::add_card(short region_ind, short card_index) {
+  SparsePRTEntry* e = entry_for_region_ind_create(region_ind);
+  assert(e != NULL && e->r_ind() == region_ind,
+         "Postcondition of call above.");
+  SparsePRTEntry::AddCardResult res = e->add_card(card_index);
+  if (res == SparsePRTEntry::added) _occupied_cards++;
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr("       after add_card[%d]: valid-cards = %d.",
+                pointer_delta(e, _entries, sizeof(SparsePRTEntry)),
+                e->num_valid_cards());
+#endif
+  assert(e->num_valid_cards() > 0, "Postcondition");
+  return res != SparsePRTEntry::overflow;
+}
+
+bool RSHashTable::get_cards(short region_ind, short* cards) {
+  short ind = (short) (region_ind & capacity_mask());
+  short cur_ind = _buckets[ind];
+  SparsePRTEntry* cur;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    cur_ind = cur->next_index();
+  }
+
+  if (cur_ind == NullEntry) return false;
+  // Otherwise...
+  assert(cur->r_ind() == region_ind, "Postcondition of loop + test above.");
+  assert(cur->num_valid_cards() > 0, "Inv");
+  cur->copy_cards(cards);
+  return true;
+}
+
+bool RSHashTable::delete_entry(short region_ind) {
+  short ind = (short) (region_ind & capacity_mask());
+  short* prev_loc = &_buckets[ind];
+  short cur_ind = *prev_loc;
+  SparsePRTEntry* cur;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    prev_loc = cur->next_index_addr();
+    cur_ind = *prev_loc;
+  }
+
+  if (cur_ind == NullEntry) return false;
+  // Otherwise, splice out "cur".
+  *prev_loc = cur->next_index();
+  _occupied_cards -= cur->num_valid_cards();
+  free_entry(cur_ind);
+  _occupied_entries--;
+  return true;
+}
+
+SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const {
+  assert(occupied_entries() < capacity(), "Precondition");
+  short ind = (short) (region_ind & capacity_mask());
+  short cur_ind = _buckets[ind];
+  SparsePRTEntry* cur;
+  // XXX
+  // int k = 0;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    /*
+    k++;
+    if (k > 10) {
+      gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): "
+                    "k = %d, cur_ind = %d.", region_ind, k, cur_ind);
+      if (k >= 1000) {
+        while (1) ;
+      }
+    }
+    */
+    cur_ind = cur->next_index();
+  }
+
+  if (cur_ind != NullEntry) {
+    assert(cur->r_ind() == region_ind, "Loop postcondition + test");
+    return cur;
+  } else {
+    return NULL;
+  }
+}
+
+SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) {
+  SparsePRTEntry* res = entry_for_region_ind(region_ind);
+  if (res == NULL) {
+    short new_ind = alloc_entry();
+    assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room.");
+    res = entry(new_ind);
+    res->init(region_ind);
+    // Insert at front.
+    short ind = (short) (region_ind & capacity_mask());
+    res->set_next_index(_buckets[ind]);
+    _buckets[ind] = new_ind;
+    _occupied_entries++;
+  }
+  return res;
+}
+
+short RSHashTable::alloc_entry() {
+  short res;
+  if (_free_list != NullEntry) {
+    res = _free_list;
+    _free_list = entry(res)->next_index();
+    return res;
+  } else if ((size_t) _free_region+1 < capacity()) {
+    res = _free_region;
+    _free_region++;
+    return res;
+  } else {
+    return NullEntry;
+  }
+}
+
+
+void RSHashTable::free_entry(short fi) {
+  entry(fi)->set_next_index(_free_list);
+  _free_list = fi;
+}
+
+
+void RSHashTable::add_entry(SparsePRTEntry* e) {
+  assert(e->num_valid_cards() > 0, "Precondition.");
+  SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind());
+  e->copy_cards(e2);
+  _occupied_cards += e2->num_valid_cards();
+  assert(e2->num_valid_cards() > 0, "Postcondition.");
+}
+
+RSHashTable* RSHashTable::_head_deleted_list = NULL;
+
+void RSHashTable::add_to_deleted_list(RSHashTable* rsht) {
+  assert(!rsht->deleted(), "Should delete only once.");
+  rsht->set_deleted(true);
+  RSHashTable* hd = _head_deleted_list;
+  while (true) {
+    rsht->_next_deleted = hd;
+    RSHashTable* res =
+      (RSHashTable*)
+      Atomic::cmpxchg_ptr(rsht, &_head_deleted_list, hd);
+    if (res == hd) return;
+    else hd = res;
+  }
+}
+
+RSHashTable* RSHashTable::get_from_deleted_list() {
+  RSHashTable* hd = _head_deleted_list;
+  while (hd != NULL) {
+    RSHashTable* next = hd->next_deleted();
+    RSHashTable* res =
+      (RSHashTable*)
+      Atomic::cmpxchg_ptr(next, &_head_deleted_list, hd);
+    if (res == hd) {
+      hd->set_next_deleted(NULL);
+      hd->set_deleted(false);
+      return hd;
+    } else {
+      hd = res;
+    }
+  }
+  return NULL;
+}
+
+short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() {
+  short res;
+  while (_bl_ind != RSHashTable::NullEntry) {
+    res = _rsht->entry(_bl_ind)->card(0);
+    if (res != SparsePRTEntry::NullEntry) {
+      return res;
+    } else {
+      _bl_ind = _rsht->entry(_bl_ind)->next_index();
+    }
+  }
+  // Otherwise, none found:
+  return SparsePRTEntry::NullEntry;
+}
+
+size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) {
+  return
+    _heap_bot_card_ind
+    + (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion)
+    + ci;
+}
+
+bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) {
+  _card_ind++;
+  short ci;
+  if (_card_ind < SparsePRTEntry::CardsPerEntry &&
+      ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) !=
+       SparsePRTEntry::NullEntry)) {
+    card_index = compute_card_ind(ci);
+    return true;
+  }
+  // Otherwise, must find the next valid entry.
+  _card_ind = 0;
+
+  if (_bl_ind != RSHashTable::NullEntry) {
+      _bl_ind = _rsht->entry(_bl_ind)->next_index();
+      ci = find_first_card_in_list();
+      if (ci != SparsePRTEntry::NullEntry) {
+        card_index = compute_card_ind(ci);
+        return true;
+      }
+  }
+  // If we didn't return above, must go to the next non-null table index.
+  _tbl_ind++;
+  while ((size_t)_tbl_ind < _rsht->capacity()) {
+    _bl_ind = _rsht->_buckets[_tbl_ind];
+    ci = find_first_card_in_list();
+    if (ci != SparsePRTEntry::NullEntry) {
+      card_index = compute_card_ind(ci);
+      return true;
+    }
+    // Otherwise, try next entry.
+    _tbl_ind++;
+  }
+  // Otherwise, there are no more entries.
+  return false;
+}
+
+bool RSHashTable::contains_card(short region_index, short card_index) const {
+  SparsePRTEntry* e = entry_for_region_ind(region_index);
+  return (e != NULL && e->contains_card(card_index));
+}
+
+size_t RSHashTable::mem_size() const {
+  return sizeof(*this) + capacity() * (sizeof(SparsePRTEntry) + sizeof(short));
+}
+
+
+// ----------------------------------------------------------------------
+
+SparsePRT* SparsePRT::_head_expanded_list = NULL;
+
+void SparsePRT::add_to_expanded_list(SparsePRT* sprt) {
+  // We could expand multiple times in a pause -- only put on list once.
+  if (sprt->expanded()) return;
+  sprt->set_expanded(true);
+  SparsePRT* hd = _head_expanded_list;
+  while (true) {
+    sprt->_next_expanded = hd;
+    SparsePRT* res =
+      (SparsePRT*)
+      Atomic::cmpxchg_ptr(sprt, &_head_expanded_list, hd);
+    if (res == hd) return;
+    else hd = res;
+  }
+}
+
+SparsePRT* SparsePRT::get_from_expanded_list() {
+  SparsePRT* hd = _head_expanded_list;
+  while (hd != NULL) {
+    SparsePRT* next = hd->next_expanded();
+    SparsePRT* res =
+      (SparsePRT*)
+      Atomic::cmpxchg_ptr(next, &_head_expanded_list, hd);
+    if (res == hd) {
+      hd->set_next_expanded(NULL);
+      return hd;
+    } else {
+      hd = res;
+    }
+  }
+  return NULL;
+}
+
+
+void SparsePRT::cleanup_all() {
+  // First clean up all expanded tables so they agree on next and cur.
+  SparsePRT* sprt = get_from_expanded_list();
+  while (sprt != NULL) {
+    sprt->cleanup();
+    sprt = get_from_expanded_list();
+  }
+  // Now delete all deleted RSHashTables.
+  RSHashTable* rsht = RSHashTable::get_from_deleted_list();
+  while (rsht != NULL) {
+#if SPARSE_PRT_VERBOSE
+    gclog_or_tty->print_cr("About to delete RSHT " PTR_FORMAT ".", rsht);
+#endif
+    delete rsht;
+    rsht = RSHashTable::get_from_deleted_list();
+  }
+}
+
+
+SparsePRT::SparsePRT(HeapRegion* hr) :
+  _hr(hr), _expanded(false), _next_expanded(NULL)
+{
+  _cur = new RSHashTable(InitialCapacity);
+  _next = _cur;
+}
+
+SparsePRT::~SparsePRT() {
+  assert(_next != NULL && _cur != NULL, "Inv");
+  if (_cur != _next) { delete _cur; }
+  delete _next;
+}
+
+
+size_t SparsePRT::mem_size() const {
+  // We ignore "_cur" here, because it either = _next, or else it is
+  // on the deleted list.
+  return sizeof(this) + _next->mem_size();
+}
+
+bool SparsePRT::add_card(short region_id, short card_index) {
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr("  Adding card %d from region %d to region %d sparse.",
+                card_index, region_id, _hr->hrs_index());
+#endif
+  if (_next->occupied_entries() * 2 > _next->capacity()) {
+    expand();
+  }
+  return _next->add_card(region_id, card_index);
+}
+
+bool SparsePRT::get_cards(short region_id, short* cards) {
+  return _next->get_cards(region_id, cards);
+}
+
+bool SparsePRT::delete_entry(short region_id) {
+  return _next->delete_entry(region_id);
+}
+
+void SparsePRT::clear() {
+  // If they differ, _next is bigger than _cur, so _next has no chance of
+  // being the initial size.
+  if (_next != _cur) {
+    delete _next;
+  }
+
+  if (_cur->capacity() != InitialCapacity) {
+    delete _cur;
+    _cur = new RSHashTable(InitialCapacity);
+  } else {
+    _cur->clear();
+  }
+  _next = _cur;
+}
+
+void SparsePRT::cleanup() {
+  // Make sure that the current and next tables agree.  (Another mechanism
+  // takes care of deleting now-unused tables.)
+  _cur = _next;
+}
+
+void SparsePRT::expand() {
+  RSHashTable* last = _next;
+  _next = new RSHashTable(last->capacity() * 2);
+
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr("  Expanded sparse table for %d to %d.",
+                _hr->hrs_index(), _next->capacity());
+#endif
+  for (size_t i = 0; i < last->capacity(); i++) {
+    SparsePRTEntry* e = last->entry((int)i);
+    if (e->valid_entry()) {
+#if SPARSE_PRT_VERBOSE
+      gclog_or_tty->print_cr("    During expansion, transferred entry for %d.",
+                    e->r_ind());
+#endif
+      _next->add_entry(e);
+    }
+  }
+  if (last != _cur)
+    RSHashTable::add_to_deleted_list(last);
+  add_to_expanded_list(this);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Sparse remembered set for a heap region (the "owning" region).  Maps
+// indices of other regions to short sequences of cards in the other region
+// that might contain pointers into the owner region.
+
+// These tables only expand while they are accessed in parallel --
+// deletions may be done only in single-threaded code.  This lets us permit
+// unsynchronized reads/iterations, as long as expansions caused by
+// insertions only enqueue old versions for deletion and do not delete
+// old versions synchronously.
+
+
+class SparsePRTEntry {
+public:
+  enum SomePublicConstants {
+    CardsPerEntry = (short)4,
+    NullEntry = (short)-1,
+    DeletedEntry = (short)-2
+  };
+
+private:
+  short _region_ind;
+  short _next_index;
+  short _cards[CardsPerEntry];
+
+public:
+
+  // Set the region_ind to the given value, and delete all cards.
+  inline void init(short region_ind);
+
+  short r_ind() const { return _region_ind; }
+  bool valid_entry() const { return r_ind() >= 0; }
+  void set_r_ind(short rind) { _region_ind = rind; }
+
+  short next_index() const { return _next_index; }
+  short* next_index_addr() { return &_next_index; }
+  void set_next_index(short ni) { _next_index = ni; }
+
+  // Returns "true" iff the entry contains the given card index.
+  inline bool contains_card(short card_index) const;
+
+  // Returns the number of non-NULL card entries.
+  inline int num_valid_cards() const;
+
+  // Requires that the entry not contain the given card index.  If there is
+  // space available, add the given card index to the entry and return
+  // "true"; otherwise, return "false" to indicate that the entry is full.
+  enum AddCardResult {
+    overflow,
+    found,
+    added
+  };
+  inline AddCardResult add_card(short card_index);
+
+  // Copy the current entry's cards into "cards".
+  inline void copy_cards(short* cards) const;
+  // Copy the current entry's cards into the "_card" array of "e."
+  inline void copy_cards(SparsePRTEntry* e) const;
+
+  inline short card(int i) const { return _cards[i]; }
+};
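+
+// Illustrative sketch of how a caller reacts to AddCardResult.  The helper is
+// hypothetical; the real caller is RSHashTable::add_card.
+#if 0
+static void add_card_sketch(SparsePRTEntry* e, short card_index) {
+  switch (e->add_card(card_index)) {
+    case SparsePRTEntry::found:     // card was already recorded
+      break;
+    case SparsePRTEntry::added:     // card stored in a free slot
+      break;
+    case SparsePRTEntry::overflow:  // all CardsPerEntry slots are in use; the
+      break;                        // caller must transfer the entry to a
+                                    // larger-capacity representation
+  }
+}
+#endif // 0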
+
+
+class RSHashTable : public CHeapObj {
+
+  friend class RSHashTableIter;
+
+  enum SomePrivateConstants {
+    NullEntry = -1
+  };
+
+  size_t _capacity;
+  size_t _capacity_mask;
+  size_t _occupied_entries;
+  size_t _occupied_cards;
+
+  SparsePRTEntry* _entries;
+  short* _buckets;
+  short  _free_region;
+  short  _free_list;
+
+  static RSHashTable* _head_deleted_list;
+  RSHashTable* _next_deleted;
+  RSHashTable* next_deleted() { return _next_deleted; }
+  void set_next_deleted(RSHashTable* rsht) { _next_deleted = rsht; }
+  bool _deleted;
+  void set_deleted(bool b) { _deleted = b; }
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise returns "NULL."
+  SparsePRTEntry* entry_for_region_ind(short region_ind) const;
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise allocates, initializes, inserts and
+  // returns a new entry for "region_ind".
+  SparsePRTEntry* entry_for_region_ind_create(short region_ind);
+
+  // Returns the index of the next free entry in "_entries".
+  short alloc_entry();
+  // Declares the entry "fi" to be free.  (It must have already been
+  // deleted from any bucket lists.
+  void free_entry(short fi);
+
+public:
+  RSHashTable(size_t capacity);
+  ~RSHashTable();
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  bool get_cards(short region_id, short* cards);
+  bool delete_entry(short region_id);
+
+  bool contains_card(short region_id, short card_index) const;
+
+  void add_entry(SparsePRTEntry* e);
+
+  void clear();
+
+  size_t capacity() const      { return _capacity;       }
+  size_t capacity_mask() const { return _capacity_mask;  }
+  size_t occupied_entries() const { return _occupied_entries; }
+  size_t occupied_cards() const   { return _occupied_cards;   }
+  size_t mem_size() const;
+  bool deleted() { return _deleted; }
+
+  SparsePRTEntry* entry(int i) const { return &_entries[i]; }
+
+  void print();
+
+  static void add_to_deleted_list(RSHashTable* rsht);
+  static RSHashTable* get_from_deleted_list();
+
+
+};
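+
+// Illustrative sketch of the add/query/delete cycle on a table.  The capacity
+// and the region/card indices below are hypothetical; the capacity must be a
+// power of two, since lookups use "region_ind & capacity_mask()".
+#if 0
+static void rs_hash_table_sketch() {
+  RSHashTable* tbl = new RSHashTable(16 /* capacity; power of two */);
+  if (!tbl->add_card(3 /* region */, 7 /* card */)) {
+    // The entry for region 3 overflowed; transfer it to a larger-capacity
+    // representation.
+  }
+  short cards[SparsePRTEntry::CardsPerEntry];
+  if (tbl->get_cards(3, cards)) {
+    // cards[] now holds up to CardsPerEntry card indices; unused slots are
+    // NullEntry.
+  }
+  tbl->delete_entry(3);
+  delete tbl;
+}
+#endif // 0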
+
+  // ValueObj because it will be embedded in the HRRS iterator.
+class RSHashTableIter: public CHeapObj {
+    short _tbl_ind;
+    short _bl_ind;
+    short _card_ind;
+    RSHashTable* _rsht;
+    size_t _heap_bot_card_ind;
+
+    enum SomePrivateConstants {
+      CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+    };
+
+    // If the bucket list pointed to by _bl_ind contains a card, sets
+    // _bl_ind to the index of that entry, and returns the card.
+    // Otherwise, returns SparsePRTEntry::NullEntry.
+    short find_first_card_in_list();
+    // Computes the proper card index for the card whose offset in the
+    // current region (as indicated by _bl_ind) is "ci".
+    // This is subject to errors when there is iteration concurrent with
+    // modification, but these errors should be benign.
+    size_t compute_card_ind(short ci);
+
+  public:
+    RSHashTableIter(size_t heap_bot_card_ind) :
+      _tbl_ind(RSHashTable::NullEntry),
+      _bl_ind(RSHashTable::NullEntry),
+      _card_ind((SparsePRTEntry::CardsPerEntry-1)),
+      _rsht(NULL),
+      _heap_bot_card_ind(heap_bot_card_ind)
+    {}
+
+    void init(RSHashTable* rsht) {
+      _rsht = rsht;
+      _tbl_ind = -1; // So that first increment gets to 0.
+      _bl_ind = RSHashTable::NullEntry;
+      _card_ind = (SparsePRTEntry::CardsPerEntry-1);
+    }
+
+    bool has_next(size_t& card_index);
+
+  };
+
+// Concurrent access to a SparsePRT must be serialized by some external
+// mutex.
+
+class SparsePRTIter;
+
+class SparsePRT : public CHeapObj {
+  //  Iterations are done on the _cur hash table, since they only need to
+  //  see entries visible at the start of a collection pause.
+  //  All other operations are done using the _next hash table.
+  RSHashTable* _cur;
+  RSHashTable* _next;
+
+  HeapRegion* _hr;
+
+  enum SomeAdditionalPrivateConstants {
+    InitialCapacity = 16
+  };
+
+  void expand();
+
+  bool _expanded;
+
+  bool expanded() { return _expanded; }
+  void set_expanded(bool b) { _expanded = b; }
+
+  SparsePRT* _next_expanded;
+
+  SparsePRT* next_expanded() { return _next_expanded; }
+  void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
+
+
+  static SparsePRT* _head_expanded_list;
+
+public:
+  SparsePRT(HeapRegion* hr);
+
+  ~SparsePRT();
+
+  size_t occupied() const { return _next->occupied_cards(); }
+  size_t mem_size() const;
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  // If the table holds an entry for "region_ind", copies its
+  // cards into "cards", which must be an array of length at least
+  // "CardsPerEntry", and returns "true"; otherwise, returns "false".
+  bool get_cards(short region_ind, short* cards);
+
+  // If there is an entry for "region_ind", removes it and return "true";
+  // otherwise returns "false."
+  bool delete_entry(short region_ind);
+
+  // Clear the table, and reinitialize to initial capacity.
+  void clear();
+
+  // Ensure that "_cur" and "_next" point to the same table.
+  void cleanup();
+
+  // Clean up all tables on the expanded list.  Called single threaded.
+  static void cleanup_all();
+  RSHashTable* next() const { return _next; }
+
+
+  void init_iterator(SparsePRTIter* sprt_iter);
+
+  static void add_to_expanded_list(SparsePRT* sprt);
+  static SparsePRT* get_from_expanded_list();
+
+  bool contains_card(short region_id, short card_index) const {
+    return _next->contains_card(region_id, card_index);
+  }
+
+#if 0
+  void verify_is_cleared();
+  void print();
+#endif
+};
+
+
+class SparsePRTIter: public /* RSHashTable:: */RSHashTableIter {
+public:
+  SparsePRTIter(size_t heap_bot_card_ind) :
+    /* RSHashTable:: */RSHashTableIter(heap_bot_card_ind)
+  {}
+
+  void init(const SparsePRT* sprt) {
+    RSHashTableIter::init(sprt->next());
+  }
+  bool has_next(size_t& card_index) {
+    return RSHashTableIter::has_next(card_index);
+  }
+};
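+
+// Illustrative iteration sketch (the heap_bot_card_ind argument is a
+// placeholder; it is presumably the card index corresponding to the bottom
+// of the heap, as suggested by compute_card_ind()).
+#if 0
+static void iterate_sparse_prt_sketch(SparsePRT* sprt, size_t heap_bot_card_ind) {
+  SparsePRTIter iter(heap_bot_card_ind);
+  sprt->init_iterator(&iter);
+  size_t card_index;
+  while (iter.has_next(card_index)) {
+    // card_index is an absolute card index, offset by heap_bot_card_ind.
+  }
+}
+#endif // 0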
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_survRateGroup.cpp.incl"
+
+SurvRateGroup::SurvRateGroup(G1CollectorPolicy* g1p,
+                             const char* name,
+                             size_t summary_surv_rates_len) :
+    _g1p(g1p), _name(name),
+    _all_regions_allocated(0),
+    _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0),
+    _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL),
+    _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0),
+    _summary_surv_rates_len(summary_surv_rates_len),
+    _summary_surv_rates_max_len(0),
+    _summary_surv_rates(NULL) {
+
+  // the following will set up the arrays with length 1
+  _curr_length = 1;
+  stop_adding_regions();
+  guarantee( _array_length == 1, "invariant" );
+  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
+  _surv_rate_pred[0]->add(0.4);
+  all_surviving_words_recorded(false);
+  _curr_length = 0;
+
+  if (summary_surv_rates_len > 0) {
+    size_t length = summary_surv_rates_len;
+    _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
+    if (_summary_surv_rates == NULL) {
+      vm_exit_out_of_memory(sizeof(NumberSeq*) * length,
+                            "Not enough space for surv rate summary");
+    }
+    for (size_t i = 0; i < length; ++i)
+      _summary_surv_rates[i] = new NumberSeq();
+  }
+
+  start_adding_regions();
+}
+
+void
+SurvRateGroup::start_adding_regions() {
+  _setup_seq_num   = _array_length;
+  _curr_length     = _scan_only_prefix;
+  _accum_surv_rate = 0.0;
+
+#if 0
+  gclog_or_tty->print_cr("start adding regions, seq num %d, length %d",
+                         _setup_seq_num, _curr_length);
+#endif // 0
+}
+
+void
+SurvRateGroup::stop_adding_regions() {
+  size_t length = _curr_length;
+
+#if 0
+  gclog_or_tty->print_cr("stop adding regions, length %d", length);
+#endif // 0
+
+  if (length > _array_length) {
+    double* old_surv_rate = _surv_rate;
+    double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
+    TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
+
+    _surv_rate = NEW_C_HEAP_ARRAY(double, length);
+    if (_surv_rate == NULL) {
+      vm_exit_out_of_memory(sizeof(double) * length,
+                            "Not enough space for surv rate array.");
+    }
+    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length);
+    if (_accum_surv_rate_pred == NULL) {
+      vm_exit_out_of_memory(sizeof(double) * length,
+                         "Not enough space for accum surv rate pred array.");
+    }
+    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length);
+    if (_surv_rate_pred == NULL) {
+      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length,
+                            "Not enough space for surv rate pred array.");
+    }
+
+    for (size_t i = 0; i < _array_length; ++i)
+      _surv_rate_pred[i] = old_surv_rate_pred[i];
+
+#if 0
+    gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d",
+                  _array_length, length - 1);
+#endif // 0
+
+    for (size_t i = _array_length; i < length; ++i) {
+      _surv_rate_pred[i] = new TruncatedSeq(10);
+      // _surv_rate_pred[i]->add(last_pred);
+    }
+
+    _array_length = length;
+
+    if (old_surv_rate != NULL)
+      FREE_C_HEAP_ARRAY(double, old_surv_rate);
+    if (old_accum_surv_rate_pred != NULL)
+      FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred);
+    if (old_surv_rate_pred != NULL)
+      FREE_C_HEAP_ARRAY(TruncatedSeq*, old_surv_rate_pred);
+  }
+
+  for (size_t i = 0; i < _array_length; ++i)
+    _surv_rate[i] = 0.0;
+}
+
+double
+SurvRateGroup::accum_surv_rate(size_t adjustment) {
+  // we might relax this one in the future...
+  guarantee( adjustment == 0 || adjustment == 1, "pre-condition" );
+
+  double ret = _accum_surv_rate;
+  if (adjustment > 0) {
+    TruncatedSeq* seq = get_seq(_curr_length+1);
+    double surv_rate = _g1p->get_new_prediction(seq);
+    ret += surv_rate;
+  }
+
+  return ret;
+}
+
+int
+SurvRateGroup::next_age_index() {
+  TruncatedSeq* seq = get_seq(_curr_length);
+  double surv_rate = _g1p->get_new_prediction(seq);
+  _accum_surv_rate += surv_rate;
+
+  ++_curr_length;
+  return (int) ++_all_regions_allocated;
+}
+
+void
+SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) {
+  guarantee( scan_only_prefix <= _curr_length, "pre-condition" );
+  _scan_only_prefix = scan_only_prefix;
+}
+
+void
+SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) {
+  guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length,
+             "pre-condition" );
+  guarantee( _surv_rate[age_in_group] <= 0.00001,
+             "should only update each slot once" );
+
+  double surv_rate = (double) surv_words / (double) HeapRegion::GrainWords;
+  _surv_rate[age_in_group] = surv_rate;
+  _surv_rate_pred[age_in_group]->add(surv_rate);
+  if ((size_t)age_in_group < _summary_surv_rates_len) {
+    _summary_surv_rates[age_in_group]->add(surv_rate);
+    if ((size_t)(age_in_group+1) > _summary_surv_rates_max_len)
+      _summary_surv_rates_max_len = age_in_group+1;
+  }
+}
+
+void
+SurvRateGroup::all_surviving_words_recorded(bool propagate) {
+  if (propagate && _curr_length > 0) { // conservative
+    double surv_rate = _surv_rate_pred[_curr_length-1]->last();
+
+#if 0
+    gclog_or_tty->print_cr("propagating %1.2lf from %d to %d",
+                  surv_rate, _curr_length, _array_length - 1);
+#endif // 0
+
+    for (size_t i = _curr_length; i < _array_length; ++i) {
+      guarantee( _surv_rate[i] <= 0.00001,
+                 "the slot should not have been updated" );
+      _surv_rate_pred[i]->add(surv_rate);
+    }
+  }
+
+  double accum = 0.0;
+  double pred = 0.0;
+  for (size_t i = 0; i < _array_length; ++i) {
+    pred = _g1p->get_new_prediction(_surv_rate_pred[i]);
+    if (pred > 1.0) pred = 1.0;
+    accum += pred;
+    _accum_surv_rate_pred[i] = accum;
+    // gclog_or_tty->print_cr("age %3d, accum %10.2lf", i, accum);
+  }
+  _last_pred = pred;
+}
+
+#ifndef PRODUCT
+void
+SurvRateGroup::print() {
+  gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)",
+                _name, _curr_length, _scan_only_prefix);
+  for (size_t i = 0; i < _curr_length; ++i) {
+    gclog_or_tty->print_cr("    age %4d   surv rate %6.2lf %%   pred %6.2lf %%%s",
+                  i, _surv_rate[i] * 100.0,
+                  _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
+                  (i < _scan_only_prefix) ? " S-O" : "    ");
+  }
+}
+
+void
+SurvRateGroup::print_surv_rate_summary() {
+  size_t length = _summary_surv_rates_max_len;
+  if (length == 0)
+    return;
+
+  gclog_or_tty->print_cr("");
+  gclog_or_tty->print_cr("%s Rate Summary (for up to age %d)", _name, length-1);
+  gclog_or_tty->print_cr("      age range     survival rate (avg)      samples (avg)");
+  gclog_or_tty->print_cr("  ---------------------------------------------------------");
+
+  size_t index = 0;
+  size_t limit = MIN2((int) length, 10);
+  while (index < limit) {
+    gclog_or_tty->print_cr("           %4d                 %6.2lf%%             %6.2lf",
+                  index, _summary_surv_rates[index]->avg() * 100.0,
+                  (double) _summary_surv_rates[index]->num());
+    ++index;
+  }
+
+  gclog_or_tty->print_cr("  ---------------------------------------------------------");
+
+  int num = 0;
+  double sum = 0.0;
+  int samples = 0;
+  while (index < length) {
+    ++num;
+    sum += _summary_surv_rates[index]->avg() * 100.0;
+    samples += _summary_surv_rates[index]->num();
+    ++index;
+
+    if (index == length || num % 10 == 0) {
+      gclog_or_tty->print_cr("   %4d .. %4d                 %6.2lf%%             %6.2lf",
+                    (index-1) / 10 * 10, index-1, sum / (double) num,
+                    (double) samples / (double) num);
+      sum = 0.0;
+      num = 0;
+      samples = 0;
+    }
+  }
+
+  gclog_or_tty->print_cr("  ---------------------------------------------------------");
+}
+#endif // PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class G1CollectorPolicy;
+
+class SurvRateGroup : public CHeapObj {
+private:
+  G1CollectorPolicy* _g1p;
+  const char* _name;
+
+  size_t  _array_length;
+  double* _surv_rate;
+  double* _accum_surv_rate_pred;
+  double  _last_pred;
+  double  _accum_surv_rate;
+  TruncatedSeq** _surv_rate_pred;
+  NumberSeq**    _summary_surv_rates;
+  size_t         _summary_surv_rates_len;
+  size_t         _summary_surv_rates_max_len;
+
+  int _all_regions_allocated;
+  size_t _curr_length;
+  size_t _scan_only_prefix;
+  size_t _setup_seq_num;
+
+public:
+  SurvRateGroup(G1CollectorPolicy* g1p,
+                const char* name,
+                size_t summary_surv_rates_len);
+  void start_adding_regions();
+  void stop_adding_regions();
+  void record_scan_only_prefix(size_t scan_only_prefix);
+  void record_surviving_words(int age_in_group, size_t surv_words);
+  void all_surviving_words_recorded(bool propagate);
+  const char* name() { return _name; }
+
+  size_t region_num() { return _curr_length; }
+  size_t scan_only_length() { return _scan_only_prefix; }
+  double accum_surv_rate_pred(int age) {
+    assert(age >= 0, "must be");
+    if ((size_t)age < _array_length)
+      return _accum_surv_rate_pred[age];
+    else {
+      double diff = (double) (age - _array_length + 1);
+      return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred;
+    }
+  }
+
+  double accum_surv_rate(size_t adjustment);
+
+  TruncatedSeq* get_seq(size_t age) {
+    guarantee( 0 <= age, "pre-condition" );
+    if (age >= _setup_seq_num) {
+      guarantee( _setup_seq_num > 0, "invariant" );
+      age = _setup_seq_num-1;
+    }
+    TruncatedSeq* seq = _surv_rate_pred[age];
+    guarantee( seq != NULL, "invariant" );
+    return seq;
+  }
+
+  int next_age_index();
+  int age_in_group(int age_index) {
+    int ret = (int) (_all_regions_allocated -  age_index);
+    assert( ret >= 0, "invariant" );
+    return ret;
+  }
+  int recalculate_age_index(int age_index) {
+    int new_age_index = (int) _scan_only_prefix - age_in_group(age_index);
+    guarantee( new_age_index >= 0, "invariant" );
+    return new_age_index;
+  }
+  void finished_recalculating_age_indexes() {
+    _all_regions_allocated = (int) _scan_only_prefix;
+  }
+
+#ifndef PRODUCT
+  void print();
+  void print_surv_rate_summary();
+#endif // PRODUCT
+};
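+
+// Worked example of the extrapolation in accum_surv_rate_pred() above (all
+// numbers are hypothetical): with _array_length == 3,
+// _accum_surv_rate_pred == {0.4, 0.7, 0.9} and _last_pred == 0.2, a query for
+// age 5 falls past the end of the array, so the result is
+// 0.9 + (5 - 3 + 1) * 0.2 = 1.5.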
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_vm_operations_g1.cpp.incl"
+
+void VM_G1CollectForAllocation::doit() {
+  JvmtiGCForAllocationMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  _res = g1h->satisfy_failed_allocation(_size);
+  assert(g1h->is_in_or_null(_res), "result not in heap");
+}
+
+void VM_G1CollectFull::doit() {
+  JvmtiGCFullMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  GCCauseSetter x(g1h, _gc_cause);
+  g1h->do_full_collection(false /* clear_all_soft_refs */);
+}
+
+void VM_G1IncCollectionPause::doit() {
+  JvmtiGCForAllocationMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause);
+  g1h->do_collection_pause_at_safepoint(NULL);
+}
+
+void VM_G1PopRegionCollectionPause::doit() {
+  JvmtiGCForAllocationMarker jgcm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  g1h->do_collection_pause_at_safepoint(_pop_region);
+}
+
+
+void VM_CGC_Operation::doit() {
+  gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
+  TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
+  TraceTime t(_printGCMessage, PrintGC, true, gclog_or_tty);
+  SharedHeap* sh = SharedHeap::heap();
+  // This could go away if CollectedHeap gave access to _gc_is_active...
+  if (sh != NULL) {
+    IsGCActiveMark x;
+    _cl->do_void();
+  } else {
+    _cl->do_void();
+  }
+}
+
+bool VM_CGC_Operation::doit_prologue() {
+  Heap_lock->lock();
+  SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true;
+  return true;
+}
+
+void VM_CGC_Operation::doit_epilogue() {
+  SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false;
+  Heap_lock->unlock();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// VM_operations for the G1 collector.
+// VM_GC_Operation:
+//   - VM_CGC_Operation
+//   - VM_G1CollectFull
+//   - VM_G1CollectForAllocation
+//   - VM_G1IncCollectionPause
+//   - VM_G1PopRegionCollectionPause
+
+class VM_G1CollectFull: public VM_GC_Operation {
+ private:
+ public:
+  VM_G1CollectFull(int gc_count_before,
+                   GCCause::Cause gc_cause)
+    : VM_GC_Operation(gc_count_before)
+  {
+    _gc_cause = gc_cause;
+  }
+  ~VM_G1CollectFull() {}
+  virtual VMOp_Type type() const { return VMOp_G1CollectFull; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "full garbage-first collection";
+  }
+};
+
+class VM_G1CollectForAllocation: public VM_GC_Operation {
+ private:
+  HeapWord*   _res;
+  size_t      _size;                       // size of object to be allocated
+ public:
+  VM_G1CollectForAllocation(size_t size, int gc_count_before)
+    : VM_GC_Operation(gc_count_before) {
+    _size        = size;
+    _res         = NULL;
+  }
+  ~VM_G1CollectForAllocation()        {}
+  virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "garbage-first collection to satisfy allocation";
+  }
+  HeapWord* result() { return _res; }
+};
+
+class VM_G1IncCollectionPause: public VM_GC_Operation {
+ public:
+  VM_G1IncCollectionPause(int gc_count_before) :
+    VM_GC_Operation(gc_count_before) {}
+  virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "garbage-first incremental collection pause";
+  }
+};
+
+class VM_G1PopRegionCollectionPause: public VM_GC_Operation {
+  HeapRegion* _pop_region;
+ public:
+  VM_G1PopRegionCollectionPause(int gc_count_before, HeapRegion* pop_region) :
+    VM_GC_Operation(gc_count_before),
+    _pop_region(pop_region)
+  {}
+  virtual VMOp_Type type() const { return VMOp_G1PopRegionCollectionPause; }
+  virtual void doit();
+  virtual const char* name() const {
+    return "garbage-first popular region collection pause";
+  }
+};
+
+// Concurrent GC stop-the-world operations such as initial and final mark;
+// consider sharing these with CMS's counterparts.
+class VM_CGC_Operation: public VM_Operation {
+  VoidClosure* _cl;
+  const char* _printGCMessage;
+ public:
+  VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg) :
+    _cl(cl),
+    _printGCMessage(printGCMsg)
+    {}
+
+  ~VM_CGC_Operation() {}
+
+  virtual VMOp_Type type() const { return VMOp_CGC_Operation; }
+  virtual void doit();
+  virtual bool doit_prologue();
+  virtual void doit_epilogue();
+  virtual const char* name() const {
+    return "concurrent gc";
+  }
+};
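+
+// Illustrative submission sketch (simplified; the gc_count_before value is
+// normally sampled from the heap beforehand, and VM_GC_Operation's prologue
+// is what discards stale requests).
+#if 0
+static void request_g1_full_collection_sketch(int gc_count_before,
+                                              GCCause::Cause cause) {
+  VM_G1CollectFull op(gc_count_before, cause);
+  VMThread::execute(&op);   // runs doit() at a safepoint in the VM thread
+}
+#endif // 0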
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Wed Jul 05 16:43:17 2017 +0200
@@ -125,17 +125,6 @@
 
 compactingPermGenGen.cpp                concurrentMarkSweepGeneration.inline.hpp
 
-concurrentGCThread.cpp                  concurrentGCThread.hpp
-concurrentGCThread.cpp                  init.hpp
-concurrentGCThread.cpp                  instanceRefKlass.hpp
-concurrentGCThread.cpp                  interfaceSupport.hpp
-concurrentGCThread.cpp                  java.hpp
-concurrentGCThread.cpp                  javaCalls.hpp
-concurrentGCThread.cpp                  oop.inline.hpp
-concurrentGCThread.cpp                  systemDictionary.hpp
-
-concurrentGCThread.hpp                  thread.hpp
-
 concurrentMarkSweepGeneration.cpp       cardTableRS.hpp
 concurrentMarkSweepGeneration.cpp       cmsAdaptiveSizePolicy.hpp
 concurrentMarkSweepGeneration.cpp       cmsCollectorPolicy.hpp
@@ -167,7 +156,7 @@
 concurrentMarkSweepGeneration.cpp       vmCMSOperations.hpp
 concurrentMarkSweepGeneration.cpp       vmThread.hpp
 
-concurrentMarkSweepGeneration.hpp       bitMap.hpp
+concurrentMarkSweepGeneration.hpp       bitMap.inline.hpp
 concurrentMarkSweepGeneration.hpp       freeBlockDictionary.hpp
 concurrentMarkSweepGeneration.hpp       gSpaceCounters.hpp
 concurrentMarkSweepGeneration.hpp       gcStats.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,351 @@
+//
+// Copyright 2004-2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+//
+
+// NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps!
+
+bufferingOopClosure.hpp			genOopClosures.hpp
+bufferingOopClosure.hpp			generation.hpp
+bufferingOopClosure.hpp			os.hpp
+
+cardTableRS.cpp				concurrentMark.hpp
+cardTableRS.cpp				g1SATBCardTableModRefBS.hpp
+
+collectionSetChooser.cpp		g1CollectedHeap.hpp
+collectionSetChooser.cpp		g1CollectorPolicy.hpp
+collectionSetChooser.cpp		collectionSetChooser.hpp
+
+collectionSetChooser.hpp		heapRegion.hpp
+collectionSetChooser.hpp                growableArray.hpp
+
+concurrentG1Refine.cpp			atomic.hpp
+concurrentG1Refine.cpp			concurrentG1Refine.hpp
+concurrentG1Refine.cpp			concurrentG1RefineThread.hpp
+concurrentG1Refine.cpp			copy.hpp
+concurrentG1Refine.cpp			g1CollectedHeap.hpp
+concurrentG1Refine.cpp			g1RemSet.hpp
+
+concurrentG1Refine.hpp			globalDefinitions.hpp
+
+concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
+concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
+concurrentG1RefineThread.cpp		g1CollectedHeap.hpp
+concurrentG1RefineThread.cpp            g1CollectorPolicy.hpp
+concurrentG1RefineThread.cpp		handles.inline.hpp
+concurrentG1RefineThread.cpp		mutexLocker.hpp
+concurrentG1RefineThread.cpp		resourceArea.hpp
+
+concurrentG1RefineThread.hpp		concurrentGCThread.hpp
+concurrentG1RefineThread.hpp		coTracker.hpp
+
+concurrentMark.cpp			concurrentMark.hpp
+concurrentMark.cpp			concurrentMarkThread.inline.hpp
+concurrentMark.cpp			g1CollectedHeap.inline.hpp
+concurrentMark.cpp                      g1CollectorPolicy.hpp
+concurrentMark.cpp                      g1RemSet.hpp
+concurrentMark.cpp		        gcOverheadReporter.hpp
+concurrentMark.cpp		        genOopClosures.inline.hpp
+concurrentMark.cpp                      heapRegionRemSet.hpp
+concurrentMark.cpp                      heapRegionSeq.inline.hpp
+concurrentMark.cpp                      handles.inline.hpp
+concurrentMark.cpp			java.hpp
+concurrentMark.cpp			oop.inline.hpp
+concurrentMark.cpp                      referencePolicy.hpp
+concurrentMark.cpp			resourceArea.hpp
+concurrentMark.cpp			symbolTable.hpp
+
+concurrentMark.hpp			coTracker.hpp
+concurrentMark.hpp			heapRegion.hpp
+concurrentMark.hpp			taskqueue.hpp
+
+concurrentMarkThread.cpp		concurrentMarkThread.inline.hpp
+concurrentMarkThread.cpp		g1CollectedHeap.inline.hpp
+concurrentMarkThread.cpp		g1CollectorPolicy.hpp
+concurrentMarkThread.cpp                g1MMUTracker.hpp
+concurrentMarkThread.cpp		resourceArea.hpp
+concurrentMarkThread.cpp		vm_operations_g1.hpp
+concurrentMarkThread.cpp                vmThread.hpp
+
+concurrentMarkThread.hpp		concurrentGCThread.hpp
+
+concurrentMarkThread.inline.hpp		concurrentMark.hpp
+concurrentMarkThread.inline.hpp		concurrentMarkThread.hpp
+
+concurrentZFThread.cpp			concurrentZFThread.hpp
+concurrentZFThread.cpp			heapRegion.hpp
+concurrentZFThread.cpp			g1CollectedHeap.inline.hpp
+concurrentZFThread.cpp			copy.hpp
+concurrentZFThread.cpp			mutexLocker.hpp
+concurrentZFThread.cpp			space.inline.hpp
+
+concurrentZFThread.hpp			concurrentGCThread.hpp
+concurrentZFThread.hpp			coTracker.hpp
+
+dirtyCardQueue.cpp                      atomic.hpp
+dirtyCardQueue.cpp                      dirtyCardQueue.hpp
+dirtyCardQueue.cpp			heapRegionRemSet.hpp
+dirtyCardQueue.cpp                      mutexLocker.hpp
+dirtyCardQueue.cpp                      ptrQueue.inline.hpp
+dirtyCardQueue.cpp                      safepoint.hpp
+dirtyCardQueue.cpp                      thread.hpp
+dirtyCardQueue.cpp                      thread_<os_family>.inline.hpp
+dirtyCardQueue.cpp                      workgroup.hpp
+
+dirtyCardQueue.hpp                      allocation.hpp
+dirtyCardQueue.hpp                      ptrQueue.hpp
+
+g1BlockOffsetTable.cpp			g1BlockOffsetTable.inline.hpp
+g1BlockOffsetTable.cpp			java.hpp
+g1BlockOffsetTable.cpp			oop.inline.hpp
+g1BlockOffsetTable.cpp			space.hpp
+
+g1BlockOffsetTable.hpp			globalDefinitions.hpp
+g1BlockOffsetTable.hpp			memRegion.hpp
+g1BlockOffsetTable.hpp			virtualspace.hpp
+
+g1BlockOffsetTable.inline.hpp		g1BlockOffsetTable.hpp
+g1BlockOffsetTable.inline.hpp		space.hpp
+
+g1CollectedHeap.cpp                     aprofiler.hpp
+g1CollectedHeap.cpp                     bufferingOopClosure.hpp
+g1CollectedHeap.cpp                     concurrentG1Refine.hpp
+g1CollectedHeap.cpp                     concurrentG1RefineThread.hpp
+g1CollectedHeap.cpp			concurrentMarkThread.inline.hpp
+g1CollectedHeap.cpp                     concurrentZFThread.hpp
+g1CollectedHeap.cpp                     g1CollectedHeap.inline.hpp
+g1CollectedHeap.cpp                     g1CollectorPolicy.hpp
+g1CollectedHeap.cpp                     g1MarkSweep.hpp
+g1CollectedHeap.cpp                     g1RemSet.hpp
+g1CollectedHeap.cpp                     g1OopClosures.inline.hpp
+g1CollectedHeap.cpp                     genOopClosures.inline.hpp
+g1CollectedHeap.cpp                     gcLocker.inline.hpp
+g1CollectedHeap.cpp                     gcOverheadReporter.hpp
+g1CollectedHeap.cpp                     generationSpec.hpp
+g1CollectedHeap.cpp                     heapRegionRemSet.hpp
+g1CollectedHeap.cpp                     heapRegionSeq.inline.hpp
+g1CollectedHeap.cpp                     icBuffer.hpp
+g1CollectedHeap.cpp                     isGCActiveMark.hpp
+g1CollectedHeap.cpp			oop.inline.hpp
+g1CollectedHeap.cpp			oop.pcgc.inline.hpp
+g1CollectedHeap.cpp			parGCAllocBuffer.hpp
+g1CollectedHeap.cpp                     vm_operations_g1.hpp
+g1CollectedHeap.cpp                     vmThread.hpp
+
+g1CollectedHeap.hpp                     barrierSet.hpp
+g1CollectedHeap.hpp                     heapRegion.hpp
+g1CollectedHeap.hpp                     memRegion.hpp
+g1CollectedHeap.hpp                     sharedHeap.hpp
+
+g1CollectedHeap.inline.hpp              concurrentMark.hpp
+g1CollectedHeap.inline.hpp              g1CollectedHeap.hpp
+g1CollectedHeap.inline.hpp              heapRegionSeq.hpp
+g1CollectedHeap.inline.hpp		taskqueue.hpp
+
+g1CollectorPolicy.cpp			concurrentG1Refine.hpp
+g1CollectorPolicy.cpp			concurrentMark.hpp
+g1CollectorPolicy.cpp			concurrentMarkThread.inline.hpp
+g1CollectorPolicy.cpp			debug.hpp
+g1CollectorPolicy.cpp			java.hpp
+g1CollectorPolicy.cpp                   g1CollectedHeap.hpp
+g1CollectorPolicy.cpp                   g1CollectorPolicy.hpp
+g1CollectorPolicy.cpp                   heapRegionRemSet.hpp
+g1CollectorPolicy.cpp			mutexLocker.hpp
+
+g1CollectorPolicy.hpp                   collectorPolicy.hpp
+g1CollectorPolicy.hpp                   collectionSetChooser.hpp
+g1CollectorPolicy.hpp			g1MMUTracker.hpp
+
+g1_globals.cpp				g1_globals.hpp
+
+g1_globals.hpp                          globals.hpp
+
+globals.cpp                             g1_globals.hpp
+top.hpp                                 g1_globals.hpp
+
+g1MarkSweep.cpp                         aprofiler.hpp
+g1MarkSweep.cpp                         biasedLocking.hpp
+g1MarkSweep.cpp                         codeCache.hpp
+g1MarkSweep.cpp                         events.hpp
+g1MarkSweep.cpp                         fprofiler.hpp
+g1MarkSweep.hpp                         g1CollectedHeap.hpp
+g1MarkSweep.cpp                         g1MarkSweep.hpp
+g1MarkSweep.cpp                         gcLocker.hpp
+g1MarkSweep.cpp                         genCollectedHeap.hpp
+g1MarkSweep.hpp                         heapRegion.hpp
+g1MarkSweep.cpp                         icBuffer.hpp
+g1MarkSweep.cpp                         instanceRefKlass.hpp
+g1MarkSweep.cpp                         javaClasses.hpp
+g1MarkSweep.cpp				jvmtiExport.hpp
+g1MarkSweep.cpp                         copy.hpp
+g1MarkSweep.cpp                         modRefBarrierSet.hpp
+g1MarkSweep.cpp                         oop.inline.hpp
+g1MarkSweep.cpp                         referencePolicy.hpp
+g1MarkSweep.cpp                         space.hpp
+g1MarkSweep.cpp                         symbolTable.hpp
+g1MarkSweep.cpp                         synchronizer.hpp
+g1MarkSweep.cpp                         systemDictionary.hpp
+g1MarkSweep.cpp                         thread.hpp
+g1MarkSweep.cpp                         vmSymbols.hpp
+g1MarkSweep.cpp                         vmThread.hpp
+
+g1MarkSweep.hpp                         generation.hpp
+g1MarkSweep.hpp                         growableArray.hpp
+g1MarkSweep.hpp                         markOop.hpp
+g1MarkSweep.hpp                         genMarkSweep.hpp
+g1MarkSweep.hpp                         oop.hpp
+g1MarkSweep.hpp                         timer.hpp
+g1MarkSweep.hpp                         universe.hpp
+
+g1OopClosures.inline.hpp		concurrentMark.hpp
+g1OopClosures.inline.hpp		g1OopClosures.hpp
+g1OopClosures.inline.hpp		g1CollectedHeap.hpp
+g1OopClosures.inline.hpp		g1RemSet.hpp
+
+g1MMUTracker.cpp			g1MMUTracker.hpp
+g1MMUTracker.cpp			ostream.hpp
+g1MMUTracker.cpp			mutexLocker.hpp
+
+g1MMUTracker.hpp			debug.hpp
+
+g1RemSet.cpp				bufferingOopClosure.hpp
+g1RemSet.cpp				concurrentG1Refine.hpp
+g1RemSet.cpp				concurrentG1RefineThread.hpp
+g1RemSet.cpp				g1BlockOffsetTable.inline.hpp
+g1RemSet.cpp				g1CollectedHeap.inline.hpp
+g1RemSet.cpp				g1CollectorPolicy.hpp
+g1RemSet.cpp				g1RemSet.inline.hpp
+g1RemSet.cpp				g1OopClosures.inline.hpp
+g1RemSet.cpp				heapRegionSeq.inline.hpp
+g1RemSet.cpp				intHisto.hpp
+g1RemSet.cpp				iterator.hpp
+g1RemSet.cpp				oop.inline.hpp
+
+g1RemSet.inline.hpp			g1RemSet.hpp
+g1RemSet.inline.hpp			heapRegionRemSet.hpp
+
+g1SATBCardTableModRefBS.cpp		g1SATBCardTableModRefBS.hpp
+g1SATBCardTableModRefBS.cpp		heapRegion.hpp
+g1SATBCardTableModRefBS.cpp		mutexLocker.hpp
+g1SATBCardTableModRefBS.cpp		thread.hpp
+g1SATBCardTableModRefBS.cpp		thread_<os_family>.inline.hpp
+g1SATBCardTableModRefBS.cpp		satbQueue.hpp
+
+g1SATBCardTableModRefBS.hpp		cardTableModRefBS.hpp
+g1SATBCardTableModRefBS.hpp		memRegion.hpp
+
+heapRegion.cpp                          concurrentZFThread.hpp
+heapRegion.cpp                          g1BlockOffsetTable.inline.hpp
+heapRegion.cpp                          g1CollectedHeap.inline.hpp
+heapRegion.cpp                          g1OopClosures.inline.hpp
+heapRegion.cpp                          genOopClosures.inline.hpp
+heapRegion.cpp                          heapRegion.inline.hpp
+heapRegion.cpp                          heapRegionRemSet.hpp
+heapRegion.cpp                          heapRegionSeq.inline.hpp
+heapRegion.cpp                          iterator.hpp
+heapRegion.cpp                          oop.inline.hpp
+
+heapRegion.hpp                          space.hpp
+heapRegion.hpp                          spaceDecorator.hpp
+heapRegion.hpp                          g1BlockOffsetTable.inline.hpp
+heapRegion.hpp                          watermark.hpp
+heapRegion.hpp				g1_specialized_oop_closures.hpp
+heapRegion.hpp				survRateGroup.hpp
+
+heapRegionRemSet.hpp			sparsePRT.hpp
+
+heapRegionRemSet.cpp                    allocation.hpp
+heapRegionRemSet.cpp                    bitMap.inline.hpp
+heapRegionRemSet.cpp                    g1BlockOffsetTable.inline.hpp
+heapRegionRemSet.cpp                    g1CollectedHeap.inline.hpp
+heapRegionRemSet.cpp                    heapRegionRemSet.hpp
+heapRegionRemSet.cpp			heapRegionSeq.inline.hpp
+heapRegionRemSet.cpp                    globalDefinitions.hpp
+heapRegionRemSet.cpp                    space.inline.hpp
+
+heapRegionSeq.cpp                       allocation.hpp
+heapRegionSeq.cpp                       g1CollectedHeap.hpp
+heapRegionSeq.cpp                       heapRegionSeq.hpp
+
+heapRegionSeq.hpp                       growableArray.hpp
+heapRegionSeq.hpp                       heapRegion.hpp
+
+heapRegionSeq.inline.hpp                heapRegionSeq.hpp
+
+klass.hpp				g1OopClosures.hpp
+
+ptrQueue.cpp                            allocation.hpp
+ptrQueue.cpp                            allocation.inline.hpp
+ptrQueue.cpp                            mutex.hpp
+ptrQueue.cpp                            mutexLocker.hpp
+ptrQueue.cpp                            ptrQueue.hpp
+ptrQueue.cpp                            ptrQueue.inline.hpp
+ptrQueue.cpp                            thread_<os_family>.inline.hpp
+
+ptrQueue.hpp                            allocation.hpp
+ptrQueue.hpp                            sizes.hpp
+
+ptrQueue.inline.hpp                     ptrQueue.hpp
+
+satbQueue.cpp                           allocation.inline.hpp
+satbQueue.cpp                           mutexLocker.hpp
+satbQueue.cpp                           ptrQueue.inline.hpp
+satbQueue.cpp                           satbQueue.hpp
+satbQueue.cpp                           sharedHeap.hpp
+satbQueue.cpp                           thread.hpp
+
+satbQueue.hpp                           ptrQueue.hpp
+
+sparsePRT.cpp				allocation.inline.hpp
+sparsePRT.cpp				cardTableModRefBS.hpp
+sparsePRT.cpp				heapRegion.hpp
+sparsePRT.cpp				heapRegionRemSet.hpp
+sparsePRT.cpp				mutexLocker.hpp
+sparsePRT.cpp				sparsePRT.hpp
+sparsePRT.cpp				space.inline.hpp
+
+sparsePRT.hpp				allocation.hpp
+sparsePRT.hpp				cardTableModRefBS.hpp
+sparsePRT.hpp				globalDefinitions.hpp
+sparsePRT.hpp				heapRegion.hpp
+sparsePRT.hpp				mutex.hpp
+
+specialized_oop_closures.hpp		g1_specialized_oop_closures.hpp
+
+survRateGroup.hpp			numberSeq.hpp
+
+survRateGroup.cpp			allocation.hpp
+survRateGroup.cpp			g1CollectedHeap.hpp
+survRateGroup.cpp			g1CollectorPolicy.hpp
+survRateGroup.cpp			heapRegion.hpp
+survRateGroup.cpp			survRateGroup.hpp
+
+thread.cpp				concurrentMarkThread.inline.hpp
+
+universe.cpp                            g1CollectedHeap.hpp
+universe.cpp                            g1CollectorPolicy.hpp
+
+vm_operations_g1.hpp			vmGCOperations.hpp
+
+vm_operations_g1.cpp			vm_operations_g1.hpp
+vm_operations_g1.cpp                    g1CollectedHeap.hpp
+vm_operations_g1.cpp                    isGCActiveMark.hpp
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Wed Jul 05 16:43:17 2017 +0200
@@ -150,7 +150,6 @@
 parallelScavengeHeap.hpp                psYoungGen.hpp
 parallelScavengeHeap.hpp                ostream.hpp
 
-parMarkBitMap.cpp			bitMap.hpp
 parMarkBitMap.cpp			bitMap.inline.hpp
 parMarkBitMap.cpp			oop.inline.hpp
 parMarkBitMap.cpp			os.hpp
@@ -159,7 +158,6 @@
 parMarkBitMap.cpp			parMarkBitMap.inline.hpp
 parMarkBitMap.cpp                       psParallelCompact.hpp
 
-parMarkBitMap.hpp			bitMap.hpp
 parMarkBitMap.hpp			bitMap.inline.hpp
 parMarkBitMap.hpp			psVirtualspace.hpp
 
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared	Wed Jul 05 16:43:17 2017 +0200
@@ -24,6 +24,23 @@
 
 // NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps!
 
+concurrentGCThread.cpp                  concurrentGCThread.hpp
+concurrentGCThread.cpp                  init.hpp
+concurrentGCThread.cpp                  instanceRefKlass.hpp
+concurrentGCThread.cpp                  interfaceSupport.hpp
+concurrentGCThread.cpp                  java.hpp
+concurrentGCThread.cpp                  javaCalls.hpp
+concurrentGCThread.cpp                  oop.inline.hpp
+concurrentGCThread.cpp                  systemDictionary.hpp
+
+concurrentGCThread.hpp                  thread.hpp
+
+coTracker.hpp                           globalDefinitions.hpp
+coTracker.hpp                           numberSeq.hpp
+
+coTracker.cpp                           coTracker.hpp
+coTracker.cpp                           os.hpp
+
 allocationStats.cpp                     allocationStats.hpp
 allocationStats.cpp                     ostream.hpp
 
@@ -37,6 +54,13 @@
 gcAdaptivePolicyCounters.cpp            resourceArea.hpp
 gcAdaptivePolicyCounters.cpp            gcAdaptivePolicyCounters.hpp
 
+gcOverheadReporter.cpp                  allocation.inline.hpp
+gcOverheadReporter.cpp                  concurrentGCThread.hpp
+gcOverheadReporter.cpp                  coTracker.hpp
+gcOverheadReporter.cpp                  gcOverheadReporter.hpp
+gcOverheadReporter.cpp                  ostream.hpp
+gcOverheadReporter.cpp                  thread_<os_family>.inline.hpp
+
 gSpaceCounters.cpp                      generation.hpp
 gSpaceCounters.cpp                      resourceArea.hpp
 gSpaceCounters.cpp                      gSpaceCounters.hpp
@@ -75,3 +99,5 @@
 spaceCounters.hpp                       mutableSpace.hpp
 spaceCounters.hpp                       perfData.hpp
 spaceCounters.hpp                       generationCounters.hpp
+
+vmGCOperations.cpp                      g1CollectedHeap.hpp
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -192,16 +192,16 @@
 };
 
 inline ParMarkBitMap::ParMarkBitMap():
-  _beg_bits(NULL, 0),
-  _end_bits(NULL, 0)
+  _beg_bits(),
+  _end_bits()
 {
   _region_start = 0;
   _virtual_space = 0;
 }
 
 inline ParMarkBitMap::ParMarkBitMap(MemRegion covered_region):
-  _beg_bits(NULL, 0),
-  _end_bits(NULL, 0)
+  _beg_bits(),
+  _end_bits()
 {
   initialize(covered_region);
 }
@@ -325,7 +325,7 @@
 
 inline size_t ParMarkBitMap::obj_size(idx_t beg_bit) const
 {
-  const idx_t end_bit = _end_bits.find_next_one_bit(beg_bit, size());
+  const idx_t end_bit = _end_bits.get_next_one_offset_inline(beg_bit, size());
   assert(is_marked(beg_bit), "obj not marked");
   assert(end_bit < size(), "end bit missing");
   return obj_size(beg_bit, end_bit);
@@ -384,13 +384,13 @@
 inline ParMarkBitMap::idx_t
 ParMarkBitMap::find_obj_beg(idx_t beg, idx_t end) const
 {
-  return _beg_bits.find_next_one_bit(beg, end);
+  return _beg_bits.get_next_one_offset_inline_aligned_right(beg, end);
 }
 
 inline ParMarkBitMap::idx_t
 ParMarkBitMap::find_obj_end(idx_t beg, idx_t end) const
 {
-  return _end_bits.find_next_one_bit(beg, end);
+  return _end_bits.get_next_one_offset_inline_aligned_right(beg, end);
 }
 
 inline HeapWord*
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -210,10 +210,6 @@
   PSScavenge::initialize();
   if (UseParallelOldGC) {
     PSParallelCompact::post_initialize();
-    if (VerifyParallelOldWithMarkSweep) {
-      // Will be used for verification of par old.
-      PSMarkSweep::initialize();
-    }
   } else {
     PSMarkSweep::initialize();
   }
@@ -402,7 +398,7 @@
         return result;
       }
       if (!is_tlab &&
-          size >= (young_gen()->eden_space()->capacity_in_words() / 2)) {
+          size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) {
         result = old_gen()->allocate(size, is_tlab);
         if (result != NULL) {
           return result;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -184,6 +184,20 @@
   size_t tlab_capacity(Thread* thr) const;
   size_t unsafe_max_tlab_alloc(Thread* thr) const;
 
+  // Can a compiler initialize a new object without store barriers?
+  // This permission only extends from the creation of a new object
+  // via a TLAB up to the first subsequent safepoint.
+  virtual bool can_elide_tlab_store_barriers() const {
+    return true;
+  }
+
+  // Can a compiler elide a store barrier when it writes
+  // a permanent oop into the heap?  Applies when the compiler
+  // is storing x to the heap, where x->is_perm() is true.
+  virtual bool can_elide_permanent_oop_store_barriers() const {
+    return true;
+  }
+
   void oop_iterate(OopClosure* cl);
   void object_iterate(ObjectClosure* cl);
   void permanent_oop_iterate(OopClosure* cl);
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -146,7 +146,7 @@
 {
   ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array();
+  RegionTaskQueueSet* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(parallel_gc_threads, qset);
   GCTaskQueue* q = GCTaskQueue::create();
   for(uint i=0; i<parallel_gc_threads; i++) {
@@ -205,38 +205,38 @@
 }
 
 //
-// StealChunkCompactionTask
+// StealRegionCompactionTask
 //
 
 
-StealChunkCompactionTask::StealChunkCompactionTask(ParallelTaskTerminator* t) :
-  _terminator(t) {};
+StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t):
+  _terminator(t) {}
 
-void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) {
+void StealRegionCompactionTask::do_it(GCTaskManager* manager, uint which) {
   assert(Universe::heap()->is_gc_active(), "called outside gc");
 
-  NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask",
+  NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask",
     PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));
 
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
-  // Has to drain stacks first because there may be chunks on
+  // Has to drain stacks first because there may be regions
   // preloaded onto the stack and this thread may never have
   // done a draining task.  Are the draining tasks needed?
 
-  cm->drain_chunk_stacks();
+  cm->drain_region_stacks();
 
-  size_t chunk_index = 0;
+  size_t region_index = 0;
   int random_seed = 17;
 
   // If we're the termination task, try 10 rounds of stealing before
   // setting the termination flag
 
   while(true) {
-    if (ParCompactionManager::steal(which, &random_seed, chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(cm, chunk_index);
-      cm->drain_chunk_stacks();
+    if (ParCompactionManager::steal(which, &random_seed, region_index)) {
+      PSParallelCompact::fill_and_update_region(cm, region_index);
+      cm->drain_region_stacks();
     } else {
       if (terminator()->offer_termination()) {
         break;
@@ -249,11 +249,10 @@
 
 UpdateDensePrefixTask::UpdateDensePrefixTask(
                                    PSParallelCompact::SpaceId space_id,
-                                   size_t chunk_index_start,
-                                   size_t chunk_index_end) :
-  _space_id(space_id), _chunk_index_start(chunk_index_start),
-  _chunk_index_end(chunk_index_end)
-{}
+                                   size_t region_index_start,
+                                   size_t region_index_end) :
+  _space_id(space_id), _region_index_start(region_index_start),
+  _region_index_end(region_index_end) {}
 
 void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
 
@@ -265,8 +264,8 @@
 
   PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
                                                          _space_id,
-                                                         _chunk_index_start,
-                                                         _chunk_index_end);
+                                                         _region_index_start,
+                                                         _region_index_end);
 }
 
 void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) {
@@ -278,6 +277,6 @@
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
-  // Process any chunks already in the compaction managers stacks.
-  cm->drain_chunk_stacks();
+  // Process any regions already in the compaction managers stacks.
+  cm->drain_region_stacks();
 }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -188,18 +188,18 @@
 };
 
 //
-// StealChunkCompactionTask
+// StealRegionCompactionTask
 //
 // This task is used to distribute work to idle threads.
 //
 
-class StealChunkCompactionTask : public GCTask {
+class StealRegionCompactionTask : public GCTask {
  private:
    ParallelTaskTerminator* const _terminator;
  public:
-  StealChunkCompactionTask(ParallelTaskTerminator* t);
+  StealRegionCompactionTask(ParallelTaskTerminator* t);
 
-  char* name() { return (char *)"steal-chunk-task"; }
+  char* name() { return (char *)"steal-region-task"; }
   ParallelTaskTerminator* terminator() { return _terminator; }
 
   virtual void do_it(GCTaskManager* manager, uint which);
@@ -215,15 +215,15 @@
 class UpdateDensePrefixTask : public GCTask {
  private:
   PSParallelCompact::SpaceId _space_id;
-  size_t _chunk_index_start;
-  size_t _chunk_index_end;
+  size_t _region_index_start;
+  size_t _region_index_end;
 
  public:
   char* name() { return (char *)"update-dense_prefix-task"; }
 
   UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
-                        size_t chunk_index_start,
-                        size_t chunk_index_end);
+                        size_t region_index_start,
+                        size_t region_index_end);
 
   virtual void do_it(GCTaskManager* manager, uint which);
 };
@@ -231,17 +231,17 @@
 //
 // DrainStacksCompactionTask
 //
-// This task processes chunks that have been added to the stacks of each
+// This task processes regions that have been added to the stacks of each
 // compaction manager.
 //
 // Trying to use one draining thread does not work because there are no
 // guarantees about which task will be picked up by which thread.  For example,
-// if thread A gets all the preloaded chunks, thread A may not get a draining
+// if thread A gets all the preloaded regions, thread A may not get a draining
 // task (they may all be done by other threads).
 //
 
 class DrainStacksCompactionTask : public GCTask {
  public:
-  char* name() { return (char *)"drain-chunk-task"; }
+  char* name() { return (char *)"drain-region-task"; }
   virtual void do_it(GCTaskManager* manager, uint which);
 };
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -30,7 +30,7 @@
 OopTaskQueueSet*     ParCompactionManager::_stack_array = NULL;
 ObjectStartArray*    ParCompactionManager::_start_array = NULL;
 ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
-ChunkTaskQueueSet*   ParCompactionManager::_chunk_array = NULL;
+RegionTaskQueueSet*   ParCompactionManager::_region_array = NULL;
 
 ParCompactionManager::ParCompactionManager() :
     _action(CopyAndUpdate) {
@@ -46,13 +46,13 @@
 
   // We want the overflow stack to be permanent
   _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true);
-#ifdef USE_ChunkTaskQueueWithOverflow
-  chunk_stack()->initialize();
+#ifdef USE_RegionTaskQueueWithOverflow
+  region_stack()->initialize();
 #else
-  chunk_stack()->initialize();
+  region_stack()->initialize();
 
   // We want the overflow stack to be permanent
-  _chunk_overflow_stack =
+  _region_overflow_stack =
     new (ResourceObj::C_HEAP) GrowableArray<size_t>(10, true);
 #endif
 
@@ -86,18 +86,18 @@
 
   _stack_array = new OopTaskQueueSet(parallel_gc_threads);
   guarantee(_stack_array != NULL, "Count not initialize promotion manager");
-  _chunk_array = new ChunkTaskQueueSet(parallel_gc_threads);
-  guarantee(_chunk_array != NULL, "Count not initialize promotion manager");
+  _region_array = new RegionTaskQueueSet(parallel_gc_threads);
+  guarantee(_region_array != NULL, "Could not initialize promotion manager");
 
   // Create and register the ParCompactionManager(s) for the worker threads.
   for(uint i=0; i<parallel_gc_threads; i++) {
     _manager_array[i] = new ParCompactionManager();
     guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
     stack_array()->register_queue(i, _manager_array[i]->marking_stack());
-#ifdef USE_ChunkTaskQueueWithOverflow
-    chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue());
+#ifdef USE_RegionTaskQueueWithOverflow
+    region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue());
 #else
-    chunk_array()->register_queue(i, _manager_array[i]->chunk_stack());
+    region_array()->register_queue(i, _manager_array[i]->region_stack());
 #endif
   }
 
@@ -153,31 +153,31 @@
   return NULL;
 }
 
-// Save chunk on a stack
-void ParCompactionManager::save_for_processing(size_t chunk_index) {
+// Save region on a stack
+void ParCompactionManager::save_for_processing(size_t region_index) {
 #ifdef ASSERT
   const ParallelCompactData& sd = PSParallelCompact::summary_data();
-  ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index);
-  assert(chunk_ptr->claimed(), "must be claimed");
-  assert(chunk_ptr->_pushed++ == 0, "should only be pushed once");
+  ParallelCompactData::RegionData* const region_ptr = sd.region(region_index);
+  assert(region_ptr->claimed(), "must be claimed");
+  assert(region_ptr->_pushed++ == 0, "should only be pushed once");
 #endif
-  chunk_stack_push(chunk_index);
+  region_stack_push(region_index);
 }
 
-void ParCompactionManager::chunk_stack_push(size_t chunk_index) {
+void ParCompactionManager::region_stack_push(size_t region_index) {
 
-#ifdef USE_ChunkTaskQueueWithOverflow
-  chunk_stack()->save(chunk_index);
+#ifdef USE_RegionTaskQueueWithOverflow
+  region_stack()->save(region_index);
 #else
-  if(!chunk_stack()->push(chunk_index)) {
-    chunk_overflow_stack()->push(chunk_index);
+  if(!region_stack()->push(region_index)) {
+    region_overflow_stack()->push(region_index);
   }
 #endif
 }
 
-bool ParCompactionManager::retrieve_for_processing(size_t& chunk_index) {
-#ifdef USE_ChunkTaskQueueWithOverflow
-  return chunk_stack()->retrieve(chunk_index);
+bool ParCompactionManager::retrieve_for_processing(size_t& region_index) {
+#ifdef USE_RegionTaskQueueWithOverflow
+  return region_stack()->retrieve(region_index);
 #else
   // Should not be used in the parallel case
   ShouldNotReachHere();
@@ -230,14 +230,14 @@
   assert(overflow_stack()->length() == 0, "Sanity");
 }
 
-void ParCompactionManager::drain_chunk_overflow_stack() {
-  size_t chunk_index = (size_t) -1;
-  while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
-    PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+void ParCompactionManager::drain_region_overflow_stack() {
+  size_t region_index = (size_t) -1;
+  while(region_stack()->retrieve_from_overflow(region_index)) {
+    PSParallelCompact::fill_and_update_region(this, region_index);
   }
 }
 
-void ParCompactionManager::drain_chunk_stacks() {
+void ParCompactionManager::drain_region_stacks() {
 #ifdef ASSERT
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
@@ -249,42 +249,42 @@
 #if 1 // def DO_PARALLEL - the serial code hasn't been updated
   do {
 
-#ifdef USE_ChunkTaskQueueWithOverflow
+#ifdef USE_RegionTaskQueueWithOverflow
     // Drain overflow stack first, so other threads can steal from
     // claimed stack while we work.
-    size_t chunk_index = (size_t) -1;
-    while(chunk_stack()->retrieve_from_overflow(chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    size_t region_index = (size_t) -1;
+    while(region_stack()->retrieve_from_overflow(region_index)) {
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
 
-    while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) {
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    while (region_stack()->retrieve_from_stealable_queue(region_index)) {
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
-  } while (!chunk_stack()->is_empty());
+  } while (!region_stack()->is_empty());
 #else
     // Drain overflow stack first, so other threads can steal from
     // claimed stack while we work.
-    while(!chunk_overflow_stack()->is_empty()) {
-      size_t chunk_index = chunk_overflow_stack()->pop();
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+    while(!region_overflow_stack()->is_empty()) {
+      size_t region_index = region_overflow_stack()->pop();
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
 
-    size_t chunk_index = -1;
+    size_t region_index = -1;
     // obj is a reference!!!
-    while (chunk_stack()->pop_local(chunk_index)) {
+    while (region_stack()->pop_local(region_index)) {
       // It would be nice to assert about the type of objects we might
       // pop, but they can come from anywhere, unfortunately.
-      PSParallelCompact::fill_and_update_chunk(this, chunk_index);
+      PSParallelCompact::fill_and_update_region(this, region_index);
     }
-  } while((chunk_stack()->size() != 0) ||
-          (chunk_overflow_stack()->length() != 0));
+  } while((region_stack()->size() != 0) ||
+          (region_overflow_stack()->length() != 0));
 #endif
 
-#ifdef USE_ChunkTaskQueueWithOverflow
-  assert(chunk_stack()->is_empty(), "Sanity");
+#ifdef USE_RegionTaskQueueWithOverflow
+  assert(region_stack()->is_empty(), "Sanity");
 #else
-  assert(chunk_stack()->size() == 0, "Sanity");
-  assert(chunk_overflow_stack()->length() == 0, "Sanity");
+  assert(region_stack()->size() == 0, "Sanity");
+  assert(region_overflow_stack()->length() == 0, "Sanity");
 #endif
 #else
   oop obj;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -52,7 +52,7 @@
   friend class ParallelTaskTerminator;
   friend class ParMarkBitMap;
   friend class PSParallelCompact;
-  friend class StealChunkCompactionTask;
+  friend class StealRegionCompactionTask;
   friend class UpdateAndFillClosure;
   friend class RefProcTaskExecutor;
 
@@ -72,27 +72,27 @@
 // ------------------------  End don't putback if not needed
 
  private:
-  static ParCompactionManager**  _manager_array;
-  static OopTaskQueueSet*      _stack_array;
-  static ObjectStartArray*     _start_array;
-  static ChunkTaskQueueSet*    _chunk_array;
-  static PSOldGen*             _old_gen;
+  static ParCompactionManager** _manager_array;
+  static OopTaskQueueSet*       _stack_array;
+  static ObjectStartArray*      _start_array;
+  static RegionTaskQueueSet*    _region_array;
+  static PSOldGen*              _old_gen;
 
-  OopTaskQueue                 _marking_stack;
-  GrowableArray<oop>*          _overflow_stack;
+  OopTaskQueue                  _marking_stack;
+  GrowableArray<oop>*           _overflow_stack;
   // Is there a way to reuse the _marking_stack for the
-  // saving empty chunks?  For now just create a different
+  // saving of empty regions?  For now just create a different
   // type of TaskQueue.
 
-#ifdef USE_ChunkTaskQueueWithOverflow
-  ChunkTaskQueueWithOverflow   _chunk_stack;
+#ifdef USE_RegionTaskQueueWithOverflow
+  RegionTaskQueueWithOverflow   _region_stack;
 #else
-  ChunkTaskQueue               _chunk_stack;
-  GrowableArray<size_t>*       _chunk_overflow_stack;
+  RegionTaskQueue               _region_stack;
+  GrowableArray<size_t>*        _region_overflow_stack;
 #endif
 
 #if 1  // does this happen enough to need a per thread stack?
-  GrowableArray<Klass*>*       _revisit_klass_stack;
+  GrowableArray<Klass*>*        _revisit_klass_stack;
 #endif
   static ParMarkBitMap* _mark_bitmap;
 
@@ -100,21 +100,22 @@
 
   static PSOldGen* old_gen()             { return _old_gen; }
   static ObjectStartArray* start_array() { return _start_array; }
-  static OopTaskQueueSet* stack_array()   { return _stack_array; }
+  static OopTaskQueueSet* stack_array()  { return _stack_array; }
 
   static void initialize(ParMarkBitMap* mbm);
 
  protected:
   // Array of tasks.  Needed by the ParallelTaskTerminator.
-  static ChunkTaskQueueSet* chunk_array()   { return _chunk_array; }
-
-  OopTaskQueue*  marking_stack()          { return &_marking_stack; }
-  GrowableArray<oop>* overflow_stack()    { return _overflow_stack; }
-#ifdef USE_ChunkTaskQueueWithOverflow
-  ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; }
+  static RegionTaskQueueSet* region_array()      { return _region_array; }
+  OopTaskQueue*  marking_stack()                 { return &_marking_stack; }
+  GrowableArray<oop>* overflow_stack()           { return _overflow_stack; }
+#ifdef USE_RegionTaskQueueWithOverflow
+  RegionTaskQueueWithOverflow* region_stack()    { return &_region_stack; }
 #else
-  ChunkTaskQueue*  chunk_stack()          { return &_chunk_stack; }
-  GrowableArray<size_t>* chunk_overflow_stack() { return _chunk_overflow_stack; }
+  RegionTaskQueue*  region_stack()               { return &_region_stack; }
+  GrowableArray<size_t>* region_overflow_stack() {
+    return _region_overflow_stack;
+  }
 #endif
 
   // Pushes onto the marking stack.  If the marking stack is full,
@@ -123,9 +124,9 @@
   // Do not implement an equivalent stack_pop.  Deal with the
   // marking stack and overflow stack directly.
 
-  // Pushes onto the chunk stack.  If the chunk stack is full,
-  // pushes onto the chunk overflow stack.
-  void chunk_stack_push(size_t chunk_index);
+  // Pushes onto the region stack.  If the region stack is full,
+  // pushes onto the region overflow stack.
+  void region_stack_push(size_t region_index);
  public:
 
   Action action() { return _action; }
@@ -160,10 +161,10 @@
   // Get a oop for scanning.  If returns null, no oop were found.
   oop retrieve_for_scanning();
 
-  // Save chunk for later processing.  Must not fail.
-  void save_for_processing(size_t chunk_index);
-  // Get a chunk for processing.  If returns null, no chunk were found.
-  bool retrieve_for_processing(size_t& chunk_index);
+  // Save region for later processing.  Must not fail.
+  void save_for_processing(size_t region_index);
+  // Get a region for processing.  If it returns false, no region was found.
+  bool retrieve_for_processing(size_t& region_index);
 
   // Access function for compaction managers
   static ParCompactionManager* gc_thread_compaction_manager(int index);
@@ -172,18 +173,18 @@
     return stack_array()->steal(queue_num, seed, t);
   }
 
-  static bool steal(int queue_num, int* seed, ChunkTask& t) {
-    return chunk_array()->steal(queue_num, seed, t);
+  static bool steal(int queue_num, int* seed, RegionTask& t) {
+    return region_array()->steal(queue_num, seed, t);
   }
 
   // Process tasks remaining on any stack
   void drain_marking_stacks(OopClosure *blk);
 
   // Process tasks remaining on any stack
-  void drain_chunk_stacks();
+  void drain_region_stacks();
 
   // Process tasks remaining on any stack
-  void drain_chunk_overflow_stack();
+  void drain_region_overflow_stack();
 
   // Debugging support
 #ifdef ASSERT
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -35,9 +35,7 @@
   _ref_processor = new ReferenceProcessor(mr,
                                           true,    // atomic_discovery
                                           false);  // mt_discovery
-  if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-    _counters = new CollectorCounters("PSMarkSweep", 1);
-  }
+  _counters = new CollectorCounters("PSMarkSweep", 1);
 }
 
 // This method contains all heap specific policy for invoking mark sweep.
@@ -518,9 +516,6 @@
   follow_stack();
 
   // Process reference objects found during marking
-
-  // Skipping the reference processing for VerifyParallelOldWithMarkSweep
-  // affects the marking (makes it different).
   {
     ReferencePolicy *soft_ref_policy;
     if (clear_all_softrefs) {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -152,20 +152,15 @@
         oop(q)->forward_to(oop(compact_top));
         assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
       } else {
-        // Don't clear the mark since it's confuses parallel old
-        // verification.
-        if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-          // if the object isn't moving we can just set the mark to the default
-          // mark and handle it specially later on.
-          oop(q)->init_mark();
-        }
+        // if the object isn't moving we can just set the mark to the default
+        // mark and handle it specially later on.
+        oop(q)->init_mark();
         assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
       }
 
       // Update object start array
-      if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-        if (start_array)
-          start_array->allocate_block(compact_top);
+      if (start_array) {
+        start_array->allocate_block(compact_top);
       }
 
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size));
@@ -219,19 +214,14 @@
             assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark");
           } else {
             // if the object isn't moving we can just set the mark to the default
-            // Don't clear the mark since it's confuses parallel old
-            // verification.
-            if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-              // mark and handle it specially later on.
-              oop(q)->init_mark();
-            }
+            // mark and handle it specially later on.
+            oop(q)->init_mark();
             assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL");
           }
 
-          if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
-            // Update object start array
-            if (start_array)
-              start_array->allocate_block(compact_top);
+          // Update object start array
+          if (start_array) {
+            start_array->allocate_block(compact_top);
           }
 
           VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz));
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -152,9 +152,7 @@
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
 
   // Reset start array first.
-  debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
   start_array()->reset();
-  debug_only(})
 
   object_mark_sweep()->precompact();
 
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -28,43 +28,31 @@
 #include <math.h>
 
 // All sizes are in HeapWords.
-const size_t ParallelCompactData::Log2ChunkSize  = 9; // 512 words
-const size_t ParallelCompactData::ChunkSize      = (size_t)1 << Log2ChunkSize;
-const size_t ParallelCompactData::ChunkSizeBytes = ChunkSize << LogHeapWordSize;
-const size_t ParallelCompactData::ChunkSizeOffsetMask = ChunkSize - 1;
-const size_t ParallelCompactData::ChunkAddrOffsetMask = ChunkSizeBytes - 1;
-const size_t ParallelCompactData::ChunkAddrMask  = ~ChunkAddrOffsetMask;
-
-// 32-bit:  128 words covers 4 bitmap words
-// 64-bit:  128 words covers 2 bitmap words
-const size_t ParallelCompactData::Log2BlockSize   = 7; // 128 words
-const size_t ParallelCompactData::BlockSize       = (size_t)1 << Log2BlockSize;
-const size_t ParallelCompactData::BlockOffsetMask = BlockSize - 1;
-const size_t ParallelCompactData::BlockMask       = ~BlockOffsetMask;
-
-const size_t ParallelCompactData::BlocksPerChunk = ChunkSize / BlockSize;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_shift = 27;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_mask = ~0U << dc_shift;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_one = 0x1U << dc_shift;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::los_mask = ~dc_mask;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_claimed = 0x8U << dc_shift;
-
-const ParallelCompactData::ChunkData::chunk_sz_t
-ParallelCompactData::ChunkData::dc_completed = 0xcU << dc_shift;
-
-#ifdef ASSERT
-short   ParallelCompactData::BlockData::_cur_phase = 0;
-#endif
+const size_t ParallelCompactData::Log2RegionSize  = 9; // 512 words
+const size_t ParallelCompactData::RegionSize      = (size_t)1 << Log2RegionSize;
+const size_t ParallelCompactData::RegionSizeBytes =
+  RegionSize << LogHeapWordSize;
+const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
+const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
+const size_t ParallelCompactData::RegionAddrMask  = ~RegionAddrOffsetMask;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_shift = 27;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_one = 0x1U << dc_shift;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::los_mask = ~dc_mask;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_claimed = 0x8U << dc_shift;
+
+const ParallelCompactData::RegionData::region_sz_t
+ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift;
 
 SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id];
 bool      PSParallelCompact::_print_phases = false;
@@ -100,99 +88,12 @@
 GrowableArray<size_t>   * PSParallelCompact::_last_gc_live_oops_size = NULL;
 #endif
 
-// XXX beg - verification code; only works while we also mark in object headers
-static void
-verify_mark_bitmap(ParMarkBitMap& _mark_bitmap)
-{
-  ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
-
-  PSPermGen* perm_gen = heap->perm_gen();
-  PSOldGen* old_gen = heap->old_gen();
-  PSYoungGen* young_gen = heap->young_gen();
-
-  MutableSpace* perm_space = perm_gen->object_space();
-  MutableSpace* old_space = old_gen->object_space();
-  MutableSpace* eden_space = young_gen->eden_space();
-  MutableSpace* from_space = young_gen->from_space();
-  MutableSpace* to_space = young_gen->to_space();
-
-  // 'from_space' here is the survivor space at the lower address.
-  if (to_space->bottom() < from_space->bottom()) {
-    from_space = to_space;
-    to_space = young_gen->from_space();
-  }
-
-  HeapWord* boundaries[12];
-  unsigned int bidx = 0;
-  const unsigned int bidx_max = sizeof(boundaries) / sizeof(boundaries[0]);
-
-  boundaries[0] = perm_space->bottom();
-  boundaries[1] = perm_space->top();
-  boundaries[2] = old_space->bottom();
-  boundaries[3] = old_space->top();
-  boundaries[4] = eden_space->bottom();
-  boundaries[5] = eden_space->top();
-  boundaries[6] = from_space->bottom();
-  boundaries[7] = from_space->top();
-  boundaries[8] = to_space->bottom();
-  boundaries[9] = to_space->top();
-  boundaries[10] = to_space->end();
-  boundaries[11] = to_space->end();
-
-  BitMap::idx_t beg_bit = 0;
-  BitMap::idx_t end_bit;
-  BitMap::idx_t tmp_bit;
-  const BitMap::idx_t last_bit = _mark_bitmap.size();
-  do {
-    HeapWord* addr = _mark_bitmap.bit_to_addr(beg_bit);
-    if (_mark_bitmap.is_marked(beg_bit)) {
-      oop obj = (oop)addr;
-      assert(obj->is_gc_marked(), "obj header is not marked");
-      end_bit = _mark_bitmap.find_obj_end(beg_bit, last_bit);
-      const size_t size = _mark_bitmap.obj_size(beg_bit, end_bit);
-      assert(size == (size_t)obj->size(), "end bit wrong?");
-      beg_bit = _mark_bitmap.find_obj_beg(beg_bit + 1, last_bit);
-      assert(beg_bit > end_bit, "bit set in middle of an obj");
-    } else {
-      if (addr >= boundaries[bidx] && addr < boundaries[bidx + 1]) {
-        // a dead object in the current space.
-        oop obj = (oop)addr;
-        end_bit = _mark_bitmap.addr_to_bit(addr + obj->size());
-        assert(!obj->is_gc_marked(), "obj marked in header, not in bitmap");
-        tmp_bit = beg_bit + 1;
-        beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "beg bit set in unmarked obj");
-        beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "end bit set in unmarked obj");
-      } else if (addr < boundaries[bidx + 2]) {
-        // addr is between top in the current space and bottom in the next.
-        end_bit = beg_bit + pointer_delta(boundaries[bidx + 2], addr);
-        tmp_bit = beg_bit;
-        beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "beg bit set above top");
-        beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit);
-        assert(beg_bit == end_bit, "end bit set above top");
-        bidx += 2;
-      } else if (bidx < bidx_max - 2) {
-        bidx += 2; // ???
-      } else {
-        tmp_bit = beg_bit;
-        beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, last_bit);
-        assert(beg_bit == last_bit, "beg bit set outside heap");
-        beg_bit = _mark_bitmap.find_obj_end(tmp_bit, last_bit);
-        assert(beg_bit == last_bit, "end bit set outside heap");
-      }
-    }
-  } while (beg_bit < last_bit);
-}
-// XXX end - verification code; only works while we also mark in object headers
-
 #ifndef PRODUCT
 const char* PSParallelCompact::space_names[] = {
   "perm", "old ", "eden", "from", "to  "
 };
 
-void PSParallelCompact::print_chunk_ranges()
+void PSParallelCompact::print_region_ranges()
 {
   tty->print_cr("space  bottom     top        end        new_top");
   tty->print_cr("------ ---------- ---------- ---------- ----------");
@@ -203,31 +104,31 @@
                   SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " "
                   SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " ",
                   id, space_names[id],
-                  summary_data().addr_to_chunk_idx(space->bottom()),
-                  summary_data().addr_to_chunk_idx(space->top()),
-                  summary_data().addr_to_chunk_idx(space->end()),
-                  summary_data().addr_to_chunk_idx(_space_info[id].new_top()));
+                  summary_data().addr_to_region_idx(space->bottom()),
+                  summary_data().addr_to_region_idx(space->top()),
+                  summary_data().addr_to_region_idx(space->end()),
+                  summary_data().addr_to_region_idx(_space_info[id].new_top()));
   }
 }
 
 void
-print_generic_summary_chunk(size_t i, const ParallelCompactData::ChunkData* c)
+print_generic_summary_region(size_t i, const ParallelCompactData::RegionData* c)
 {
-#define CHUNK_IDX_FORMAT        SIZE_FORMAT_W(7)
-#define CHUNK_DATA_FORMAT       SIZE_FORMAT_W(5)
+#define REGION_IDX_FORMAT        SIZE_FORMAT_W(7)
+#define REGION_DATA_FORMAT       SIZE_FORMAT_W(5)
 
   ParallelCompactData& sd = PSParallelCompact::summary_data();
-  size_t dci = c->destination() ? sd.addr_to_chunk_idx(c->destination()) : 0;
-  tty->print_cr(CHUNK_IDX_FORMAT " " PTR_FORMAT " "
-                CHUNK_IDX_FORMAT " " PTR_FORMAT " "
-                CHUNK_DATA_FORMAT " " CHUNK_DATA_FORMAT " "
-                CHUNK_DATA_FORMAT " " CHUNK_IDX_FORMAT " %d",
+  size_t dci = c->destination() ? sd.addr_to_region_idx(c->destination()) : 0;
+  tty->print_cr(REGION_IDX_FORMAT " " PTR_FORMAT " "
+                REGION_IDX_FORMAT " " PTR_FORMAT " "
+                REGION_DATA_FORMAT " " REGION_DATA_FORMAT " "
+                REGION_DATA_FORMAT " " REGION_IDX_FORMAT " %d",
                 i, c->data_location(), dci, c->destination(),
                 c->partial_obj_size(), c->live_obj_size(),
-                c->data_size(), c->source_chunk(), c->destination_count());
-
-#undef  CHUNK_IDX_FORMAT
-#undef  CHUNK_DATA_FORMAT
+                c->data_size(), c->source_region(), c->destination_count());
+
+#undef  REGION_IDX_FORMAT
+#undef  REGION_DATA_FORMAT
 }
 
 void
@@ -236,14 +137,14 @@
                            HeapWord* const end_addr)
 {
   size_t total_words = 0;
-  size_t i = summary_data.addr_to_chunk_idx(beg_addr);
-  const size_t last = summary_data.addr_to_chunk_idx(end_addr);
+  size_t i = summary_data.addr_to_region_idx(beg_addr);
+  const size_t last = summary_data.addr_to_region_idx(end_addr);
   HeapWord* pdest = 0;
 
   while (i <= last) {
-    ParallelCompactData::ChunkData* c = summary_data.chunk(i);
+    ParallelCompactData::RegionData* c = summary_data.region(i);
     if (c->data_size() != 0 || c->destination() != pdest) {
-      print_generic_summary_chunk(i, c);
+      print_generic_summary_region(i, c);
       total_words += c->data_size();
       pdest = c->destination();
     }
@@ -265,16 +166,16 @@
 }
 
 void
-print_initial_summary_chunk(size_t i,
-                            const ParallelCompactData::ChunkData* c,
-                            bool newline = true)
+print_initial_summary_region(size_t i,
+                             const ParallelCompactData::RegionData* c,
+                             bool newline = true)
 {
   tty->print(SIZE_FORMAT_W(5) " " PTR_FORMAT " "
              SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " "
              SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " %d",
              i, c->destination(),
              c->partial_obj_size(), c->live_obj_size(),
-             c->data_size(), c->source_chunk(), c->destination_count());
+             c->data_size(), c->source_region(), c->destination_count());
   if (newline) tty->cr();
 }
 
@@ -285,47 +186,48 @@
     return;
   }
 
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
-  HeapWord* const top_aligned_up = summary_data.chunk_align_up(space->top());
-  const size_t end_chunk = summary_data.addr_to_chunk_idx(top_aligned_up);
-  const ParallelCompactData::ChunkData* c = summary_data.chunk(end_chunk - 1);
+  const size_t region_size = ParallelCompactData::RegionSize;
+  typedef ParallelCompactData::RegionData RegionData;
+  HeapWord* const top_aligned_up = summary_data.region_align_up(space->top());
+  const size_t end_region = summary_data.addr_to_region_idx(top_aligned_up);
+  const RegionData* c = summary_data.region(end_region - 1);
   HeapWord* end_addr = c->destination() + c->data_size();
   const size_t live_in_space = pointer_delta(end_addr, space->bottom());
 
-  // Print (and count) the full chunks at the beginning of the space.
-  size_t full_chunk_count = 0;
-  size_t i = summary_data.addr_to_chunk_idx(space->bottom());
-  while (i < end_chunk && summary_data.chunk(i)->data_size() == chunk_size) {
-    print_initial_summary_chunk(i, summary_data.chunk(i));
-    ++full_chunk_count;
+  // Print (and count) the full regions at the beginning of the space.
+  size_t full_region_count = 0;
+  size_t i = summary_data.addr_to_region_idx(space->bottom());
+  while (i < end_region && summary_data.region(i)->data_size() == region_size) {
+    print_initial_summary_region(i, summary_data.region(i));
+    ++full_region_count;
     ++i;
   }
 
-  size_t live_to_right = live_in_space - full_chunk_count * chunk_size;
+  size_t live_to_right = live_in_space - full_region_count * region_size;
 
   double max_reclaimed_ratio = 0.0;
-  size_t max_reclaimed_ratio_chunk = 0;
+  size_t max_reclaimed_ratio_region = 0;
   size_t max_dead_to_right = 0;
   size_t max_live_to_right = 0;
 
-  // Print the 'reclaimed ratio' for chunks while there is something live in the
-  // chunk or to the right of it.  The remaining chunks are empty (and
+  // Print the 'reclaimed ratio' for regions while there is something live in
+  // the region or to the right of it.  The remaining regions are empty (and
   // uninteresting), and computing the ratio will result in division by 0.
-  while (i < end_chunk && live_to_right > 0) {
-    c = summary_data.chunk(i);
-    HeapWord* const chunk_addr = summary_data.chunk_to_addr(i);
-    const size_t used_to_right = pointer_delta(space->top(), chunk_addr);
+  while (i < end_region && live_to_right > 0) {
+    c = summary_data.region(i);
+    HeapWord* const region_addr = summary_data.region_to_addr(i);
+    const size_t used_to_right = pointer_delta(space->top(), region_addr);
     const size_t dead_to_right = used_to_right - live_to_right;
     const double reclaimed_ratio = double(dead_to_right) / live_to_right;
 
     if (reclaimed_ratio > max_reclaimed_ratio) {
             max_reclaimed_ratio = reclaimed_ratio;
-            max_reclaimed_ratio_chunk = i;
+            max_reclaimed_ratio_region = i;
             max_dead_to_right = dead_to_right;
             max_live_to_right = live_to_right;
     }
 
-    print_initial_summary_chunk(i, c, false);
+    print_initial_summary_region(i, c, false);
     tty->print_cr(" %12.10f " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10),
                   reclaimed_ratio, dead_to_right, live_to_right);
 
@@ -333,14 +235,14 @@
     ++i;
   }
 
-  // Any remaining chunks are empty.  Print one more if there is one.
-  if (i < end_chunk) {
-    print_initial_summary_chunk(i, summary_data.chunk(i));
+  // Any remaining regions are empty.  Print one more if there is one.
+  if (i < end_region) {
+    print_initial_summary_region(i, summary_data.region(i));
   }
 
   tty->print_cr("max:  " SIZE_FORMAT_W(4) " d2r=" SIZE_FORMAT_W(10) " "
                 "l2r=" SIZE_FORMAT_W(10) " max_ratio=%14.12f",
-                max_reclaimed_ratio_chunk, max_dead_to_right,
+                max_reclaimed_ratio_region, max_dead_to_right,
                 max_live_to_right, max_reclaimed_ratio);
 }
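
Aside: the ratio traced above is just dead words to the right of a region divided by live words to the right, recomputed as the scan moves right, with the maximum remembered. A minimal standalone sketch of that bookkeeping, using made-up per-region live counts, an assumed 512-word region size, and space->top() assumed region-aligned (plain C++, not the HotSpot types):

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t region_words = 512;            // assumed region size in words
      const size_t live[] = { 512, 300, 80, 8 };  // live words in each remaining region
      const size_t n = sizeof(live) / sizeof(live[0]);

      size_t live_to_right = 0;
      for (size_t i = 0; i < n; ++i) live_to_right += live[i];

      double max_ratio = 0.0;
      size_t max_region = 0;
      for (size_t i = 0; i < n && live_to_right > 0; ++i) {
        const size_t used_to_right = (n - i) * region_words;  // words from region i to top
        const size_t dead_to_right = used_to_right - live_to_right;
        const double ratio = double(dead_to_right) / live_to_right;
        if (ratio > max_ratio) { max_ratio = ratio; max_region = i; }
        printf("region %zu: d2r=%zu l2r=%zu ratio=%f\n",
               i, dead_to_right, live_to_right, ratio);
        live_to_right -= live[i];
      }
      printf("max ratio %f at region %zu\n", max_ratio, max_region);
      return 0;
    }
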
 
@@ -372,13 +274,9 @@
 {
   _region_start = 0;
 
-  _chunk_vspace = 0;
-  _chunk_data = 0;
-  _chunk_count = 0;
-
-  _block_vspace = 0;
-  _block_data = 0;
-  _block_count = 0;
+  _region_vspace = 0;
+  _region_data = 0;
+  _region_count = 0;
 }
 
 bool ParallelCompactData::initialize(MemRegion covered_region)
@@ -387,18 +285,12 @@
   const size_t region_size = covered_region.word_size();
   DEBUG_ONLY(_region_end = _region_start + region_size;)
 
-  assert(chunk_align_down(_region_start) == _region_start,
+  assert(region_align_down(_region_start) == _region_start,
          "region start not aligned");
-  assert((region_size & ChunkSizeOffsetMask) == 0,
-         "region size not a multiple of ChunkSize");
-
-  bool result = initialize_chunk_data(region_size);
-
-  // Initialize the block data if it will be used for updating pointers, or if
-  // this is a debug build.
-  if (!UseParallelOldGCChunkPointerCalc || trueInDebug) {
-    result = result && initialize_block_data(region_size);
-  }
+  assert((region_size & RegionSizeOffsetMask) == 0,
+         "region size not a multiple of RegionSize");
+
+  bool result = initialize_region_data(region_size);
 
   return result;
 }
@@ -429,25 +321,13 @@
   return 0;
 }
 
-bool ParallelCompactData::initialize_chunk_data(size_t region_size)
+bool ParallelCompactData::initialize_region_data(size_t region_size)
 {
-  const size_t count = (region_size + ChunkSizeOffsetMask) >> Log2ChunkSize;
-  _chunk_vspace = create_vspace(count, sizeof(ChunkData));
-  if (_chunk_vspace != 0) {
-    _chunk_data = (ChunkData*)_chunk_vspace->reserved_low_addr();
-    _chunk_count = count;
-    return true;
-  }
-  return false;
-}
-
-bool ParallelCompactData::initialize_block_data(size_t region_size)
-{
-  const size_t count = (region_size + BlockOffsetMask) >> Log2BlockSize;
-  _block_vspace = create_vspace(count, sizeof(BlockData));
-  if (_block_vspace != 0) {
-    _block_data = (BlockData*)_block_vspace->reserved_low_addr();
-    _block_count = count;
+  const size_t count = (region_size + RegionSizeOffsetMask) >> Log2RegionSize;
+  _region_vspace = create_vspace(count, sizeof(RegionData));
+  if (_region_vspace != 0) {
+    _region_data = (RegionData*)_region_vspace->reserved_low_addr();
+    _region_count = count;
     return true;
   }
   return false;
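
The count above is a ceiling division of the covered size by RegionSize, done with the offset mask and a shift. A small sketch of the arithmetic; a Log2RegionSize of 9 (512-word regions) is assumed here purely for illustration, since the real constants live in the header, not in this hunk:

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t Log2RegionSize       = 9;                 // assumed for illustration
      const size_t RegionSize           = size_t(1) << Log2RegionSize;
      const size_t RegionSizeOffsetMask = RegionSize - 1;

      const size_t covered_words = 1000000;                  // example heap size in words
      const size_t count = (covered_words + RegionSizeOffsetMask) >> Log2RegionSize;
      const size_t check = (covered_words + RegionSize - 1) / RegionSize;  // same thing

      printf("regions needed: %zu (ceiling check: %zu)\n", count, check);
      return 0;
    }
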
@@ -455,38 +335,27 @@
 
 void ParallelCompactData::clear()
 {
-  if (_block_data) {
-    memset(_block_data, 0, _block_vspace->committed_size());
-  }
-  memset(_chunk_data, 0, _chunk_vspace->committed_size());
+  memset(_region_data, 0, _region_vspace->committed_size());
 }
 
-void ParallelCompactData::clear_range(size_t beg_chunk, size_t end_chunk) {
-  assert(beg_chunk <= _chunk_count, "beg_chunk out of range");
-  assert(end_chunk <= _chunk_count, "end_chunk out of range");
-  assert(ChunkSize % BlockSize == 0, "ChunkSize not a multiple of BlockSize");
-
-  const size_t chunk_cnt = end_chunk - beg_chunk;
-
-  if (_block_data) {
-    const size_t blocks_per_chunk = ChunkSize / BlockSize;
-    const size_t beg_block = beg_chunk * blocks_per_chunk;
-    const size_t block_cnt = chunk_cnt * blocks_per_chunk;
-    memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
-  }
-  memset(_chunk_data + beg_chunk, 0, chunk_cnt * sizeof(ChunkData));
+void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
+  assert(beg_region <= _region_count, "beg_region out of range");
+  assert(end_region <= _region_count, "end_region out of range");
+
+  const size_t region_cnt = end_region - beg_region;
+  memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
 }
 
-HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const
+HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
 {
-  const ChunkData* cur_cp = chunk(chunk_idx);
-  const ChunkData* const end_cp = chunk(chunk_count() - 1);
-
-  HeapWord* result = chunk_to_addr(chunk_idx);
+  const RegionData* cur_cp = region(region_idx);
+  const RegionData* const end_cp = region(region_count() - 1);
+
+  HeapWord* result = region_to_addr(region_idx);
   if (cur_cp < end_cp) {
     do {
       result += cur_cp->partial_obj_size();
-    } while (cur_cp->partial_obj_size() == ChunkSize && ++cur_cp < end_cp);
+    } while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp);
   }
   return result;
 }
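
partial_obj_end() walks right from region_idx, accumulating partial_obj_size until it reaches a region that the spilled-over object does not completely fill. A toy model with a plain array standing in for the RegionData table (hypothetical sizes, 512-word regions assumed):

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t RegionSize = 512;                  // assumed region size in words
      // partial_obj_size per region: an object starting in region 2 spills across
      // regions 3 and 4 completely and ends 100 words into region 5.
      const size_t partial_obj_size[] = { 0, 0, 0, 512, 512, 100, 0 };
      const size_t region_count = sizeof(partial_obj_size) / sizeof(partial_obj_size[0]);

      const size_t idx = 3;                           // region being queried
      size_t result = idx * RegionSize;               // region_to_addr(idx), as a word offset
      const size_t last = region_count - 1;
      size_t cur = idx;
      if (cur < last) {
        do {
          result += partial_obj_size[cur];
        } while (partial_obj_size[cur] == RegionSize && ++cur < last);
      }
      printf("partial object covering region %zu ends at word offset %zu\n",
             idx, result);
      return 0;
    }
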
@@ -494,56 +363,56 @@
 void ParallelCompactData::add_obj(HeapWord* addr, size_t len)
 {
   const size_t obj_ofs = pointer_delta(addr, _region_start);
-  const size_t beg_chunk = obj_ofs >> Log2ChunkSize;
-  const size_t end_chunk = (obj_ofs + len - 1) >> Log2ChunkSize;
+  const size_t beg_region = obj_ofs >> Log2RegionSize;
+  const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
 
   DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);)
   DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);)
 
-  if (beg_chunk == end_chunk) {
-    // All in one chunk.
-    _chunk_data[beg_chunk].add_live_obj(len);
+  if (beg_region == end_region) {
+    // All in one region.
+    _region_data[beg_region].add_live_obj(len);
     return;
   }
 
-  // First chunk.
-  const size_t beg_ofs = chunk_offset(addr);
-  _chunk_data[beg_chunk].add_live_obj(ChunkSize - beg_ofs);
+  // First region.
+  const size_t beg_ofs = region_offset(addr);
+  _region_data[beg_region].add_live_obj(RegionSize - beg_ofs);
 
   klassOop klass = ((oop)addr)->klass();
-  // Middle chunks--completely spanned by this object.
-  for (size_t chunk = beg_chunk + 1; chunk < end_chunk; ++chunk) {
-    _chunk_data[chunk].set_partial_obj_size(ChunkSize);
-    _chunk_data[chunk].set_partial_obj_addr(addr);
+  // Middle regions--completely spanned by this object.
+  for (size_t region = beg_region + 1; region < end_region; ++region) {
+    _region_data[region].set_partial_obj_size(RegionSize);
+    _region_data[region].set_partial_obj_addr(addr);
   }
 
-  // Last chunk.
-  const size_t end_ofs = chunk_offset(addr + len - 1);
-  _chunk_data[end_chunk].set_partial_obj_size(end_ofs + 1);
-  _chunk_data[end_chunk].set_partial_obj_addr(addr);
+  // Last region.
+  const size_t end_ofs = region_offset(addr + len - 1);
+  _region_data[end_region].set_partial_obj_size(end_ofs + 1);
+  _region_data[end_region].set_partial_obj_addr(addr);
 }
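
add_obj() splits an object's live words across the regions it touches: the first region gets the remainder of its own words, fully spanned middle regions record a partial_obj_size of a whole RegionSize, and the last region records how far the object reaches into it. A worked example with assumed 512-word regions and made-up word offsets:

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t Log2RegionSize = 9;                    // assumed for illustration
      const size_t RegionSize     = size_t(1) << Log2RegionSize;
      const size_t OffsetMask     = RegionSize - 1;

      const size_t addr = 700;                            // object start, as a word offset
      const size_t len  = 1500;                           // object size in words

      const size_t beg_region = addr >> Log2RegionSize;             // 1
      const size_t end_region = (addr + len - 1) >> Log2RegionSize; // 4

      const size_t beg_ofs = addr & OffsetMask;
      const size_t end_ofs = (addr + len - 1) & OffsetMask;

      printf("region %zu: add_live_obj(%zu)\n", beg_region, RegionSize - beg_ofs);
      for (size_t r = beg_region + 1; r < end_region; ++r)
        printf("region %zu: set_partial_obj_size(%zu)\n", r, RegionSize);
      printf("region %zu: set_partial_obj_size(%zu)\n", end_region, end_ofs + 1);

      const size_t total = (RegionSize - beg_ofs)
                         + (end_region - beg_region - 1) * RegionSize
                         + (end_ofs + 1);
      printf("accounted words: %zu (object length %zu)\n", total, len);
      return 0;
    }
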
 
 void
 ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end)
 {
-  assert(chunk_offset(beg) == 0, "not ChunkSize aligned");
-  assert(chunk_offset(end) == 0, "not ChunkSize aligned");
-
-  size_t cur_chunk = addr_to_chunk_idx(beg);
-  const size_t end_chunk = addr_to_chunk_idx(end);
+  assert(region_offset(beg) == 0, "not RegionSize aligned");
+  assert(region_offset(end) == 0, "not RegionSize aligned");
+
+  size_t cur_region = addr_to_region_idx(beg);
+  const size_t end_region = addr_to_region_idx(end);
   HeapWord* addr = beg;
-  while (cur_chunk < end_chunk) {
-    _chunk_data[cur_chunk].set_destination(addr);
-    _chunk_data[cur_chunk].set_destination_count(0);
-    _chunk_data[cur_chunk].set_source_chunk(cur_chunk);
-    _chunk_data[cur_chunk].set_data_location(addr);
-
-    // Update live_obj_size so the chunk appears completely full.
-    size_t live_size = ChunkSize - _chunk_data[cur_chunk].partial_obj_size();
-    _chunk_data[cur_chunk].set_live_obj_size(live_size);
-
-    ++cur_chunk;
-    addr += ChunkSize;
+  while (cur_region < end_region) {
+    _region_data[cur_region].set_destination(addr);
+    _region_data[cur_region].set_destination_count(0);
+    _region_data[cur_region].set_source_region(cur_region);
+    _region_data[cur_region].set_data_location(addr);
+
+    // Update live_obj_size so the region appears completely full.
+    size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size();
+    _region_data[cur_region].set_live_obj_size(live_size);
+
+    ++cur_region;
+    addr += RegionSize;
   }
 }
 
@@ -552,7 +421,7 @@
                                     HeapWord** target_next,
                                     HeapWord** source_next) {
   // This is too strict.
-  // assert(chunk_offset(source_beg) == 0, "not ChunkSize aligned");
+  // assert(region_offset(source_beg) == 0, "not RegionSize aligned");
 
   if (TraceParallelOldGCSummaryPhase) {
     tty->print_cr("tb=" PTR_FORMAT " te=" PTR_FORMAT " "
@@ -564,125 +433,93 @@
                   source_next != 0 ? *source_next : (HeapWord*) 0);
   }
 
-  size_t cur_chunk = addr_to_chunk_idx(source_beg);
-  const size_t end_chunk = addr_to_chunk_idx(chunk_align_up(source_end));
+  size_t cur_region = addr_to_region_idx(source_beg);
+  const size_t end_region = addr_to_region_idx(region_align_up(source_end));
 
   HeapWord *dest_addr = target_beg;
-  while (cur_chunk < end_chunk) {
-    size_t words = _chunk_data[cur_chunk].data_size();
+  while (cur_region < end_region) {
+    size_t words = _region_data[cur_region].data_size();
 
 #if     1
     assert(pointer_delta(target_end, dest_addr) >= words,
            "source region does not fit into target region");
 #else
-    // XXX - need some work on the corner cases here.  If the chunk does not
-    // fit, then must either make sure any partial_obj from the chunk fits, or
-    // 'undo' the initial part of the partial_obj that is in the previous chunk.
+    // XXX - need some work on the corner cases here.  If the region does not
+    // fit, then must either make sure any partial_obj from the region fits, or
+    // "undo" the initial part of the partial_obj that is in the previous
+    // region.
     if (dest_addr + words >= target_end) {
       // Let the caller know where to continue.
       *target_next = dest_addr;
-      *source_next = chunk_to_addr(cur_chunk);
+      *source_next = region_to_addr(cur_region);
       return false;
     }
 #endif  // #if 1
 
-    _chunk_data[cur_chunk].set_destination(dest_addr);
-
-    // Set the destination_count for cur_chunk, and if necessary, update
-    // source_chunk for a destination chunk.  The source_chunk field is updated
-    // if cur_chunk is the first (left-most) chunk to be copied to a destination
-    // chunk.
+    _region_data[cur_region].set_destination(dest_addr);
+
+    // Set the destination_count for cur_region, and if necessary, update
+    // source_region for a destination region.  The source_region field is
+    // updated if cur_region is the first (left-most) region to be copied to a
+    // destination region.
     //
-    // The destination_count calculation is a bit subtle.  A chunk that has data
-    // that compacts into itself does not count itself as a destination.  This
-    // maintains the invariant that a zero count means the chunk is available
-    // and can be claimed and then filled.
+    // The destination_count calculation is a bit subtle.  A region that has
+    // data that compacts into itself does not count itself as a destination.
+    // This maintains the invariant that a zero count means the region is
+    // available and can be claimed and then filled.
     if (words > 0) {
       HeapWord* const last_addr = dest_addr + words - 1;
-      const size_t dest_chunk_1 = addr_to_chunk_idx(dest_addr);
-      const size_t dest_chunk_2 = addr_to_chunk_idx(last_addr);
+      const size_t dest_region_1 = addr_to_region_idx(dest_addr);
+      const size_t dest_region_2 = addr_to_region_idx(last_addr);
 #if     0
-      // Initially assume that the destination chunks will be the same and
+      // Initially assume that the destination regions will be the same and
       // adjust the value below if necessary.  Under this assumption, if
-      // cur_chunk == dest_chunk_2, then cur_chunk will be compacted completely
-      // into itself.
-      uint destination_count = cur_chunk == dest_chunk_2 ? 0 : 1;
-      if (dest_chunk_1 != dest_chunk_2) {
-        // Destination chunks differ; adjust destination_count.
+      // cur_region == dest_region_2, then cur_region will be compacted
+      // completely into itself.
+      uint destination_count = cur_region == dest_region_2 ? 0 : 1;
+      if (dest_region_1 != dest_region_2) {
+        // Destination regions differ; adjust destination_count.
         destination_count += 1;
-        // Data from cur_chunk will be copied to the start of dest_chunk_2.
-        _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
-      } else if (chunk_offset(dest_addr) == 0) {
-        // Data from cur_chunk will be copied to the start of the destination
-        // chunk.
-        _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
+        // Data from cur_region will be copied to the start of dest_region_2.
+        _region_data[dest_region_2].set_source_region(cur_region);
+      } else if (region_offset(dest_addr) == 0) {
+        // Data from cur_region will be copied to the start of the destination
+        // region.
+        _region_data[dest_region_1].set_source_region(cur_region);
       }
 #else
-      // Initially assume that the destination chunks will be different and
+      // Initially assume that the destination regions will be different and
       // adjust the value below if necessary.  Under this assumption, if
-      // cur_chunk == dest_chunk2, then cur_chunk will be compacted partially
-      // into dest_chunk_1 and partially into itself.
-      uint destination_count = cur_chunk == dest_chunk_2 ? 1 : 2;
-      if (dest_chunk_1 != dest_chunk_2) {
-        // Data from cur_chunk will be copied to the start of dest_chunk_2.
-        _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk);
+      // cur_region == dest_region_2, then cur_region will be compacted partially
+      // into dest_region_1 and partially into itself.
+      uint destination_count = cur_region == dest_region_2 ? 1 : 2;
+      if (dest_region_1 != dest_region_2) {
+        // Data from cur_region will be copied to the start of dest_region_2.
+        _region_data[dest_region_2].set_source_region(cur_region);
       } else {
-        // Destination chunks are the same; adjust destination_count.
+        // Destination regions are the same; adjust destination_count.
         destination_count -= 1;
-        if (chunk_offset(dest_addr) == 0) {
-          // Data from cur_chunk will be copied to the start of the destination
-          // chunk.
-          _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk);
+        if (region_offset(dest_addr) == 0) {
+          // Data from cur_region will be copied to the start of the destination
+          // region.
+          _region_data[dest_region_1].set_source_region(cur_region);
         }
       }
 #endif  // #if 0
 
-      _chunk_data[cur_chunk].set_destination_count(destination_count);
-      _chunk_data[cur_chunk].set_data_location(chunk_to_addr(cur_chunk));
+      _region_data[cur_region].set_destination_count(destination_count);
+      _region_data[cur_region].set_data_location(region_to_addr(cur_region));
       dest_addr += words;
     }
 
-    ++cur_chunk;
+    ++cur_region;
   }
 
   *target_next = dest_addr;
   return true;
 }
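
The destination_count invariant described in the comments above (a region that compacts into itself does not count itself, so a count of zero means the region can be claimed and filled immediately) is easiest to see with concrete numbers. A sketch of the active #else calculation over plain word offsets; the values are illustrative and the source_region bookkeeping is omitted:

    #include <cstdio>
    #include <cstddef>

    // Mirrors the #else branch above: 1 or 2 destination regions, minus one
    // when the region's own data lands entirely within a single region.
    static unsigned destination_count(size_t cur_region, size_t dest_addr,
                                      size_t words, size_t Log2RegionSize) {
      if (words == 0) return 0;
      const size_t last_addr     = dest_addr + words - 1;
      const size_t dest_region_1 = dest_addr >> Log2RegionSize;
      const size_t dest_region_2 = last_addr >> Log2RegionSize;
      unsigned count = (cur_region == dest_region_2) ? 1 : 2;
      if (dest_region_1 == dest_region_2) count -= 1;
      return count;
    }

    int main() {
      const size_t log2 = 9;                               // assumed 512-word regions
      // 300 live words from region 10, landing entirely inside region 4:
      printf("%u\n", destination_count(10, 4 * 512 + 100, 300, log2));  // prints 1
      // ... straddling regions 4 and 5:
      printf("%u\n", destination_count(10, 4 * 512 + 400, 300, log2));  // prints 2
      // ... compacting entirely into itself (region 10):
      printf("%u\n", destination_count(10, 10 * 512, 300, log2));       // prints 0
      // ... partly into region 9, partly into itself:
      printf("%u\n", destination_count(10, 9 * 512 + 400, 300, log2));  // prints 1
      return 0;
    }
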
 
-bool ParallelCompactData::partial_obj_ends_in_block(size_t block_index) {
-  HeapWord* block_addr = block_to_addr(block_index);
-  HeapWord* block_end_addr = block_addr + BlockSize;
-  size_t chunk_index = addr_to_chunk_idx(block_addr);
-  HeapWord* partial_obj_end_addr = partial_obj_end(chunk_index);
-
-  // An object that ends at the end of the block, ends
-  // in the block (the last word of the object is to
-  // the left of the end).
-  if ((block_addr < partial_obj_end_addr) &&
-      (partial_obj_end_addr <= block_end_addr)) {
-    return true;
-  }
-
-  return false;
-}
-
 HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
-  HeapWord* result = NULL;
-  if (UseParallelOldGCChunkPointerCalc) {
-    result = chunk_calc_new_pointer(addr);
-  } else {
-    result = block_calc_new_pointer(addr);
-  }
-  return result;
-}
-
-// This method is overly complicated (expensive) to be called
-// for every reference.
-// Try to restructure this so that a NULL is returned if
-// the object is dead.  But don't wast the cycles to explicitly check
-// that it is dead since only live objects should be passed in.
-
-HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) {
   assert(addr != NULL, "Should detect NULL oop earlier");
   assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
 #ifdef ASSERT
@@ -692,30 +529,30 @@
 #endif
   assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
 
-  // Chunk covering the object.
-  size_t chunk_index = addr_to_chunk_idx(addr);
-  const ChunkData* const chunk_ptr = chunk(chunk_index);
-  HeapWord* const chunk_addr = chunk_align_down(addr);
-
-  assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
-  assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
-
-  HeapWord* result = chunk_ptr->destination();
-
-  // If all the data in the chunk is live, then the new location of the object
-  // can be calculated from the destination of the chunk plus the offset of the
-  // object in the chunk.
-  if (chunk_ptr->data_size() == ChunkSize) {
-    result += pointer_delta(addr, chunk_addr);
+  // Region covering the object.
+  size_t region_index = addr_to_region_idx(addr);
+  const RegionData* const region_ptr = region(region_index);
+  HeapWord* const region_addr = region_align_down(addr);
+
+  assert(addr < region_addr + RegionSize, "Region does not cover object");
+  assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
+
+  HeapWord* result = region_ptr->destination();
+
+  // If all the data in the region is live, then the new location of the object
+  // can be calculated from the destination of the region plus the offset of the
+  // object in the region.
+  if (region_ptr->data_size() == RegionSize) {
+    result += pointer_delta(addr, region_addr);
     return result;
   }
 
   // The new location of the object is
-  //    chunk destination +
-  //    size of the partial object extending onto the chunk +
-  //    sizes of the live objects in the Chunk that are to the left of addr
-  const size_t partial_obj_size = chunk_ptr->partial_obj_size();
-  HeapWord* const search_start = chunk_addr + partial_obj_size;
+  //    region destination +
+  //    size of the partial object extending onto the region +
+  //    sizes of the live objects in the Region that are to the left of addr
+  const size_t partial_obj_size = region_ptr->partial_obj_size();
+  HeapWord* const search_start = region_addr + partial_obj_size;
 
   const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
   size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
@@ -725,50 +562,6 @@
   return result;
 }
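
calc_new_pointer() above has two cases: if the region is completely live, the object keeps its offset relative to the region's destination; otherwise the new offset is the partial object spilling in from the left plus the live words between that point and the object. A numeric sketch with hypothetical values, where live_to_left stands in for the ParMarkBitMap query:

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t RegionSize = 512;                   // assumed region size in words

      // The region covering the object, expressed as word offsets:
      const size_t region_addr      = 8 * RegionSize;  // region start
      const size_t destination      = 3 * RegionSize;  // where its data compacts to
      const size_t partial_obj_size = 40;              // words spilling in from the left
      const size_t data_size        = 300;             // live words in the region

      const size_t addr = region_addr + 200;           // object being forwarded
      const size_t live_to_left = 90;                  // live words in
                                                       // [region_addr + partial_obj_size, addr)

      size_t new_addr;
      if (data_size == RegionSize) {
        new_addr = destination + (addr - region_addr); // fully live: offset is preserved
      } else {
        new_addr = destination + partial_obj_size + live_to_left;
      }
      printf("object at word offset %zu forwards to %zu\n", addr, new_addr);
      return 0;
    }
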
 
-HeapWord* ParallelCompactData::block_calc_new_pointer(HeapWord* addr) {
-  assert(addr != NULL, "Should detect NULL oop earlier");
-  assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
-#ifdef ASSERT
-  if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
-    gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
-  }
-#endif
-  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
-
-  // Chunk covering the object.
-  size_t chunk_index = addr_to_chunk_idx(addr);
-  const ChunkData* const chunk_ptr = chunk(chunk_index);
-  HeapWord* const chunk_addr = chunk_align_down(addr);
-
-  assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object");
-  assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check");
-
-  HeapWord* result = chunk_ptr->destination();
-
-  // If all the data in the chunk is live, then the new location of the object
-  // can be calculated from the destination of the chunk plus the offset of the
-  // object in the chunk.
-  if (chunk_ptr->data_size() == ChunkSize) {
-    result += pointer_delta(addr, chunk_addr);
-    return result;
-  }
-
-  // The new location of the object is
-  //    chunk destination +
-  //    block offset +
-  //    sizes of the live objects in the Block that are to the left of addr
-  const size_t block_offset = addr_to_block_ptr(addr)->offset();
-  HeapWord* const search_start = chunk_addr + block_offset;
-
-  const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
-  size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
-
-  result += block_offset + live_to_left;
-  assert(result <= addr, "object cannot move to the right");
-  assert(result == chunk_calc_new_pointer(addr), "Should match");
-  return result;
-}
-
 klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
   klassOop updated_klass;
   if (PSParallelCompact::should_update_klass(old_klass)) {
@@ -792,15 +585,14 @@
 
 void ParallelCompactData::verify_clear()
 {
-  verify_clear(_chunk_vspace);
-  verify_clear(_block_vspace);
+  verify_clear(_region_vspace);
 }
 #endif  // #ifdef ASSERT
 
 #ifdef NOT_PRODUCT
-ParallelCompactData::ChunkData* debug_chunk(size_t chunk_index) {
+ParallelCompactData::RegionData* debug_region(size_t region_index) {
   ParallelCompactData& sd = PSParallelCompact::summary_data();
-  return sd.chunk(chunk_index);
+  return sd.region(region_index);
 }
 #endif
 
@@ -953,10 +745,10 @@
   const idx_t end_bit = BitMap::word_align_up(_mark_bitmap.addr_to_bit(top));
   _mark_bitmap.clear_range(beg_bit, end_bit);
 
-  const size_t beg_chunk = _summary_data.addr_to_chunk_idx(bot);
-  const size_t end_chunk =
-    _summary_data.addr_to_chunk_idx(_summary_data.chunk_align_up(max_top));
-  _summary_data.clear_range(beg_chunk, end_chunk);
+  const size_t beg_region = _summary_data.addr_to_region_idx(bot);
+  const size_t end_region =
+    _summary_data.addr_to_region_idx(_summary_data.region_align_up(max_top));
+  _summary_data.clear_range(beg_region, end_region);
 }
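
The clearing code above converts the [bot, max_top) range into region indices, rounding the upper bound up to a region boundary first. A sketch of that index math, using word offsets instead of HeapWord* and an assumed 512-word region size:

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t Log2RegionSize = 9;                       // assumed for illustration
      const size_t RegionSize     = size_t(1) << Log2RegionSize;
      const size_t OffsetMask     = RegionSize - 1;

      const size_t bot     = 2048;                           // space bottom, word offset
      const size_t max_top = 3000;                           // highest top to clear up to

      const size_t aligned_top = (max_top + OffsetMask) & ~OffsetMask;  // region_align_up
      const size_t beg_region  = bot >> Log2RegionSize;                 // 4
      const size_t end_region  = aligned_top >> Log2RegionSize;         // 6

      printf("clearing regions [%zu, %zu)\n", beg_region, end_region);
      return 0;
    }
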
 
 void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values)
@@ -1072,19 +864,19 @@
 PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id,
                                                     bool maximum_compaction)
 {
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
+  const size_t region_size = ParallelCompactData::RegionSize;
   const ParallelCompactData& sd = summary_data();
 
   const MutableSpace* const space = _space_info[id].space();
-  HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
-  const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(space->bottom());
-  const ChunkData* const end_cp = sd.addr_to_chunk_ptr(top_aligned_up);
-
-  // Skip full chunks at the beginning of the space--they are necessarily part
+  HeapWord* const top_aligned_up = sd.region_align_up(space->top());
+  const RegionData* const beg_cp = sd.addr_to_region_ptr(space->bottom());
+  const RegionData* const end_cp = sd.addr_to_region_ptr(top_aligned_up);
+
+  // Skip full regions at the beginning of the space--they are necessarily part
   // of the dense prefix.
   size_t full_count = 0;
-  const ChunkData* cp;
-  for (cp = beg_cp; cp < end_cp && cp->data_size() == chunk_size; ++cp) {
+  const RegionData* cp;
+  for (cp = beg_cp; cp < end_cp && cp->data_size() == region_size; ++cp) {
     ++full_count;
   }
 
@@ -1093,7 +885,7 @@
   const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval;
   if (maximum_compaction || cp == end_cp || interval_ended) {
     _maximum_compaction_gc_num = total_invocations();
-    return sd.chunk_to_addr(cp);
+    return sd.region_to_addr(cp);
   }
 
   HeapWord* const new_top = _space_info[id].new_top();
@@ -1116,52 +908,53 @@
   }
 
   // XXX - Use binary search?
-  HeapWord* dense_prefix = sd.chunk_to_addr(cp);
-  const ChunkData* full_cp = cp;
-  const ChunkData* const top_cp = sd.addr_to_chunk_ptr(space->top() - 1);
+  HeapWord* dense_prefix = sd.region_to_addr(cp);
+  const RegionData* full_cp = cp;
+  const RegionData* const top_cp = sd.addr_to_region_ptr(space->top() - 1);
   while (cp < end_cp) {
-    HeapWord* chunk_destination = cp->destination();
-    const size_t cur_deadwood = pointer_delta(dense_prefix, chunk_destination);
+    HeapWord* region_destination = cp->destination();
+    const size_t cur_deadwood = pointer_delta(dense_prefix, region_destination);
     if (TraceParallelOldGCDensePrefix && Verbose) {
       tty->print_cr("c#=" SIZE_FORMAT_W(4) " dst=" PTR_FORMAT " "
                     "dp=" SIZE_FORMAT_W(8) " " "cdw=" SIZE_FORMAT_W(8),
-                    sd.chunk(cp), chunk_destination,
+                    sd.region(cp), region_destination,
                     dense_prefix, cur_deadwood);
     }
 
     if (cur_deadwood >= deadwood_goal) {
-      // Found the chunk that has the correct amount of deadwood to the left.
-      // This typically occurs after crossing a fairly sparse set of chunks, so
-      // iterate backwards over those sparse chunks, looking for the chunk that
-      // has the lowest density of live objects 'to the right.'
-      size_t space_to_left = sd.chunk(cp) * chunk_size;
+      // Found the region that has the correct amount of deadwood to the left.
+      // This typically occurs after crossing a fairly sparse set of regions, so
+      // iterate backwards over those sparse regions, looking for the region
+      // that has the lowest density of live objects 'to the right.'
+      size_t space_to_left = sd.region(cp) * region_size;
       size_t live_to_left = space_to_left - cur_deadwood;
       size_t space_to_right = space_capacity - space_to_left;
       size_t live_to_right = space_live - live_to_left;
       double density_to_right = double(live_to_right) / space_to_right;
       while (cp > full_cp) {
         --cp;
-        const size_t prev_chunk_live_to_right = live_to_right - cp->data_size();
-        const size_t prev_chunk_space_to_right = space_to_right + chunk_size;
-        double prev_chunk_density_to_right =
-          double(prev_chunk_live_to_right) / prev_chunk_space_to_right;
-        if (density_to_right <= prev_chunk_density_to_right) {
+        const size_t prev_region_live_to_right = live_to_right -
+          cp->data_size();
+        const size_t prev_region_space_to_right = space_to_right + region_size;
+        double prev_region_density_to_right =
+          double(prev_region_live_to_right) / prev_region_space_to_right;
+        if (density_to_right <= prev_region_density_to_right) {
           return dense_prefix;
         }
         if (TraceParallelOldGCDensePrefix && Verbose) {
           tty->print_cr("backing up from c=" SIZE_FORMAT_W(4) " d2r=%10.8f "
-                        "pc_d2r=%10.8f", sd.chunk(cp), density_to_right,
-                        prev_chunk_density_to_right);
+                        "pc_d2r=%10.8f", sd.region(cp), density_to_right,
+                        prev_region_density_to_right);
         }
-        dense_prefix -= chunk_size;
-        live_to_right = prev_chunk_live_to_right;
-        space_to_right = prev_chunk_space_to_right;
-        density_to_right = prev_chunk_density_to_right;
+        dense_prefix -= region_size;
+        live_to_right = prev_region_live_to_right;
+        space_to_right = prev_region_space_to_right;
+        density_to_right = prev_region_density_to_right;
       }
       return dense_prefix;
     }
 
-    dense_prefix += chunk_size;
+    dense_prefix += region_size;
     ++cp;
   }
 
@@ -1174,8 +967,8 @@
                                                  const bool maximum_compaction,
                                                  HeapWord* const addr)
 {
-  const size_t chunk_idx = summary_data().addr_to_chunk_idx(addr);
-  ChunkData* const cp = summary_data().chunk(chunk_idx);
+  const size_t region_idx = summary_data().addr_to_region_idx(addr);
+  RegionData* const cp = summary_data().region(region_idx);
   const MutableSpace* const space = _space_info[id].space();
   HeapWord* const new_top = _space_info[id].new_top();
 
@@ -1191,7 +984,7 @@
                 "d2l=" SIZE_FORMAT " d2l%%=%6.4f "
                 "d2r=" SIZE_FORMAT " l2r=" SIZE_FORMAT
                 " ratio=%10.8f",
-                algorithm, addr, chunk_idx,
+                algorithm, addr, region_idx,
                 space_live,
                 dead_to_left, dead_to_left_pct,
                 dead_to_right, live_to_right,
@@ -1253,52 +1046,52 @@
   return MAX2(limit, 0.0);
 }
 
-ParallelCompactData::ChunkData*
-PSParallelCompact::first_dead_space_chunk(const ChunkData* beg,
-                                          const ChunkData* end)
+ParallelCompactData::RegionData*
+PSParallelCompact::first_dead_space_region(const RegionData* beg,
+                                           const RegionData* end)
 {
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
+  const size_t region_size = ParallelCompactData::RegionSize;
   ParallelCompactData& sd = summary_data();
-  size_t left = sd.chunk(beg);
-  size_t right = end > beg ? sd.chunk(end) - 1 : left;
+  size_t left = sd.region(beg);
+  size_t right = end > beg ? sd.region(end) - 1 : left;
 
   // Binary search.
   while (left < right) {
     // Equivalent to (left + right) / 2, but does not overflow.
     const size_t middle = left + (right - left) / 2;
-    ChunkData* const middle_ptr = sd.chunk(middle);
+    RegionData* const middle_ptr = sd.region(middle);
     HeapWord* const dest = middle_ptr->destination();
-    HeapWord* const addr = sd.chunk_to_addr(middle);
+    HeapWord* const addr = sd.region_to_addr(middle);
     assert(dest != NULL, "sanity");
     assert(dest <= addr, "must move left");
 
     if (middle > left && dest < addr) {
       right = middle - 1;
-    } else if (middle < right && middle_ptr->data_size() == chunk_size) {
+    } else if (middle < right && middle_ptr->data_size() == region_size) {
       left = middle + 1;
     } else {
       return middle_ptr;
     }
   }
-  return sd.chunk(left);
+  return sd.region(left);
 }
 
-ParallelCompactData::ChunkData*
-PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg,
-                                         const ChunkData* end,
-                                         size_t dead_words)
+ParallelCompactData::RegionData*
+PSParallelCompact::dead_wood_limit_region(const RegionData* beg,
+                                          const RegionData* end,
+                                          size_t dead_words)
 {
   ParallelCompactData& sd = summary_data();
-  size_t left = sd.chunk(beg);
-  size_t right = end > beg ? sd.chunk(end) - 1 : left;
+  size_t left = sd.region(beg);
+  size_t right = end > beg ? sd.region(end) - 1 : left;
 
   // Binary search.
   while (left < right) {
     // Equivalent to (left + right) / 2, but does not overflow.
     const size_t middle = left + (right - left) / 2;
-    ChunkData* const middle_ptr = sd.chunk(middle);
+    RegionData* const middle_ptr = sd.region(middle);
     HeapWord* const dest = middle_ptr->destination();
-    HeapWord* const addr = sd.chunk_to_addr(middle);
+    HeapWord* const addr = sd.region_to_addr(middle);
     assert(dest != NULL, "sanity");
     assert(dest <= addr, "must move left");
 
@@ -1311,13 +1104,13 @@
       return middle_ptr;
     }
   }
-  return sd.chunk(left);
+  return sd.region(left);
 }
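
Both helpers above do the same bounded binary search over the summary data: a region's destination is never to the right of its own address, so the search moves right past completely full regions and left past regions whose data has already started to slide. A compact standalone sketch of the first_dead_space_region-style search over plain arrays (hypothetical per-region data, 512-word regions assumed):

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t region_size = 512;                  // assumed region size in words
      // Summarized-to-self space: two full regions, then partially live ones.
      const size_t data_size[]   = { 512, 512,  300,  100,   40 };
      const size_t destination[] = {   0, 512, 1024, 1324, 1424 };  // word offsets
      size_t left  = 0;
      size_t right = 4;                                // last candidate region

      size_t found = 0;
      for (;;) {
        const size_t middle = left + (right - left) / 2;   // avoids overflow
        const size_t dest = destination[middle];
        const size_t addr = middle * region_size;          // region_to_addr(middle)
        if (middle > left && dest < addr) {
          right = middle - 1;                              // dead space starts further left
        } else if (middle < right && data_size[middle] == region_size) {
          left = middle + 1;                               // completely full; look right
        } else {
          found = middle;
          break;
        }
      }
      printf("first region with dead space: %zu\n", found);
      return 0;
    }
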
 
 // The result is valid during the summary phase, after the initial summarization
 // of each space into itself, and before final summarization.
 inline double
-PSParallelCompact::reclaimed_ratio(const ChunkData* const cp,
+PSParallelCompact::reclaimed_ratio(const RegionData* const cp,
                                    HeapWord* const bottom,
                                    HeapWord* const top,
                                    HeapWord* const new_top)
@@ -1331,12 +1124,13 @@
   assert(top >= new_top, "summary data problem?");
   assert(new_top > bottom, "space is empty; should not be here");
   assert(new_top >= cp->destination(), "sanity");
-  assert(top >= sd.chunk_to_addr(cp), "sanity");
+  assert(top >= sd.region_to_addr(cp), "sanity");
 
   HeapWord* const destination = cp->destination();
   const size_t dense_prefix_live  = pointer_delta(destination, bottom);
   const size_t compacted_region_live = pointer_delta(new_top, destination);
-  const size_t compacted_region_used = pointer_delta(top, sd.chunk_to_addr(cp));
+  const size_t compacted_region_used = pointer_delta(top,
+                                                     sd.region_to_addr(cp));
   const size_t reclaimable = compacted_region_used - compacted_region_live;
 
   const double divisor = dense_prefix_live + 1.25 * compacted_region_live;
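
A quick worked example of the quantities just computed, with made-up word counts. Weighting compacted_region_live by 1.25 biases the choice toward regions whose live data is cheap to move relative to what would be reclaimed; the value reclaimed_ratio() finally returns falls outside this hunk, so the division below is only the natural reading of the divisor, not necessarily the exact expression:

    #include <cstdio>
    #include <cstddef>

    int main() {
      // Hypothetical word counts for one candidate region cp:
      const size_t dense_prefix_live     = 4000;  // live words left of cp's destination
      const size_t compacted_region_live = 1500;  // live words from destination to new_top
      const size_t compacted_region_used = 6000;  // words from cp's address to top

      const size_t reclaimable = compacted_region_used - compacted_region_live;  // 4500
      const double divisor = dense_prefix_live + 1.25 * compacted_region_live;   // 5875
      printf("reclaimable=%zu divisor=%.1f ratio=%.4f\n",
             reclaimable, divisor, double(reclaimable) / divisor);
      return 0;
    }
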
@@ -1344,39 +1138,40 @@
 }
 
 // Return the address of the end of the dense prefix, a.k.a. the start of the
-// compacted region.  The address is always on a chunk boundary.
+// compacted region.  The address is always on a region boundary.
 //
-// Completely full chunks at the left are skipped, since no compaction can occur
-// in those chunks.  Then the maximum amount of dead wood to allow is computed,
-// based on the density (amount live / capacity) of the generation; the chunk
-// with approximately that amount of dead space to the left is identified as the
-// limit chunk.  Chunks between the last completely full chunk and the limit
-// chunk are scanned and the one that has the best (maximum) reclaimed_ratio()
-// is selected.
+// Completely full regions at the left are skipped, since no compaction can
+// occur in those regions.  Then the maximum amount of dead wood to allow is
+// computed, based on the density (amount live / capacity) of the generation;
+// the region with approximately that amount of dead space to the left is
+// identified as the limit region.  Regions between the last completely full
+// region and the limit region are scanned and the one that has the best
+// (maximum) reclaimed_ratio() is selected.
 HeapWord*
 PSParallelCompact::compute_dense_prefix(const SpaceId id,
                                         bool maximum_compaction)
 {
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
+  const size_t region_size = ParallelCompactData::RegionSize;
   const ParallelCompactData& sd = summary_data();
 
   const MutableSpace* const space = _space_info[id].space();
   HeapWord* const top = space->top();
-  HeapWord* const top_aligned_up = sd.chunk_align_up(top);
+  HeapWord* const top_aligned_up = sd.region_align_up(top);
   HeapWord* const new_top = _space_info[id].new_top();
-  HeapWord* const new_top_aligned_up = sd.chunk_align_up(new_top);
+  HeapWord* const new_top_aligned_up = sd.region_align_up(new_top);
   HeapWord* const bottom = space->bottom();
-  const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(bottom);
-  const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
-  const ChunkData* const new_top_cp = sd.addr_to_chunk_ptr(new_top_aligned_up);
-
-  // Skip full chunks at the beginning of the space--they are necessarily part
+  const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom);
+  const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
+  const RegionData* const new_top_cp =
+    sd.addr_to_region_ptr(new_top_aligned_up);
+
+  // Skip full regions at the beginning of the space--they are necessarily part
   // of the dense prefix.
-  const ChunkData* const full_cp = first_dead_space_chunk(beg_cp, new_top_cp);
-  assert(full_cp->destination() == sd.chunk_to_addr(full_cp) ||
+  const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp);
+  assert(full_cp->destination() == sd.region_to_addr(full_cp) ||
          space->is_empty(), "no dead space allowed to the left");
-  assert(full_cp->data_size() < chunk_size || full_cp == new_top_cp - 1,
-         "chunk must have dead space");
+  assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1,
+         "region must have dead space");
 
   // The gc number is saved whenever a maximum compaction is done, and used to
   // determine when the maximum compaction interval has expired.  This avoids
@@ -1387,7 +1182,7 @@
     total_invocations() == HeapFirstMaximumCompactionCount;
   if (maximum_compaction || full_cp == top_cp || interval_ended) {
     _maximum_compaction_gc_num = total_invocations();
-    return sd.chunk_to_addr(full_cp);
+    return sd.region_to_addr(full_cp);
   }
 
   const size_t space_live = pointer_delta(new_top, bottom);
@@ -1413,15 +1208,15 @@
                   dead_wood_max, dead_wood_limit);
   }
 
-  // Locate the chunk with the desired amount of dead space to the left.
-  const ChunkData* const limit_cp =
-    dead_wood_limit_chunk(full_cp, top_cp, dead_wood_limit);
-
-  // Scan from the first chunk with dead space to the limit chunk and find the
+  // Locate the region with the desired amount of dead space to the left.
+  const RegionData* const limit_cp =
+    dead_wood_limit_region(full_cp, top_cp, dead_wood_limit);
+
+  // Scan from the first region with dead space to the limit region and find the
   // one with the best (largest) reclaimed ratio.
   double best_ratio = 0.0;
-  const ChunkData* best_cp = full_cp;
-  for (const ChunkData* cp = full_cp; cp < limit_cp; ++cp) {
+  const RegionData* best_cp = full_cp;
+  for (const RegionData* cp = full_cp; cp < limit_cp; ++cp) {
     double tmp_ratio = reclaimed_ratio(cp, bottom, top, new_top);
     if (tmp_ratio > best_ratio) {
       best_cp = cp;
@@ -1430,18 +1225,18 @@
   }
 
 #if     0
-  // Something to consider:  if the chunk with the best ratio is 'close to' the
-  // first chunk w/free space, choose the first chunk with free space
-  // ("first-free").  The first-free chunk is usually near the start of the
+  // Something to consider:  if the region with the best ratio is 'close to' the
+  // first region w/free space, choose the first region with free space
+  // ("first-free").  The first-free region is usually near the start of the
   // heap, which means we are copying most of the heap already, so copy a bit
   // more to get complete compaction.
-  if (pointer_delta(best_cp, full_cp, sizeof(ChunkData)) < 4) {
+  if (pointer_delta(best_cp, full_cp, sizeof(RegionData)) < 4) {
     _maximum_compaction_gc_num = total_invocations();
     best_cp = full_cp;
   }
 #endif  // #if 0
 
-  return sd.chunk_to_addr(best_cp);
+  return sd.region_to_addr(best_cp);
 }
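
Taken together, compute_dense_prefix() above skips the completely full regions, binary-searches for the region with roughly dead_wood_limit dead words to its left, and then takes the candidate in between with the largest reclaimed ratio. The selection step at the end is a plain argmax scan; a stripped-down sketch over precomputed ratios (hypothetical values):

    #include <cstdio>
    #include <cstddef>

    int main() {
      // reclaimed_ratio() results for the candidate regions [full_cp, limit_cp):
      const double ratio[] = { 0.10, 0.35, 0.80, 0.65, 0.20 };
      const size_t n = sizeof(ratio) / sizeof(ratio[0]);

      double best_ratio = 0.0;
      size_t best_cp = 0;                        // defaults to the first candidate
      for (size_t cp = 0; cp < n; ++cp) {
        if (ratio[cp] > best_ratio) {
          best_cp = cp;
          best_ratio = ratio[cp];
        }
      }
      printf("dense prefix ends at candidate region %zu (ratio %.2f)\n",
             best_cp, best_ratio);
      return 0;
    }
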
 
 void PSParallelCompact::summarize_spaces_quick()
@@ -1459,9 +1254,9 @@
 void PSParallelCompact::fill_dense_prefix_end(SpaceId id)
 {
   HeapWord* const dense_prefix_end = dense_prefix(id);
-  const ChunkData* chunk = _summary_data.addr_to_chunk_ptr(dense_prefix_end);
+  const RegionData* region = _summary_data.addr_to_region_ptr(dense_prefix_end);
   const idx_t dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end);
-  if (dead_space_crosses_boundary(chunk, dense_prefix_bit)) {
+  if (dead_space_crosses_boundary(region, dense_prefix_bit)) {
     // Only enough dead space is filled so that any remaining dead space to the
     // left is larger than the minimum filler object.  (The remainder is filled
     // during the copy/update phase.)
@@ -1552,7 +1347,7 @@
       fill_dense_prefix_end(id);
     }
 
-    // Compute the destination of each Chunk, and thus each object.
+    // Compute the destination of each Region, and thus each object.
     _summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end);
     _summary_data.summarize(dense_prefix_end, space->end(),
                             dense_prefix_end, space->top(),
@@ -1560,19 +1355,19 @@
   }
 
   if (TraceParallelOldGCSummaryPhase) {
-    const size_t chunk_size = ParallelCompactData::ChunkSize;
+    const size_t region_size = ParallelCompactData::RegionSize;
     HeapWord* const dense_prefix_end = _space_info[id].dense_prefix();
-    const size_t dp_chunk = _summary_data.addr_to_chunk_idx(dense_prefix_end);
+    const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end);
     const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom());
     HeapWord* const new_top = _space_info[id].new_top();
-    const HeapWord* nt_aligned_up = _summary_data.chunk_align_up(new_top);
+    const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top);
     const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end);
     tty->print_cr("id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " "
-                  "dp_chunk=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
+                  "dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
                   "cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT,
                   id, space->capacity_in_words(), dense_prefix_end,
-                  dp_chunk, dp_words / chunk_size,
-                  cr_words / chunk_size, new_top);
+                  dp_region, dp_words / region_size,
+                  cr_words / region_size, new_top);
   }
 }
 
@@ -1584,11 +1379,6 @@
   // trace("2");
 
 #ifdef  ASSERT
-  if (VerifyParallelOldWithMarkSweep  &&
-      (PSParallelCompact::total_invocations() %
-         VerifyParallelOldWithMarkSweepInterval) == 0) {
-    verify_mark_bitmap(_mark_bitmap);
-  }
   if (TraceParallelOldGCMarkingPhase) {
     tty->print_cr("add_obj_count=" SIZE_FORMAT " "
                   "add_obj_bytes=" SIZE_FORMAT,
@@ -1605,7 +1395,7 @@
   if (TraceParallelOldGCSummaryPhase) {
     tty->print_cr("summary_phase:  after summarizing each space to self");
     Universe::print();
-    NOT_PRODUCT(print_chunk_ranges());
+    NOT_PRODUCT(print_region_ranges());
     if (Verbose) {
       NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info));
     }
@@ -1651,14 +1441,15 @@
                               space->bottom(), space->top(),
                               new_top_addr);
 
-      // Clear the source_chunk field for each chunk in the space.
+      // Clear the source_region field for each region in the space.
       HeapWord* const new_top = _space_info[id].new_top();
-      HeapWord* const clear_end = _summary_data.chunk_align_up(new_top);
-      ChunkData* beg_chunk = _summary_data.addr_to_chunk_ptr(space->bottom());
-      ChunkData* end_chunk = _summary_data.addr_to_chunk_ptr(clear_end);
-      while (beg_chunk < end_chunk) {
-        beg_chunk->set_source_chunk(0);
-        ++beg_chunk;
+      HeapWord* const clear_end = _summary_data.region_align_up(new_top);
+      RegionData* beg_region =
+        _summary_data.addr_to_region_ptr(space->bottom());
+      RegionData* end_region = _summary_data.addr_to_region_ptr(clear_end);
+      while (beg_region < end_region) {
+        beg_region->set_source_region(0);
+        ++beg_region;
       }
 
       // Reset the new_top value for the space.
@@ -1666,243 +1457,16 @@
     }
   }
 
-  // Fill in the block data after any changes to the chunks have
-  // been made.
-#ifdef  ASSERT
-  summarize_blocks(cm, perm_space_id);
-  summarize_blocks(cm, old_space_id);
-#else
-  if (!UseParallelOldGCChunkPointerCalc) {
-    summarize_blocks(cm, perm_space_id);
-    summarize_blocks(cm, old_space_id);
-  }
-#endif
-
   if (TraceParallelOldGCSummaryPhase) {
     tty->print_cr("summary_phase:  after final summarization");
     Universe::print();
-    NOT_PRODUCT(print_chunk_ranges());
+    NOT_PRODUCT(print_region_ranges());
     if (Verbose) {
       NOT_PRODUCT(print_generic_summary_data(_summary_data, _space_info));
     }
   }
 }
 
-// Fill in the BlockData.
-// Iterate over the spaces and within each space iterate over
-// the chunks and fill in the BlockData for each chunk.
-
-void PSParallelCompact::summarize_blocks(ParCompactionManager* cm,
-                                         SpaceId first_compaction_space_id) {
-#if     0
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(1);)
-  for (SpaceId cur_space_id = first_compaction_space_id;
-       cur_space_id != last_space_id;
-       cur_space_id = next_compaction_space_id(cur_space_id)) {
-    // Iterate over the chunks in the space
-    size_t start_chunk_index =
-      _summary_data.addr_to_chunk_idx(space(cur_space_id)->bottom());
-    BitBlockUpdateClosure bbu(mark_bitmap(),
-                              cm,
-                              start_chunk_index);
-    // Iterate over blocks.
-    for (size_t chunk_index =  start_chunk_index;
-         chunk_index < _summary_data.chunk_count() &&
-         _summary_data.chunk_to_addr(chunk_index) < space(cur_space_id)->top();
-         chunk_index++) {
-
-      // Reset the closure for the new chunk.  Note that the closure
-      // maintains some data that does not get reset for each chunk
-      // so a new instance of the closure is no appropriate.
-      bbu.reset_chunk(chunk_index);
-
-      // Start the iteration with the first live object.  This
-      // may return the end of the chunk.  That is acceptable since
-      // it will properly limit the iterations.
-      ParMarkBitMap::idx_t left_offset = mark_bitmap()->addr_to_bit(
-        _summary_data.first_live_or_end_in_chunk(chunk_index));
-
-      // End the iteration at the end of the chunk.
-      HeapWord* chunk_addr = _summary_data.chunk_to_addr(chunk_index);
-      HeapWord* chunk_end = chunk_addr + ParallelCompactData::ChunkSize;
-      ParMarkBitMap::idx_t right_offset =
-        mark_bitmap()->addr_to_bit(chunk_end);
-
-      // Blocks that have not objects starting in them can be
-      // skipped because their data will never be used.
-      if (left_offset < right_offset) {
-
-        // Iterate through the objects in the chunk.
-        ParMarkBitMap::idx_t last_offset =
-          mark_bitmap()->pair_iterate(&bbu, left_offset, right_offset);
-
-        // If last_offset is less than right_offset, then the iterations
-        // terminated while it was looking for an end bit.  "last_offset"
-        // is then the offset for the last start bit.  In this situation
-        // the "offset" field for the next block to the right (_cur_block + 1)
-        // will not have been update although there may be live data
-        // to the left of the chunk.
-
-        size_t cur_block_plus_1 = bbu.cur_block() + 1;
-        HeapWord* cur_block_plus_1_addr =
-        _summary_data.block_to_addr(bbu.cur_block()) +
-        ParallelCompactData::BlockSize;
-        HeapWord* last_offset_addr = mark_bitmap()->bit_to_addr(last_offset);
- #if 1  // This code works.  The else doesn't but should.  Why does it?
-        // The current block (cur_block()) has already been updated.
-        // The last block that may need to be updated is either the
-        // next block (current block + 1) or the block where the
-        // last object starts (which can be greater than the
-        // next block if there were no objects found in intervening
-        // blocks).
-        size_t last_block =
-          MAX2(bbu.cur_block() + 1,
-               _summary_data.addr_to_block_idx(last_offset_addr));
- #else
-        // The current block has already been updated.  The only block
-        // that remains to be updated is the block where the last
-        // object in the chunk starts.
-        size_t last_block = _summary_data.addr_to_block_idx(last_offset_addr);
- #endif
-        assert_bit_is_start(last_offset);
-        assert((last_block == _summary_data.block_count()) ||
-             (_summary_data.block(last_block)->raw_offset() == 0),
-          "Should not have been set");
-        // Is the last block still in the current chunk?  If still
-        // in this chunk, update the last block (the counting that
-        // included the current block is meant for the offset of the last
-        // block).  If not in this chunk, do nothing.  Should not
-        // update a block in the next chunk.
-        if (ParallelCompactData::chunk_contains_block(bbu.chunk_index(),
-                                                      last_block)) {
-          if (last_offset < right_offset) {
-            // The last object started in this chunk but ends beyond
-            // this chunk.  Update the block for this last object.
-            assert(mark_bitmap()->is_marked(last_offset), "Should be marked");
-            // No end bit was found.  The closure takes care of
-            // the cases where
-            //   an objects crosses over into the next block
-            //   an objects starts and ends in the next block
-            // It does not handle the case where an object is
-            // the first object in a later block and extends
-            // past the end of the chunk (i.e., the closure
-            // only handles complete objects that are in the range
-            // it is given).  That object is handed back here
-            // for any special consideration necessary.
-            //
-            // Is the first bit in the last block a start or end bit?
-            //
-            // If the partial object ends in the last block L,
-            // then the 1st bit in L may be an end bit.
-            //
-            // Else does the last object start in a block after the current
-            // block? A block AA will already have been updated if an
-            // object ends in the next block AA+1.  An object found to end in
-            // the AA+1 is the trigger that updates AA.  Objects are being
-            // counted in the current block for updaing a following
-            // block.  An object may start in later block
-            // block but may extend beyond the last block in the chunk.
-            // Updates are only done when the end of an object has been
-            // found. If the last object (covered by block L) starts
-            // beyond the current block, then no object ends in L (otherwise
-            // L would be the current block).  So the first bit in L is
-            // a start bit.
-            //
-            // Else the last objects start in the current block and ends
-            // beyond the chunk.  The current block has already been
-            // updated and there is no later block (with an object
-            // starting in it) that needs to be updated.
-            //
-            if (_summary_data.partial_obj_ends_in_block(last_block)) {
-              _summary_data.block(last_block)->set_end_bit_offset(
-                bbu.live_data_left());
-            } else if (last_offset_addr >= cur_block_plus_1_addr) {
-              //   The start of the object is on a later block
-              // (to the right of the current block and there are no
-              // complete live objects to the left of this last object
-              // within the chunk.
-              //   The first bit in the block is for the start of the
-              // last object.
-              _summary_data.block(last_block)->set_start_bit_offset(
-                bbu.live_data_left());
-            } else {
-              //   The start of the last object was found in
-              // the current chunk (which has already
-              // been updated).
-              assert(bbu.cur_block() ==
-                      _summary_data.addr_to_block_idx(last_offset_addr),
-                "Should be a block already processed");
-            }
-#ifdef ASSERT
-            // Is there enough block information to find this object?
-            // The destination of the chunk has not been set so the
-            // values returned by calc_new_pointer() and
-            // block_calc_new_pointer() will only be
-            // offsets.  But they should agree.
-            HeapWord* moved_obj_with_chunks =
-              _summary_data.chunk_calc_new_pointer(last_offset_addr);
-            HeapWord* moved_obj_with_blocks =
-              _summary_data.calc_new_pointer(last_offset_addr);
-            assert(moved_obj_with_chunks == moved_obj_with_blocks,
-              "Block calculation is wrong");
-#endif
-          } else if (last_block < _summary_data.block_count()) {
-            // Iterations ended looking for a start bit (but
-            // did not run off the end of the block table).
-            _summary_data.block(last_block)->set_start_bit_offset(
-              bbu.live_data_left());
-          }
-        }
-#ifdef ASSERT
-        // Is there enough block information to find this object?
-          HeapWord* left_offset_addr = mark_bitmap()->bit_to_addr(left_offset);
-        HeapWord* moved_obj_with_chunks =
-          _summary_data.calc_new_pointer(left_offset_addr);
-        HeapWord* moved_obj_with_blocks =
-          _summary_data.calc_new_pointer(left_offset_addr);
-          assert(moved_obj_with_chunks == moved_obj_with_blocks,
-          "Block calculation is wrong");
-#endif
-
-        // Is there another block after the end of this chunk?
-#ifdef ASSERT
-        if (last_block < _summary_data.block_count()) {
-        // No object may have been found in a block.  If that
-        // block is at the end of the chunk, the iteration will
-        // terminate without incrementing the current block so
-        // that the current block is not the last block in the
-        // chunk.  That situation precludes asserting that the
-        // current block is the last block in the chunk.  Assert
-        // the lesser condition that the current block does not
-        // exceed the chunk.
-          assert(_summary_data.block_to_addr(last_block) <=
-               (_summary_data.chunk_to_addr(chunk_index) +
-                 ParallelCompactData::ChunkSize),
-              "Chunk and block inconsistency");
-          assert(last_offset <= right_offset, "Iteration over ran end");
-        }
-#endif
-      }
-#ifdef ASSERT
-      if (PrintGCDetails && Verbose) {
-        if (_summary_data.chunk(chunk_index)->partial_obj_size() == 1) {
-          size_t first_block =
-            chunk_index / ParallelCompactData::BlocksPerChunk;
-          gclog_or_tty->print_cr("first_block " PTR_FORMAT
-            " _offset " PTR_FORMAT
-            "_first_is_start_bit %d",
-            first_block,
-            _summary_data.block(first_block)->raw_offset(),
-            _summary_data.block(first_block)->first_is_start_bit());
-        }
-      }
-#endif
-    }
-  }
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(16);)
-#endif  // #if 0
-}
-
 // This method should contain all heap-specific policy for invoking a full
 // collection.  invoke_no_policy() will only attempt to compact the heap; it
 // will do nothing further.  If we need to bail out for policy reasons, scavenge
@@ -1937,18 +1501,9 @@
   }
 }
 
-bool ParallelCompactData::chunk_contains(size_t chunk_index, HeapWord* addr) {
-  size_t addr_chunk_index = addr_to_chunk_idx(addr);
-  return chunk_index == addr_chunk_index;
-}
-
-bool ParallelCompactData::chunk_contains_block(size_t chunk_index,
-                                               size_t block_index) {
-  size_t first_block_in_chunk = chunk_index * BlocksPerChunk;
-  size_t last_block_in_chunk = (chunk_index + 1) * BlocksPerChunk - 1;
-
-  return (first_block_in_chunk <= block_index) &&
-         (block_index <= last_block_in_chunk);
+bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
+  size_t addr_region_index = addr_to_region_idx(addr);
+  return region_index == addr_region_index;
 }
 
 // This method contains no policy. You should probably
@@ -2038,39 +1593,9 @@
     }
 #endif  // #ifndef PRODUCT
 
-#ifdef ASSERT
-    if (VerifyParallelOldWithMarkSweep &&
-        (PSParallelCompact::total_invocations() %
-           VerifyParallelOldWithMarkSweepInterval) == 0) {
-      gclog_or_tty->print_cr("Verify marking with mark_sweep_phase1()");
-      if (PrintGCDetails && Verbose) {
-        gclog_or_tty->print_cr("mark_sweep_phase1:");
-      }
-      // Clear the discovered lists so that discovered objects
-      // don't look like they have been discovered twice.
-      ref_processor()->clear_discovered_references();
-
-      PSMarkSweep::allocate_stacks();
-      MemRegion mr = Universe::heap()->reserved_region();
-      PSMarkSweep::ref_processor()->enable_discovery();
-      PSMarkSweep::mark_sweep_phase1(maximum_heap_compaction);
-    }
-#endif
-
     bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc;
     summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc);
 
-#ifdef ASSERT
-    if (VerifyParallelOldWithMarkSweep &&
-        (PSParallelCompact::total_invocations() %
-           VerifyParallelOldWithMarkSweepInterval) == 0) {
-      if (PrintGCDetails && Verbose) {
-        gclog_or_tty->print_cr("mark_sweep_phase2:");
-      }
-      PSMarkSweep::mark_sweep_phase2();
-    }
-#endif
-
     COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity"));
     COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
 
@@ -2078,28 +1603,6 @@
     // needed by the compaction for filling holes in the dense prefix.
     adjust_roots();
 
-#ifdef ASSERT
-    if (VerifyParallelOldWithMarkSweep &&
-        (PSParallelCompact::total_invocations() %
-           VerifyParallelOldWithMarkSweepInterval) == 0) {
-      // Do a separate verify phase so that the verify
-      // code can use the the forwarding pointers to
-      // check the new pointer calculation.  The restore_marks()
-      // has to be done before the real compact.
-      vmthread_cm->set_action(ParCompactionManager::VerifyUpdate);
-      compact_perm(vmthread_cm);
-      compact_serial(vmthread_cm);
-      vmthread_cm->set_action(ParCompactionManager::ResetObjects);
-      compact_perm(vmthread_cm);
-      compact_serial(vmthread_cm);
-      vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy);
-
-      // For debugging only
-      PSMarkSweep::restore_marks();
-      PSMarkSweep::deallocate_stacks();
-    }
-#endif
-
     compaction_start.update();
     // Does the perm gen always have to be done serially because
     // klasses are used in the update of an object?
@@ -2349,7 +1852,7 @@
 
   ParallelScavengeHeap* heap = gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
+  TaskQueueSetSuper* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(parallel_gc_threads, qset);
 
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
@@ -2487,8 +1990,9 @@
   move_and_update(cm, perm_space_id);
 }
 
-void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q,
-                                                     uint parallel_gc_threads) {
+void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q,
+                                                      uint parallel_gc_threads)
+{
   TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty);
 
   const unsigned int task_count = MAX2(parallel_gc_threads, 1U);
@@ -2496,13 +2000,13 @@
     q->enqueue(new DrainStacksCompactionTask());
   }
 
-  // Find all chunks that are available (can be filled immediately) and
+  // Find all regions that are available (can be filled immediately) and
   // distribute them to the thread stacks.  The iteration is done in reverse
-  // order (high to low) so the chunks will be removed in ascending order.
+  // order (high to low) so the regions will be removed in ascending order.
 
   const ParallelCompactData& sd = PSParallelCompact::summary_data();
 
-  size_t fillable_chunks = 0;   // A count for diagnostic purposes.
+  size_t fillable_regions = 0;   // A count for diagnostic purposes.
   unsigned int which = 0;       // The worker thread number.
 
   for (unsigned int id = to_space_id; id > perm_space_id; --id) {
@@ -2510,25 +2014,26 @@
     MutableSpace* const space = space_info->space();
     HeapWord* const new_top = space_info->new_top();
 
-    const size_t beg_chunk = sd.addr_to_chunk_idx(space_info->dense_prefix());
-    const size_t end_chunk = sd.addr_to_chunk_idx(sd.chunk_align_up(new_top));
-    assert(end_chunk > 0, "perm gen cannot be empty");
-
-    for (size_t cur = end_chunk - 1; cur >= beg_chunk; --cur) {
-      if (sd.chunk(cur)->claim_unsafe()) {
+    const size_t beg_region = sd.addr_to_region_idx(space_info->dense_prefix());
+    const size_t end_region =
+      sd.addr_to_region_idx(sd.region_align_up(new_top));
+    assert(end_region > 0, "perm gen cannot be empty");
+
+    for (size_t cur = end_region - 1; cur >= beg_region; --cur) {
+      if (sd.region(cur)->claim_unsafe()) {
         ParCompactionManager* cm = ParCompactionManager::manager_array(which);
         cm->save_for_processing(cur);
 
         if (TraceParallelOldGCCompactionPhase && Verbose) {
-          const size_t count_mod_8 = fillable_chunks & 7;
+          const size_t count_mod_8 = fillable_regions & 7;
           if (count_mod_8 == 0) gclog_or_tty->print("fillable: ");
           gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur);
           if (count_mod_8 == 7) gclog_or_tty->cr();
         }
 
-        NOT_PRODUCT(++fillable_chunks;)
-
-        // Assign chunks to threads in round-robin fashion.
+        NOT_PRODUCT(++fillable_regions;)
+
+        // Assign regions to threads in round-robin fashion.
         if (++which == task_count) {
           which = 0;
         }
@@ -2537,8 +2042,8 @@
   }
 
   if (TraceParallelOldGCCompactionPhase) {
-    if (Verbose && (fillable_chunks & 7) != 0) gclog_or_tty->cr();
-    gclog_or_tty->print_cr("%u initially fillable chunks", fillable_chunks);
+    if (Verbose && (fillable_regions & 7) != 0) gclog_or_tty->cr();
+    gclog_or_tty->print_cr("%u initially fillable regions", fillable_regions);
   }
 }
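A minimal standalone sketch (illustrative only, not the HotSpot code) of the claim-and-distribute pattern used in the draining-task setup above: walk the region table from high to low, claim each fillable region exactly once, and push the claimed indices onto per-worker stacks in round-robin order. All names below are hypothetical; std::atomic stands in for RegionData's claim machinery.

    #include <atomic>
    #include <cstddef>
    #include <vector>

    // One entry per region; a region may be claimed at most once.
    struct Region {
      std::atomic<bool> taken{false};
      bool claim_unsafe() { return !taken.exchange(true); }
    };

    // Hand every currently fillable region to a worker stack.  Indices are
    // pushed in descending order, so a LIFO pop yields them in ascending order.
    void distribute_fillable(Region* regions, size_t region_count,
                             std::vector<std::vector<size_t>>& worker_stacks) {
      const size_t workers = worker_stacks.size();
      size_t which = 0;                        // next worker, round-robin
      for (size_t cur = region_count; cur-- > 0; ) {
        if (regions[cur].claim_unsafe()) {
          worker_stacks[which].push_back(cur);
          if (++which == workers) which = 0;   // wrap around
        }
      }
    }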
 
@@ -2551,7 +2056,7 @@
   ParallelCompactData& sd = PSParallelCompact::summary_data();
 
   // Iterate over all the spaces adding tasks for updating
-  // chunks in the dense prefix.  Assume that 1 gc thread
+  // regions in the dense prefix.  Assume that 1 gc thread
   // will work on opening the gaps and the remaining gc threads
   // will work on the dense prefix.
   SpaceId space_id = old_space_id;
@@ -2565,30 +2070,31 @@
       continue;
     }
 
-    // The dense prefix is before this chunk.
-    size_t chunk_index_end_dense_prefix =
-        sd.addr_to_chunk_idx(dense_prefix_end);
-    ChunkData* const dense_prefix_cp = sd.chunk(chunk_index_end_dense_prefix);
+    // The dense prefix is before this region.
+    size_t region_index_end_dense_prefix =
+        sd.addr_to_region_idx(dense_prefix_end);
+    RegionData* const dense_prefix_cp =
+      sd.region(region_index_end_dense_prefix);
     assert(dense_prefix_end == space->end() ||
            dense_prefix_cp->available() ||
            dense_prefix_cp->claimed(),
-           "The chunk after the dense prefix should always be ready to fill");
-
-    size_t chunk_index_start = sd.addr_to_chunk_idx(space->bottom());
+           "The region after the dense prefix should always be ready to fill");
+
+    size_t region_index_start = sd.addr_to_region_idx(space->bottom());
 
     // Is there dense prefix work?
-    size_t total_dense_prefix_chunks =
-      chunk_index_end_dense_prefix - chunk_index_start;
-    // How many chunks of the dense prefix should be given to
+    size_t total_dense_prefix_regions =
+      region_index_end_dense_prefix - region_index_start;
+    // How many regions of the dense prefix should be given to
     // each thread?
-    if (total_dense_prefix_chunks > 0) {
+    if (total_dense_prefix_regions > 0) {
       uint tasks_for_dense_prefix = 1;
       if (UseParallelDensePrefixUpdate) {
-        if (total_dense_prefix_chunks <=
+        if (total_dense_prefix_regions <=
             (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
           // Don't over partition.  This assumes that
           // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
-          // so there are not many chunks to process.
+          // so there are not many regions to process.
           tasks_for_dense_prefix = parallel_gc_threads;
         } else {
           // Over partition
@@ -2596,50 +2102,50 @@
             PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
         }
       }
-      size_t chunks_per_thread = total_dense_prefix_chunks /
+      size_t regions_per_thread = total_dense_prefix_regions /
         tasks_for_dense_prefix;
-      // Give each thread at least 1 chunk.
-      if (chunks_per_thread == 0) {
-        chunks_per_thread = 1;
+      // Give each thread at least 1 region.
+      if (regions_per_thread == 0) {
+        regions_per_thread = 1;
       }
 
       for (uint k = 0; k < tasks_for_dense_prefix; k++) {
-        if (chunk_index_start >= chunk_index_end_dense_prefix) {
+        if (region_index_start >= region_index_end_dense_prefix) {
           break;
         }
-        // chunk_index_end is not processed
-        size_t chunk_index_end = MIN2(chunk_index_start + chunks_per_thread,
-                                      chunk_index_end_dense_prefix);
+        // region_index_end is not processed
+        size_t region_index_end = MIN2(region_index_start + regions_per_thread,
+                                       region_index_end_dense_prefix);
         q->enqueue(new UpdateDensePrefixTask(
                                  space_id,
-                                 chunk_index_start,
-                                 chunk_index_end));
-        chunk_index_start = chunk_index_end;
+                                 region_index_start,
+                                 region_index_end));
+        region_index_start = region_index_end;
       }
     }
     // This gets any part of the dense prefix that did not
     // fit evenly.
-    if (chunk_index_start < chunk_index_end_dense_prefix) {
+    if (region_index_start < region_index_end_dense_prefix) {
       q->enqueue(new UpdateDensePrefixTask(
                                  space_id,
-                                 chunk_index_start,
-                                 chunk_index_end_dense_prefix));
+                                 region_index_start,
+                                 region_index_end_dense_prefix));
     }
     space_id = next_compaction_space_id(space_id);
   }  // End tasks for dense prefix
 }
 
-void PSParallelCompact::enqueue_chunk_stealing_tasks(
+void PSParallelCompact::enqueue_region_stealing_tasks(
                                      GCTaskQueue* q,
                                      ParallelTaskTerminator* terminator_ptr,
                                      uint parallel_gc_threads) {
   TraceTime tm("steal task setup", print_phases(), true, gclog_or_tty);
 
-  // Once a thread has drained it's stack, it should try to steal chunks from
+  // Once a thread has drained its stack, it should try to steal regions from
   // other threads.
   if (parallel_gc_threads > 1) {
     for (uint j = 0; j < parallel_gc_threads; j++) {
-      q->enqueue(new StealChunkCompactionTask(terminator_ptr));
+      q->enqueue(new StealRegionCompactionTask(terminator_ptr));
     }
   }
 }
@@ -2654,13 +2160,13 @@
   PSOldGen* old_gen = heap->old_gen();
   old_gen->start_array()->reset();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  TaskQueueSetSuper* qset = ParCompactionManager::chunk_array();
+  TaskQueueSetSuper* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(parallel_gc_threads, qset);
 
   GCTaskQueue* q = GCTaskQueue::create();
-  enqueue_chunk_draining_tasks(q, parallel_gc_threads);
+  enqueue_region_draining_tasks(q, parallel_gc_threads);
   enqueue_dense_prefix_tasks(q, parallel_gc_threads);
-  enqueue_chunk_stealing_tasks(q, &terminator, parallel_gc_threads);
+  enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads);
 
   {
     TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty);
@@ -2676,9 +2182,9 @@
     WaitForBarrierGCTask::destroy(fin);
 
 #ifdef  ASSERT
-    // Verify that all chunks have been processed before the deferred updates.
+    // Verify that all regions have been processed before the deferred updates.
     // Note that perm_space_id is skipped; this type of verification is not
-    // valid until the perm gen is compacted by chunks.
+    // valid until the perm gen is compacted by regions.
     for (unsigned int id = old_space_id; id < last_space_id; ++id) {
       verify_complete(SpaceId(id));
     }
@@ -2697,42 +2203,42 @@
 
 #ifdef  ASSERT
 void PSParallelCompact::verify_complete(SpaceId space_id) {
-  // All Chunks between space bottom() to new_top() should be marked as filled
-  // and all Chunks between new_top() and top() should be available (i.e.,
+  // All Regions between space bottom() and new_top() should be marked as filled
+  // and all Regions between new_top() and top() should be available (i.e.,
   // should have been emptied).
   ParallelCompactData& sd = summary_data();
   SpaceInfo si = _space_info[space_id];
-  HeapWord* new_top_addr = sd.chunk_align_up(si.new_top());
-  HeapWord* old_top_addr = sd.chunk_align_up(si.space()->top());
-  const size_t beg_chunk = sd.addr_to_chunk_idx(si.space()->bottom());
-  const size_t new_top_chunk = sd.addr_to_chunk_idx(new_top_addr);
-  const size_t old_top_chunk = sd.addr_to_chunk_idx(old_top_addr);
+  HeapWord* new_top_addr = sd.region_align_up(si.new_top());
+  HeapWord* old_top_addr = sd.region_align_up(si.space()->top());
+  const size_t beg_region = sd.addr_to_region_idx(si.space()->bottom());
+  const size_t new_top_region = sd.addr_to_region_idx(new_top_addr);
+  const size_t old_top_region = sd.addr_to_region_idx(old_top_addr);
 
   bool issued_a_warning = false;
 
-  size_t cur_chunk;
-  for (cur_chunk = beg_chunk; cur_chunk < new_top_chunk; ++cur_chunk) {
-    const ChunkData* const c = sd.chunk(cur_chunk);
+  size_t cur_region;
+  for (cur_region = beg_region; cur_region < new_top_region; ++cur_region) {
+    const RegionData* const c = sd.region(cur_region);
     if (!c->completed()) {
-      warning("chunk " SIZE_FORMAT " not filled:  "
+      warning("region " SIZE_FORMAT " not filled:  "
               "destination_count=" SIZE_FORMAT,
-              cur_chunk, c->destination_count());
+              cur_region, c->destination_count());
       issued_a_warning = true;
     }
   }
 
-  for (cur_chunk = new_top_chunk; cur_chunk < old_top_chunk; ++cur_chunk) {
-    const ChunkData* const c = sd.chunk(cur_chunk);
+  for (cur_region = new_top_region; cur_region < old_top_region; ++cur_region) {
+    const RegionData* const c = sd.region(cur_region);
     if (!c->available()) {
-      warning("chunk " SIZE_FORMAT " not empty:   "
+      warning("region " SIZE_FORMAT " not empty:   "
               "destination_count=" SIZE_FORMAT,
-              cur_chunk, c->destination_count());
+              cur_region, c->destination_count());
       issued_a_warning = true;
     }
   }
 
   if (issued_a_warning) {
-    print_chunk_ranges();
+    print_region_ranges();
   }
 }
 #endif  // #ifdef ASSERT
@@ -2933,46 +2439,47 @@
 }
 #endif //VALIDATE_MARK_SWEEP
 
-// Update interior oops in the ranges of chunks [beg_chunk, end_chunk).
+// Update interior oops in the ranges of regions [beg_region, end_region).
 void
 PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
                                                        SpaceId space_id,
-                                                       size_t beg_chunk,
-                                                       size_t end_chunk) {
+                                                       size_t beg_region,
+                                                       size_t end_region) {
   ParallelCompactData& sd = summary_data();
   ParMarkBitMap* const mbm = mark_bitmap();
 
-  HeapWord* beg_addr = sd.chunk_to_addr(beg_chunk);
-  HeapWord* const end_addr = sd.chunk_to_addr(end_chunk);
-  assert(beg_chunk <= end_chunk, "bad chunk range");
+  HeapWord* beg_addr = sd.region_to_addr(beg_region);
+  HeapWord* const end_addr = sd.region_to_addr(end_region);
+  assert(beg_region <= end_region, "bad region range");
   assert(end_addr <= dense_prefix(space_id), "not in the dense prefix");
 
 #ifdef  ASSERT
-  // Claim the chunks to avoid triggering an assert when they are marked as
+  // Claim the regions to avoid triggering an assert when they are marked as
   // filled.
-  for (size_t claim_chunk = beg_chunk; claim_chunk < end_chunk; ++claim_chunk) {
-    assert(sd.chunk(claim_chunk)->claim_unsafe(), "claim() failed");
+  for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) {
+    assert(sd.region(claim_region)->claim_unsafe(), "claim() failed");
   }
 #endif  // #ifdef ASSERT
 
   if (beg_addr != space(space_id)->bottom()) {
     // Find the first live object or block of dead space that *starts* in this
-    // range of chunks.  If a partial object crosses onto the chunk, skip it; it
-    // will be marked for 'deferred update' when the object head is processed.
-    // If dead space crosses onto the chunk, it is also skipped; it will be
-    // filled when the prior chunk is processed.  If neither of those apply, the
-    // first word in the chunk is the start of a live object or dead space.
+    // range of regions.  If a partial object crosses onto the region, skip it;
+    // it will be marked for 'deferred update' when the object head is
+    // processed.  If dead space crosses onto the region, it is also skipped; it
+    // will be filled when the prior region is processed.  If neither of those
+    // apply, the first word in the region is the start of a live object or dead
+    // space.
     assert(beg_addr > space(space_id)->bottom(), "sanity");
-    const ChunkData* const cp = sd.chunk(beg_chunk);
+    const RegionData* const cp = sd.region(beg_region);
     if (cp->partial_obj_size() != 0) {
-      beg_addr = sd.partial_obj_end(beg_chunk);
+      beg_addr = sd.partial_obj_end(beg_region);
     } else if (dead_space_crosses_boundary(cp, mbm->addr_to_bit(beg_addr))) {
       beg_addr = mbm->find_obj_beg(beg_addr, end_addr);
     }
   }
 
   if (beg_addr < end_addr) {
-    // A live object or block of dead space starts in this range of Chunks.
+    // A live object or block of dead space starts in this range of Regions.
      HeapWord* const dense_prefix_end = dense_prefix(space_id);
 
     // Create closures and iterate.
@@ -2986,10 +2493,10 @@
     }
   }
 
-  // Mark the chunks as filled.
-  ChunkData* const beg_cp = sd.chunk(beg_chunk);
-  ChunkData* const end_cp = sd.chunk(end_chunk);
-  for (ChunkData* cp = beg_cp; cp < end_cp; ++cp) {
+  // Mark the regions as filled.
+  RegionData* const beg_cp = sd.region(beg_region);
+  RegionData* const end_cp = sd.region(end_region);
+  for (RegionData* cp = beg_cp; cp < end_cp; ++cp) {
     cp->set_completed();
   }
 }
@@ -3021,13 +2528,13 @@
   const MutableSpace* const space = space_info->space();
   assert(space_info->dense_prefix() >= space->bottom(), "dense_prefix not set");
   HeapWord* const beg_addr = space_info->dense_prefix();
-  HeapWord* const end_addr = sd.chunk_align_up(space_info->new_top());
-
-  const ChunkData* const beg_chunk = sd.addr_to_chunk_ptr(beg_addr);
-  const ChunkData* const end_chunk = sd.addr_to_chunk_ptr(end_addr);
-  const ChunkData* cur_chunk;
-  for (cur_chunk = beg_chunk; cur_chunk < end_chunk; ++cur_chunk) {
-    HeapWord* const addr = cur_chunk->deferred_obj_addr();
+  HeapWord* const end_addr = sd.region_align_up(space_info->new_top());
+
+  const RegionData* const beg_region = sd.addr_to_region_ptr(beg_addr);
+  const RegionData* const end_region = sd.addr_to_region_ptr(end_addr);
+  const RegionData* cur_region;
+  for (cur_region = beg_region; cur_region < end_region; ++cur_region) {
+    HeapWord* const addr = cur_region->deferred_obj_addr();
     if (addr != NULL) {
       if (start_array != NULL) {
         start_array->allocate_block(addr);
@@ -3073,45 +2580,45 @@
 
 HeapWord*
 PSParallelCompact::first_src_addr(HeapWord* const dest_addr,
-                                 size_t src_chunk_idx)
+                                 size_t src_region_idx)
 {
   ParMarkBitMap* const bitmap = mark_bitmap();
   const ParallelCompactData& sd = summary_data();
-  const size_t ChunkSize = ParallelCompactData::ChunkSize;
-
-  assert(sd.is_chunk_aligned(dest_addr), "not aligned");
-
-  const ChunkData* const src_chunk_ptr = sd.chunk(src_chunk_idx);
-  const size_t partial_obj_size = src_chunk_ptr->partial_obj_size();
-  HeapWord* const src_chunk_destination = src_chunk_ptr->destination();
-
-  assert(dest_addr >= src_chunk_destination, "wrong src chunk");
-  assert(src_chunk_ptr->data_size() > 0, "src chunk cannot be empty");
-
-  HeapWord* const src_chunk_beg = sd.chunk_to_addr(src_chunk_idx);
-  HeapWord* const src_chunk_end = src_chunk_beg + ChunkSize;
-
-  HeapWord* addr = src_chunk_beg;
-  if (dest_addr == src_chunk_destination) {
-    // Return the first live word in the source chunk.
+  const size_t RegionSize = ParallelCompactData::RegionSize;
+
+  assert(sd.is_region_aligned(dest_addr), "not aligned");
+
+  const RegionData* const src_region_ptr = sd.region(src_region_idx);
+  const size_t partial_obj_size = src_region_ptr->partial_obj_size();
+  HeapWord* const src_region_destination = src_region_ptr->destination();
+
+  assert(dest_addr >= src_region_destination, "wrong src region");
+  assert(src_region_ptr->data_size() > 0, "src region cannot be empty");
+
+  HeapWord* const src_region_beg = sd.region_to_addr(src_region_idx);
+  HeapWord* const src_region_end = src_region_beg + RegionSize;
+
+  HeapWord* addr = src_region_beg;
+  if (dest_addr == src_region_destination) {
+    // Return the first live word in the source region.
     if (partial_obj_size == 0) {
-      addr = bitmap->find_obj_beg(addr, src_chunk_end);
-      assert(addr < src_chunk_end, "no objects start in src chunk");
+      addr = bitmap->find_obj_beg(addr, src_region_end);
+      assert(addr < src_region_end, "no objects start in src region");
     }
     return addr;
   }
 
   // Must skip some live data.
-  size_t words_to_skip = dest_addr - src_chunk_destination;
-  assert(src_chunk_ptr->data_size() > words_to_skip, "wrong src chunk");
+  size_t words_to_skip = dest_addr - src_region_destination;
+  assert(src_region_ptr->data_size() > words_to_skip, "wrong src region");
 
   if (partial_obj_size >= words_to_skip) {
     // All the live words to skip are part of the partial object.
     addr += words_to_skip;
     if (partial_obj_size == words_to_skip) {
       // Find the first live word past the partial object.
-      addr = bitmap->find_obj_beg(addr, src_chunk_end);
-      assert(addr < src_chunk_end, "wrong src chunk");
+      addr = bitmap->find_obj_beg(addr, src_region_end);
+      assert(addr < src_region_end, "wrong src region");
     }
     return addr;
   }
@@ -3122,63 +2629,64 @@
     addr += partial_obj_size;
   }
 
-  // Skip over live words due to objects that start in the chunk.
-  addr = skip_live_words(addr, src_chunk_end, words_to_skip);
-  assert(addr < src_chunk_end, "wrong src chunk");
+  // Skip over live words due to objects that start in the region.
+  addr = skip_live_words(addr, src_region_end, words_to_skip);
+  assert(addr < src_region_end, "wrong src region");
   return addr;
 }
 
 void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm,
-                                                     size_t beg_chunk,
+                                                     size_t beg_region,
                                                      HeapWord* end_addr)
 {
   ParallelCompactData& sd = summary_data();
-  ChunkData* const beg = sd.chunk(beg_chunk);
-  HeapWord* const end_addr_aligned_up = sd.chunk_align_up(end_addr);
-  ChunkData* const end = sd.addr_to_chunk_ptr(end_addr_aligned_up);
-  size_t cur_idx = beg_chunk;
-  for (ChunkData* cur = beg; cur < end; ++cur, ++cur_idx) {
-    assert(cur->data_size() > 0, "chunk must have live data");
+  RegionData* const beg = sd.region(beg_region);
+  HeapWord* const end_addr_aligned_up = sd.region_align_up(end_addr);
+  RegionData* const end = sd.addr_to_region_ptr(end_addr_aligned_up);
+  size_t cur_idx = beg_region;
+  for (RegionData* cur = beg; cur < end; ++cur, ++cur_idx) {
+    assert(cur->data_size() > 0, "region must have live data");
     cur->decrement_destination_count();
-    if (cur_idx <= cur->source_chunk() && cur->available() && cur->claim()) {
+    if (cur_idx <= cur->source_region() && cur->available() && cur->claim()) {
       cm->save_for_processing(cur_idx);
     }
   }
 }
 
-size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure,
-                                         SpaceId& src_space_id,
-                                         HeapWord*& src_space_top,
-                                         HeapWord* end_addr)
+size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure,
+                                          SpaceId& src_space_id,
+                                          HeapWord*& src_space_top,
+                                          HeapWord* end_addr)
 {
-  typedef ParallelCompactData::ChunkData ChunkData;
+  typedef ParallelCompactData::RegionData RegionData;
 
   ParallelCompactData& sd = PSParallelCompact::summary_data();
-  const size_t chunk_size = ParallelCompactData::ChunkSize;
-
-  size_t src_chunk_idx = 0;
-
-  // Skip empty chunks (if any) up to the top of the space.
-  HeapWord* const src_aligned_up = sd.chunk_align_up(end_addr);
-  ChunkData* src_chunk_ptr = sd.addr_to_chunk_ptr(src_aligned_up);
-  HeapWord* const top_aligned_up = sd.chunk_align_up(src_space_top);
-  const ChunkData* const top_chunk_ptr = sd.addr_to_chunk_ptr(top_aligned_up);
-  while (src_chunk_ptr < top_chunk_ptr && src_chunk_ptr->data_size() == 0) {
-    ++src_chunk_ptr;
+  const size_t region_size = ParallelCompactData::RegionSize;
+
+  size_t src_region_idx = 0;
+
+  // Skip empty regions (if any) up to the top of the space.
+  HeapWord* const src_aligned_up = sd.region_align_up(end_addr);
+  RegionData* src_region_ptr = sd.addr_to_region_ptr(src_aligned_up);
+  HeapWord* const top_aligned_up = sd.region_align_up(src_space_top);
+  const RegionData* const top_region_ptr =
+    sd.addr_to_region_ptr(top_aligned_up);
+  while (src_region_ptr < top_region_ptr && src_region_ptr->data_size() == 0) {
+    ++src_region_ptr;
   }
 
-  if (src_chunk_ptr < top_chunk_ptr) {
-    // The next source chunk is in the current space.  Update src_chunk_idx and
-    // the source address to match src_chunk_ptr.
-    src_chunk_idx = sd.chunk(src_chunk_ptr);
-    HeapWord* const src_chunk_addr = sd.chunk_to_addr(src_chunk_idx);
-    if (src_chunk_addr > closure.source()) {
-      closure.set_source(src_chunk_addr);
+  if (src_region_ptr < top_region_ptr) {
+    // The next source region is in the current space.  Update src_region_idx
+    // and the source address to match src_region_ptr.
+    src_region_idx = sd.region(src_region_ptr);
+    HeapWord* const src_region_addr = sd.region_to_addr(src_region_idx);
+    if (src_region_addr > closure.source()) {
+      closure.set_source(src_region_addr);
     }
-    return src_chunk_idx;
+    return src_region_idx;
   }
 
-  // Switch to a new source space and find the first non-empty chunk.
+  // Switch to a new source space and find the first non-empty region.
   unsigned int space_id = src_space_id + 1;
   assert(space_id < last_space_id, "not enough spaces");
 
@@ -3187,14 +2695,14 @@
   do {
     MutableSpace* space = _space_info[space_id].space();
     HeapWord* const bottom = space->bottom();
-    const ChunkData* const bottom_cp = sd.addr_to_chunk_ptr(bottom);
+    const RegionData* const bottom_cp = sd.addr_to_region_ptr(bottom);
 
     // Iterate over the spaces that do not compact into themselves.
     if (bottom_cp->destination() != bottom) {
-      HeapWord* const top_aligned_up = sd.chunk_align_up(space->top());
-      const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up);
-
-      for (const ChunkData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
+      HeapWord* const top_aligned_up = sd.region_align_up(space->top());
+      const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
+
+      for (const RegionData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) {
         if (src_cp->live_obj_size() > 0) {
           // Found it.
           assert(src_cp->destination() == destination,
@@ -3204,9 +2712,9 @@
 
           src_space_id = SpaceId(space_id);
           src_space_top = space->top();
-          const size_t src_chunk_idx = sd.chunk(src_cp);
-          closure.set_source(sd.chunk_to_addr(src_chunk_idx));
-          return src_chunk_idx;
+          const size_t src_region_idx = sd.region(src_cp);
+          closure.set_source(sd.region_to_addr(src_region_idx));
+          return src_region_idx;
         } else {
           assert(src_cp->data_size() == 0, "sanity");
         }
@@ -3214,38 +2722,38 @@
     }
   } while (++space_id < last_space_id);
 
-  assert(false, "no source chunk was found");
+  assert(false, "no source region was found");
   return 0;
 }
 
-void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx)
+void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
 {
   typedef ParMarkBitMap::IterationStatus IterationStatus;
-  const size_t ChunkSize = ParallelCompactData::ChunkSize;
+  const size_t RegionSize = ParallelCompactData::RegionSize;
   ParMarkBitMap* const bitmap = mark_bitmap();
   ParallelCompactData& sd = summary_data();
-  ChunkData* const chunk_ptr = sd.chunk(chunk_idx);
+  RegionData* const region_ptr = sd.region(region_idx);
 
   // Get the items needed to construct the closure.
-  HeapWord* dest_addr = sd.chunk_to_addr(chunk_idx);
+  HeapWord* dest_addr = sd.region_to_addr(region_idx);
   SpaceId dest_space_id = space_id(dest_addr);
   ObjectStartArray* start_array = _space_info[dest_space_id].start_array();
   HeapWord* new_top = _space_info[dest_space_id].new_top();
   assert(dest_addr < new_top, "sanity");
-  const size_t words = MIN2(pointer_delta(new_top, dest_addr), ChunkSize);
-
-  // Get the source chunk and related info.
-  size_t src_chunk_idx = chunk_ptr->source_chunk();
-  SpaceId src_space_id = space_id(sd.chunk_to_addr(src_chunk_idx));
+  const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize);
+
+  // Get the source region and related info.
+  size_t src_region_idx = region_ptr->source_region();
+  SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
   HeapWord* src_space_top = _space_info[src_space_id].space()->top();
 
   MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words);
-  closure.set_source(first_src_addr(dest_addr, src_chunk_idx));
-
-  // Adjust src_chunk_idx to prepare for decrementing destination counts (the
-  // destination count is not decremented when a chunk is copied to itself).
-  if (src_chunk_idx == chunk_idx) {
-    src_chunk_idx += 1;
+  closure.set_source(first_src_addr(dest_addr, src_region_idx));
+
+  // Adjust src_region_idx to prepare for decrementing destination counts (the
+  // destination count is not decremented when a region is copied to itself).
+  if (src_region_idx == region_idx) {
+    src_region_idx += 1;
   }
 
   if (bitmap->is_unmarked(closure.source())) {
@@ -3255,32 +2763,33 @@
     HeapWord* const old_src_addr = closure.source();
     closure.copy_partial_obj();
     if (closure.is_full()) {
-      decrement_destination_counts(cm, src_chunk_idx, closure.source());
-      chunk_ptr->set_deferred_obj_addr(NULL);
-      chunk_ptr->set_completed();
+      decrement_destination_counts(cm, src_region_idx, closure.source());
+      region_ptr->set_deferred_obj_addr(NULL);
+      region_ptr->set_completed();
       return;
     }
 
-    HeapWord* const end_addr = sd.chunk_align_down(closure.source());
-    if (sd.chunk_align_down(old_src_addr) != end_addr) {
-      // The partial object was copied from more than one source chunk.
-      decrement_destination_counts(cm, src_chunk_idx, end_addr);
-
-      // Move to the next source chunk, possibly switching spaces as well.  All
+    HeapWord* const end_addr = sd.region_align_down(closure.source());
+    if (sd.region_align_down(old_src_addr) != end_addr) {
+      // The partial object was copied from more than one source region.
+      decrement_destination_counts(cm, src_region_idx, end_addr);
+
+      // Move to the next source region, possibly switching spaces as well.  All
       // args except end_addr may be modified.
-      src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
-                                     end_addr);
+      src_region_idx = next_src_region(closure, src_space_id, src_space_top,
+                                       end_addr);
     }
   }
 
   do {
     HeapWord* const cur_addr = closure.source();
-    HeapWord* const end_addr = MIN2(sd.chunk_align_up(cur_addr + 1),
+    HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1),
                                     src_space_top);
     IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr);
 
     if (status == ParMarkBitMap::incomplete) {
-      // The last obj that starts in the source chunk does not end in the chunk.
+      // The last obj that starts in the source region does not end in the
+      // region.
       assert(closure.source() < end_addr, "sanity")
       HeapWord* const obj_beg = closure.source();
       HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(),
@@ -3299,28 +2808,28 @@
 
     if (status == ParMarkBitMap::would_overflow) {
       // The last object did not fit.  Note that interior oop updates were
-      // deferred, then copy enough of the object to fill the chunk.
-      chunk_ptr->set_deferred_obj_addr(closure.destination());
+      // deferred, then copy enough of the object to fill the region.
+      region_ptr->set_deferred_obj_addr(closure.destination());
       status = closure.copy_until_full(); // copies from closure.source()
 
-      decrement_destination_counts(cm, src_chunk_idx, closure.source());
-      chunk_ptr->set_completed();
+      decrement_destination_counts(cm, src_region_idx, closure.source());
+      region_ptr->set_completed();
       return;
     }
 
     if (status == ParMarkBitMap::full) {
-      decrement_destination_counts(cm, src_chunk_idx, closure.source());
-      chunk_ptr->set_deferred_obj_addr(NULL);
-      chunk_ptr->set_completed();
+      decrement_destination_counts(cm, src_region_idx, closure.source());
+      region_ptr->set_deferred_obj_addr(NULL);
+      region_ptr->set_completed();
       return;
     }
 
-    decrement_destination_counts(cm, src_chunk_idx, end_addr);
-
-    // Move to the next source chunk, possibly switching spaces as well.  All
+    decrement_destination_counts(cm, src_region_idx, end_addr);
+
+    // Move to the next source region, possibly switching spaces as well.  All
     // args except end_addr may be modified.
-    src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top,
-                                   end_addr);
+    src_region_idx = next_src_region(closure, src_space_id, src_space_top,
+                                     end_addr);
   } while (true);
 }
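fill_region() above keeps copying from successive source regions until the destination region is full, and each drained source region is released through decrement_destination_counts().  A standalone sketch of that release protocol, simplified from the real RegionData state (which packs the count and live-object size into one _dc_and_los word and applies an extra ordering check before claiming); names and types here are placeholders.

    #include <atomic>
    #include <cstddef>
    #include <vector>

    // Hypothetical per-region bookkeeping: the number of destination regions
    // that still need data copied out of this region.
    struct Region {
      std::atomic<unsigned> destination_count{0};  // set by the summary phase
      std::atomic<bool>     taken{false};
      bool claim() { return !taken.exchange(true); }
    };

    // After the current destination has consumed data from source regions
    // [beg, end), drop each source region's count.  A count of zero means all
    // of that region's live data has been copied away, so the region can be
    // claimed and queued to be filled in turn.
    void release_sources(Region* regions, size_t beg, size_t end,
                         std::vector<size_t>& work_stack) {
      for (size_t idx = beg; idx < end; ++idx) {
        Region& r = regions[idx];
        const unsigned remaining = r.destination_count.fetch_sub(1) - 1;
        if (remaining == 0 && r.claim()) {
          work_stack.push_back(idx);   // now fillable; hand to a worker
        }
      }
    }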
 
@@ -3352,15 +2861,15 @@
   }
 #endif
 
-  const size_t beg_chunk = sd.addr_to_chunk_idx(beg_addr);
-  const size_t dp_chunk = sd.addr_to_chunk_idx(dp_addr);
-  if (beg_chunk < dp_chunk) {
-    update_and_deadwood_in_dense_prefix(cm, space_id, beg_chunk, dp_chunk);
+  const size_t beg_region = sd.addr_to_region_idx(beg_addr);
+  const size_t dp_region = sd.addr_to_region_idx(dp_addr);
+  if (beg_region < dp_region) {
+    update_and_deadwood_in_dense_prefix(cm, space_id, beg_region, dp_region);
   }
 
-  // The destination of the first live object that starts in the chunk is one
-  // past the end of the partial object entering the chunk (if any).
-  HeapWord* const dest_addr = sd.partial_obj_end(dp_chunk);
+  // The destination of the first live object that starts in the region is one
+  // past the end of the partial object entering the region (if any).
+  HeapWord* const dest_addr = sd.partial_obj_end(dp_region);
   HeapWord* const new_top = _space_info[space_id].new_top();
   assert(new_top >= dest_addr, "bad new_top value");
   const size_t words = pointer_delta(new_top, dest_addr);
@@ -3469,172 +2978,6 @@
   return ParMarkBitMap::incomplete;
 }
 
-BitBlockUpdateClosure::BitBlockUpdateClosure(ParMarkBitMap* mbm,
-                        ParCompactionManager* cm,
-                        size_t chunk_index) :
-                        ParMarkBitMapClosure(mbm, cm),
-                        _live_data_left(0),
-                        _cur_block(0) {
-  _chunk_start =
-    PSParallelCompact::summary_data().chunk_to_addr(chunk_index);
-  _chunk_end =
-    PSParallelCompact::summary_data().chunk_to_addr(chunk_index) +
-                 ParallelCompactData::ChunkSize;
-  _chunk_index = chunk_index;
-  _cur_block =
-    PSParallelCompact::summary_data().addr_to_block_idx(_chunk_start);
-}
-
-bool BitBlockUpdateClosure::chunk_contains_cur_block() {
-  return ParallelCompactData::chunk_contains_block(_chunk_index, _cur_block);
-}
-
-void BitBlockUpdateClosure::reset_chunk(size_t chunk_index) {
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(7);)
-  ParallelCompactData& sd = PSParallelCompact::summary_data();
-  _chunk_index = chunk_index;
-  _live_data_left = 0;
-  _chunk_start = sd.chunk_to_addr(chunk_index);
-  _chunk_end = sd.chunk_to_addr(chunk_index) + ParallelCompactData::ChunkSize;
-
-  // The first block in this chunk
-  size_t first_block =  sd.addr_to_block_idx(_chunk_start);
-  size_t partial_live_size = sd.chunk(chunk_index)->partial_obj_size();
-
-  // Set the offset to 0. By definition it should have that value
-  // but it may have been written while processing an earlier chunk.
-  if (partial_live_size == 0) {
-    // No live object extends onto the chunk.  The first bit
-    // in the bit map for the first chunk must be a start bit.
-    // Although there may not be any marked bits, it is safe
-    // to set it as a start bit.
-    sd.block(first_block)->set_start_bit_offset(0);
-    sd.block(first_block)->set_first_is_start_bit(true);
-  } else if (sd.partial_obj_ends_in_block(first_block)) {
-    sd.block(first_block)->set_end_bit_offset(0);
-    sd.block(first_block)->set_first_is_start_bit(false);
-  } else {
-    // The partial object extends beyond the first block.
-    // There is no object starting in the first block
-    // so the offset and bit parity are not needed.
-    // Set the the bit parity to start bit so assertions
-    // work when not bit is found.
-    sd.block(first_block)->set_end_bit_offset(0);
-    sd.block(first_block)->set_first_is_start_bit(false);
-  }
-  _cur_block = first_block;
-#ifdef ASSERT
-  if (sd.block(first_block)->first_is_start_bit()) {
-    assert(!sd.partial_obj_ends_in_block(first_block),
-      "Partial object cannot end in first block");
-  }
-
-  if (PrintGCDetails && Verbose) {
-    if (partial_live_size == 1) {
-    gclog_or_tty->print_cr("first_block " PTR_FORMAT
-      " _offset " PTR_FORMAT
-      " _first_is_start_bit %d",
-      first_block,
-      sd.block(first_block)->raw_offset(),
-      sd.block(first_block)->first_is_start_bit());
-    }
-  }
-#endif
-  DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(17);)
-}
-
-// This method is called when a object has been found (both beginning
-// and end of the object) in the range of iteration.  This method is
-// calculating the words of live data to the left of a block.  That live
-// data includes any object starting to the left of the block (i.e.,
-// the live-data-to-the-left of block AAA will include the full size
-// of any object entering AAA).
-
-ParMarkBitMapClosure::IterationStatus
-BitBlockUpdateClosure::do_addr(HeapWord* addr, size_t words) {
-  // add the size to the block data.
-  HeapWord* obj = addr;
-  ParallelCompactData& sd = PSParallelCompact::summary_data();
-
-  assert(bitmap()->obj_size(obj) == words, "bad size");
-  assert(_chunk_start <= obj, "object is not in chunk");
-  assert(obj + words <= _chunk_end, "object is not in chunk");
-
-  // Update the live data to the left
-  size_t prev_live_data_left = _live_data_left;
-  _live_data_left = _live_data_left + words;
-
-  // Is this object in the current block.
-  size_t block_of_obj = sd.addr_to_block_idx(obj);
-  size_t block_of_obj_last = sd.addr_to_block_idx(obj + words - 1);
-  HeapWord* block_of_obj_last_addr = sd.block_to_addr(block_of_obj_last);
-  if (_cur_block < block_of_obj) {
-
-    //
-    // No object crossed the block boundary and this object was found
-    // on the other side of the block boundary.  Update the offset for
-    // the new block with the data size that does not include this object.
-    //
-    // The first bit in block_of_obj is a start bit except in the
-    // case where the partial object for the chunk extends into
-    // this block.
-    if (sd.partial_obj_ends_in_block(block_of_obj)) {
-      sd.block(block_of_obj)->set_end_bit_offset(prev_live_data_left);
-    } else {
-      sd.block(block_of_obj)->set_start_bit_offset(prev_live_data_left);
-    }
-
-    // Does this object pass beyond the its block?
-    if (block_of_obj < block_of_obj_last) {
-      // Object crosses block boundary.  Two blocks need to be udpated:
-      //        the current block where the object started
-      //        the block where the object ends
-      //
-      // The offset for blocks with no objects starting in them
-      // (e.g., blocks between _cur_block and  block_of_obj_last)
-      // should not be needed.
-      // Note that block_of_obj_last may be in another chunk.  If so,
-      // it should be overwritten later.  This is a problem (writting
-      // into a block in a later chunk) for parallel execution.
-      assert(obj < block_of_obj_last_addr,
-        "Object should start in previous block");
-
-      // obj is crossing into block_of_obj_last so the first bit
-      // is and end bit.
-      sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
-
-      _cur_block = block_of_obj_last;
-    } else {
-      // _first_is_start_bit has already been set correctly
-      // in the if-then-else above so don't reset it here.
-      _cur_block = block_of_obj;
-    }
-  } else {
-    // The current block only changes if the object extends beyound
-    // the block it starts in.
-    //
-    // The object starts in the current block.
-    // Does this object pass beyond the end of it?
-    if (block_of_obj < block_of_obj_last) {
-      // Object crosses block boundary.
-      // See note above on possible blocks between block_of_obj and
-      // block_of_obj_last
-      assert(obj < block_of_obj_last_addr,
-        "Object should start in previous block");
-
-      sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left);
-
-      _cur_block = block_of_obj_last;
-    }
-  }
-
-  // Return incomplete if there are more blocks to be done.
-  if (chunk_contains_cur_block()) {
-    return ParMarkBitMap::incomplete;
-  }
-  return ParMarkBitMap::complete;
-}
-
 // Verify the new location using the forwarding pointer
 // from MarkSweep::mark_sweep_phase2().  Set the mark_word
 // to the initial value.
@@ -3707,12 +3050,3 @@
       return last_space_id;
   }
 }
-
-// Here temporarily for debugging
-#ifdef ASSERT
-  size_t ParallelCompactData::block_idx(BlockData* block) {
-    size_t index = pointer_delta(block,
-      PSParallelCompact::summary_data()._block_data, sizeof(BlockData));
-    return index;
-  }
-#endif
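The renamed code above converts between heap addresses and region indices with addr_to_region_idx(), region_to_addr() and the region_align_up()/region_align_down() helpers declared in the header below.  A small illustrative sketch of that arithmetic, assuming a power-of-two region size measured in heap words (as the RegionAddrMask/RegionSizeOffsetMask constants imply); the constants and the HeapWord stand-in are placeholders, not the real definitions.

    #include <cstddef>
    #include <cstdint>

    typedef uintptr_t HeapWord;   // stand-in: one heap word per element

    // Illustrative values; the real ones come from ParallelCompactData.
    const size_t Log2RegionSize = 9;                    // 512 words per region
    const size_t RegionSize     = size_t(1) << Log2RegionSize;

    struct RegionTable {
      HeapWord* region_start;     // bottom of the covered heap range

      size_t addr_to_region_idx(const HeapWord* addr) const {
        return size_t(addr - region_start) >> Log2RegionSize;
      }
      HeapWord* region_to_addr(size_t region) const {
        return region_start + (region << Log2RegionSize);
      }
      HeapWord* region_align_down(HeapWord* addr) const {
        return region_to_addr(addr_to_region_idx(addr));
      }
      HeapWord* region_align_up(HeapWord* addr) const {
        return region_align_down(addr + RegionSize - 1);
      }
      bool is_region_aligned(HeapWord* addr) const {
        return region_align_down(addr) == addr;
      }
    };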
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -76,87 +76,80 @@
 {
 public:
   // Sizes are in HeapWords, unless indicated otherwise.
-  static const size_t Log2ChunkSize;
-  static const size_t ChunkSize;
-  static const size_t ChunkSizeBytes;
+  static const size_t Log2RegionSize;
+  static const size_t RegionSize;
+  static const size_t RegionSizeBytes;
 
-  // Mask for the bits in a size_t to get an offset within a chunk.
-  static const size_t ChunkSizeOffsetMask;
-  // Mask for the bits in a pointer to get an offset within a chunk.
-  static const size_t ChunkAddrOffsetMask;
-  // Mask for the bits in a pointer to get the address of the start of a chunk.
-  static const size_t ChunkAddrMask;
+  // Mask for the bits in a size_t to get an offset within a region.
+  static const size_t RegionSizeOffsetMask;
+  // Mask for the bits in a pointer to get an offset within a region.
+  static const size_t RegionAddrOffsetMask;
+  // Mask for the bits in a pointer to get the address of the start of a region.
+  static const size_t RegionAddrMask;
 
-  static const size_t Log2BlockSize;
-  static const size_t BlockSize;
-  static const size_t BlockOffsetMask;
-  static const size_t BlockMask;
-
-  static const size_t BlocksPerChunk;
-
-  class ChunkData
+  class RegionData
   {
   public:
-    // Destination address of the chunk.
+    // Destination address of the region.
     HeapWord* destination() const { return _destination; }
 
-    // The first chunk containing data destined for this chunk.
-    size_t source_chunk() const { return _source_chunk; }
+    // The first region containing data destined for this region.
+    size_t source_region() const { return _source_region; }
 
-    // The object (if any) starting in this chunk and ending in a different
-    // chunk that could not be updated during the main (parallel) compaction
+    // The object (if any) starting in this region and ending in a different
+    // region that could not be updated during the main (parallel) compaction
     // phase.  This is different from _partial_obj_addr, which is an object that
-    // extends onto a source chunk.  However, the two uses do not overlap in
+    // extends onto a source region.  However, the two uses do not overlap in
     // time, so the same field is used to save space.
     HeapWord* deferred_obj_addr() const { return _partial_obj_addr; }
 
-    // The starting address of the partial object extending onto the chunk.
+    // The starting address of the partial object extending onto the region.
     HeapWord* partial_obj_addr() const { return _partial_obj_addr; }
 
-    // Size of the partial object extending onto the chunk (words).
+    // Size of the partial object extending onto the region (words).
     size_t partial_obj_size() const { return _partial_obj_size; }
 
-    // Size of live data that lies within this chunk due to objects that start
-    // in this chunk (words).  This does not include the partial object
-    // extending onto the chunk (if any), or the part of an object that extends
-    // onto the next chunk (if any).
+    // Size of live data that lies within this region due to objects that start
+    // in this region (words).  This does not include the partial object
+    // extending onto the region (if any), or the part of an object that extends
+    // onto the next region (if any).
     size_t live_obj_size() const { return _dc_and_los & los_mask; }
 
-    // Total live data that lies within the chunk (words).
+    // Total live data that lies within the region (words).
     size_t data_size() const { return partial_obj_size() + live_obj_size(); }
 
-    // The destination_count is the number of other chunks to which data from
-    // this chunk will be copied.  At the end of the summary phase, the valid
+    // The destination_count is the number of other regions to which data from
+    // this region will be copied.  At the end of the summary phase, the valid
     // values of destination_count are
     //
-    // 0 - data from the chunk will be compacted completely into itself, or the
-    //     chunk is empty.  The chunk can be claimed and then filled.
-    // 1 - data from the chunk will be compacted into 1 other chunk; some
-    //     data from the chunk may also be compacted into the chunk itself.
-    // 2 - data from the chunk will be copied to 2 other chunks.
+    // 0 - data from the region will be compacted completely into itself, or the
+    //     region is empty.  The region can be claimed and then filled.
+    // 1 - data from the region will be compacted into 1 other region; some
+    //     data from the region may also be compacted into the region itself.
+    // 2 - data from the region will be copied to 2 other regions.
     //
-    // During compaction as chunks are emptied, the destination_count is
+    // During compaction as regions are emptied, the destination_count is
     // decremented (atomically) and when it reaches 0, it can be claimed and
     // then filled.
     //
-    // A chunk is claimed for processing by atomically changing the
-    // destination_count to the claimed value (dc_claimed).  After a chunk has
+    // A region is claimed for processing by atomically changing the
+    // destination_count to the claimed value (dc_claimed).  After a region has
     // been filled, the destination_count should be set to the completed value
     // (dc_completed).
     inline uint destination_count() const;
     inline uint destination_count_raw() const;
 
-    // The location of the java heap data that corresponds to this chunk.
+    // The location of the java heap data that corresponds to this region.
     inline HeapWord* data_location() const;
 
-    // The highest address referenced by objects in this chunk.
+    // The highest address referenced by objects in this region.
     inline HeapWord* highest_ref() const;
 
-    // Whether this chunk is available to be claimed, has been claimed, or has
+    // Whether this region is available to be claimed, has been claimed, or has
     // been completed.
     //
-    // Minor subtlety:  claimed() returns true if the chunk is marked
-    // completed(), which is desirable since a chunk must be claimed before it
+    // Minor subtlety:  claimed() returns true if the region is marked
+    // completed(), which is desirable since a region must be claimed before it
     // can be completed.
     bool available() const { return _dc_and_los < dc_one; }
     bool claimed() const   { return _dc_and_los >= dc_claimed; }
@@ -164,11 +157,11 @@
 
     // These are not atomic.
     void set_destination(HeapWord* addr)       { _destination = addr; }
-    void set_source_chunk(size_t chunk)        { _source_chunk = chunk; }
+    void set_source_region(size_t region)      { _source_region = region; }
     void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
     void set_partial_obj_addr(HeapWord* addr)  { _partial_obj_addr = addr; }
     void set_partial_obj_size(size_t words)    {
-      _partial_obj_size = (chunk_sz_t) words;
+      _partial_obj_size = (region_sz_t) words;
     }
 
     inline void set_destination_count(uint count);
@@ -184,129 +177,57 @@
     inline bool claim();
 
   private:
-    // The type used to represent object sizes within a chunk.
-    typedef uint chunk_sz_t;
+    // The type used to represent object sizes within a region.
+    typedef uint region_sz_t;
 
     // Constants for manipulating the _dc_and_los field, which holds both the
     // destination count and live obj size.  The live obj size lives at the
     // least significant end so no masking is necessary when adding.
-    static const chunk_sz_t dc_shift;           // Shift amount.
-    static const chunk_sz_t dc_mask;            // Mask for destination count.
-    static const chunk_sz_t dc_one;             // 1, shifted appropriately.
-    static const chunk_sz_t dc_claimed;         // Chunk has been claimed.
-    static const chunk_sz_t dc_completed;       // Chunk has been completed.
-    static const chunk_sz_t los_mask;           // Mask for live obj size.
+    static const region_sz_t dc_shift;           // Shift amount.
+    static const region_sz_t dc_mask;            // Mask for destination count.
+    static const region_sz_t dc_one;             // 1, shifted appropriately.
+    static const region_sz_t dc_claimed;         // Region has been claimed.
+    static const region_sz_t dc_completed;       // Region has been completed.
+    static const region_sz_t los_mask;           // Mask for live obj size.
 
-    HeapWord*           _destination;
-    size_t              _source_chunk;
-    HeapWord*           _partial_obj_addr;
-    chunk_sz_t          _partial_obj_size;
-    chunk_sz_t volatile _dc_and_los;
+    HeapWord*            _destination;
+    size_t               _source_region;
+    HeapWord*            _partial_obj_addr;
+    region_sz_t          _partial_obj_size;
+    region_sz_t volatile _dc_and_los;
 #ifdef ASSERT
     // These enable optimizations that are only partially implemented.  Use
     // debug builds to prevent the code fragments from breaking.
-    HeapWord*           _data_location;
-    HeapWord*           _highest_ref;
+    HeapWord*            _data_location;
+    HeapWord*            _highest_ref;
 #endif  // #ifdef ASSERT
 
 #ifdef ASSERT
    public:
-    uint            _pushed;    // 0 until chunk is pushed onto a worker's stack
+    uint            _pushed;   // 0 until region is pushed onto a worker's stack
    private:
 #endif
   };
 
-  // 'Blocks' allow shorter sections of the bitmap to be searched.  Each Block
-  // holds an offset, which is the amount of live data in the Chunk to the left
-  // of the first live object in the Block.  This amount of live data will
-  // include any object extending into the block. The first block in
-  // a chunk does not include any partial object extending into the
-  // the chunk.
-  //
-  // The offset also encodes the
-  // 'parity' of the first 1 bit in the Block:  a positive offset means the
-  // first 1 bit marks the start of an object, a negative offset means the first
-  // 1 bit marks the end of an object.
-  class BlockData
-  {
-   public:
-    typedef short int blk_ofs_t;
-
-    blk_ofs_t offset() const { return _offset >= 0 ? _offset : -_offset; }
-    blk_ofs_t raw_offset() const { return _offset; }
-    void set_first_is_start_bit(bool v) { _first_is_start_bit = v; }
-
-#if 0
-    // The need for this method was anticipated but it is
-    // never actually used.  Do not include it for now.  If
-    // it is needed, consider the problem of what is passed
-    // as "v".  To avoid warning errors the method set_start_bit_offset()
-    // was changed to take a size_t as the parameter and to do the
-    // check for the possible overflow.  Doing the cast in these
-    // methods better limits the potential problems because of
-    // the size of the field to this class.
-    void set_raw_offset(blk_ofs_t v) { _offset = v; }
-#endif
-    void set_start_bit_offset(size_t val) {
-      assert(val >= 0, "sanity");
-      _offset = (blk_ofs_t) val;
-      assert(val == (size_t) _offset, "Value is too large");
-      _first_is_start_bit = true;
-    }
-    void set_end_bit_offset(size_t val) {
-      assert(val >= 0, "sanity");
-      _offset = (blk_ofs_t) val;
-      assert(val == (size_t) _offset, "Value is too large");
-      _offset = - _offset;
-      _first_is_start_bit = false;
-    }
-    bool first_is_start_bit() {
-      assert(_set_phase > 0, "Not initialized");
-      return _first_is_start_bit;
-    }
-    bool first_is_end_bit() {
-      assert(_set_phase > 0, "Not initialized");
-      return !_first_is_start_bit;
-    }
-
-   private:
-    blk_ofs_t _offset;
-    // This is temporary until the mark_bitmap is separated into
-    // a start bit array and an end bit array.
-    bool      _first_is_start_bit;
-#ifdef ASSERT
-    short     _set_phase;
-    static short _cur_phase;
-   public:
-    static void set_cur_phase(short v) { _cur_phase = v; }
-#endif
-  };
-
 public:
   ParallelCompactData();
   bool initialize(MemRegion covered_region);
 
-  size_t chunk_count() const { return _chunk_count; }
-
-  // Convert chunk indices to/from ChunkData pointers.
-  inline ChunkData* chunk(size_t chunk_idx) const;
-  inline size_t     chunk(const ChunkData* const chunk_ptr) const;
+  size_t region_count() const { return _region_count; }
 
-  // Returns true if the given address is contained within the chunk
-  bool chunk_contains(size_t chunk_index, HeapWord* addr);
+  // Convert region indices to/from RegionData pointers.
+  inline RegionData* region(size_t region_idx) const;
+  inline size_t     region(const RegionData* const region_ptr) const;
 
-  size_t block_count() const { return _block_count; }
-  inline BlockData* block(size_t n) const;
-
-  // Returns true if the given block is in the given chunk.
-  static bool chunk_contains_block(size_t chunk_index, size_t block_index);
+  // Returns true if the given address is contained within the region
+  bool region_contains(size_t region_index, HeapWord* addr);
 
   void add_obj(HeapWord* addr, size_t len);
   void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
 
-  // Fill in the chunks covering [beg, end) so that no data moves; i.e., the
-  // destination of chunk n is simply the start of chunk n.  The argument beg
-  // must be chunk-aligned; end need not be.
+  // Fill in the regions covering [beg, end) so that no data moves; i.e., the
+  // destination of region n is simply the start of region n.  The argument beg
+  // must be region-aligned; end need not be.
   void summarize_dense_prefix(HeapWord* beg, HeapWord* end);
 
   bool summarize(HeapWord* target_beg, HeapWord* target_end,
@@ -314,48 +235,33 @@
                  HeapWord** target_next, HeapWord** source_next = 0);
 
   void clear();
-  void clear_range(size_t beg_chunk, size_t end_chunk);
+  void clear_range(size_t beg_region, size_t end_region);
   void clear_range(HeapWord* beg, HeapWord* end) {
-    clear_range(addr_to_chunk_idx(beg), addr_to_chunk_idx(end));
+    clear_range(addr_to_region_idx(beg), addr_to_region_idx(end));
   }
 
-  // Return the number of words between addr and the start of the chunk
+  // Return the number of words between addr and the start of the region
   // containing addr.
-  inline size_t     chunk_offset(const HeapWord* addr) const;
-
-  // Convert addresses to/from a chunk index or chunk pointer.
-  inline size_t     addr_to_chunk_idx(const HeapWord* addr) const;
-  inline ChunkData* addr_to_chunk_ptr(const HeapWord* addr) const;
-  inline HeapWord*  chunk_to_addr(size_t chunk) const;
-  inline HeapWord*  chunk_to_addr(size_t chunk, size_t offset) const;
-  inline HeapWord*  chunk_to_addr(const ChunkData* chunk) const;
+  inline size_t     region_offset(const HeapWord* addr) const;
 
-  inline HeapWord*  chunk_align_down(HeapWord* addr) const;
-  inline HeapWord*  chunk_align_up(HeapWord* addr) const;
-  inline bool       is_chunk_aligned(HeapWord* addr) const;
+  // Convert addresses to/from a region index or region pointer.
+  inline size_t     addr_to_region_idx(const HeapWord* addr) const;
+  inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
+  inline HeapWord*  region_to_addr(size_t region) const;
+  inline HeapWord*  region_to_addr(size_t region, size_t offset) const;
+  inline HeapWord*  region_to_addr(const RegionData* region) const;
 
-  // Analogous to chunk_offset() for blocks.
-  size_t     block_offset(const HeapWord* addr) const;
-  size_t     addr_to_block_idx(const HeapWord* addr) const;
-  size_t     addr_to_block_idx(const oop obj) const {
-    return addr_to_block_idx((HeapWord*) obj);
-  }
-  inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
-  inline HeapWord*  block_to_addr(size_t block) const;
+  inline HeapWord*  region_align_down(HeapWord* addr) const;
+  inline HeapWord*  region_align_up(HeapWord* addr) const;
+  inline bool       is_region_aligned(HeapWord* addr) const;
 
   // Return the address one past the end of the partial object.
-  HeapWord* partial_obj_end(size_t chunk_idx) const;
+  HeapWord* partial_obj_end(size_t region_idx) const;
 
   // Return the new location of the object p after the
   // compaction.
   HeapWord* calc_new_pointer(HeapWord* addr);
 
-  // Same as calc_new_pointer() using blocks.
-  HeapWord* block_calc_new_pointer(HeapWord* addr);
-
-  // Same as calc_new_pointer() using chunks.
-  HeapWord* chunk_calc_new_pointer(HeapWord* addr);
-
   HeapWord* calc_new_pointer(oop p) {
     return calc_new_pointer((HeapWord*) p);
   }
@@ -363,22 +269,13 @@
   // Return the updated address for the given klass
   klassOop calc_new_klass(klassOop);
 
-  // Given a block returns true if the partial object for the
-  // corresponding chunk ends in the block.  Returns false, otherwise
-  // If there is no partial object, returns false.
-  bool partial_obj_ends_in_block(size_t block_index);
-
-  // Returns the block index for the block
-  static size_t block_idx(BlockData* block);
-
 #ifdef  ASSERT
   void verify_clear(const PSVirtualSpace* vspace);
   void verify_clear();
 #endif  // #ifdef ASSERT
 
 private:
-  bool initialize_block_data(size_t region_size);
-  bool initialize_chunk_data(size_t region_size);
+  bool initialize_region_data(size_t region_size);
   PSVirtualSpace* create_vspace(size_t count, size_t element_size);
 
 private:
@@ -387,74 +284,70 @@
   HeapWord*       _region_end;
 #endif  // #ifdef ASSERT
 
-  PSVirtualSpace* _chunk_vspace;
-  ChunkData*      _chunk_data;
-  size_t          _chunk_count;
-
-  PSVirtualSpace* _block_vspace;
-  BlockData*      _block_data;
-  size_t          _block_count;
+  PSVirtualSpace* _region_vspace;
+  RegionData*     _region_data;
+  size_t          _region_count;
 };
 
 inline uint
-ParallelCompactData::ChunkData::destination_count_raw() const
+ParallelCompactData::RegionData::destination_count_raw() const
 {
   return _dc_and_los & dc_mask;
 }
 
 inline uint
-ParallelCompactData::ChunkData::destination_count() const
+ParallelCompactData::RegionData::destination_count() const
 {
   return destination_count_raw() >> dc_shift;
 }
 
 inline void
-ParallelCompactData::ChunkData::set_destination_count(uint count)
+ParallelCompactData::RegionData::set_destination_count(uint count)
 {
   assert(count <= (dc_completed >> dc_shift), "count too large");
-  const chunk_sz_t live_sz = (chunk_sz_t) live_obj_size();
+  const region_sz_t live_sz = (region_sz_t) live_obj_size();
   _dc_and_los = (count << dc_shift) | live_sz;
 }
 
-inline void ParallelCompactData::ChunkData::set_live_obj_size(size_t words)
+inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
 {
   assert(words <= los_mask, "would overflow");
-  _dc_and_los = destination_count_raw() | (chunk_sz_t)words;
+  _dc_and_los = destination_count_raw() | (region_sz_t)words;
 }
 
-inline void ParallelCompactData::ChunkData::decrement_destination_count()
+inline void ParallelCompactData::RegionData::decrement_destination_count()
 {
   assert(_dc_and_los < dc_claimed, "already claimed");
   assert(_dc_and_los >= dc_one, "count would go negative");
   Atomic::add((int)dc_mask, (volatile int*)&_dc_and_los);
 }
 
-inline HeapWord* ParallelCompactData::ChunkData::data_location() const
+inline HeapWord* ParallelCompactData::RegionData::data_location() const
 {
   DEBUG_ONLY(return _data_location;)
   NOT_DEBUG(return NULL;)
 }
 
-inline HeapWord* ParallelCompactData::ChunkData::highest_ref() const
+inline HeapWord* ParallelCompactData::RegionData::highest_ref() const
 {
   DEBUG_ONLY(return _highest_ref;)
   NOT_DEBUG(return NULL;)
 }
 
-inline void ParallelCompactData::ChunkData::set_data_location(HeapWord* addr)
+inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr)
 {
   DEBUG_ONLY(_data_location = addr;)
 }
 
-inline void ParallelCompactData::ChunkData::set_completed()
+inline void ParallelCompactData::RegionData::set_completed()
 {
   assert(claimed(), "must be claimed first");
-  _dc_and_los = dc_completed | (chunk_sz_t) live_obj_size();
+  _dc_and_los = dc_completed | (region_sz_t) live_obj_size();
 }
 
-// MT-unsafe claiming of a chunk.  Should only be used during single threaded
+// MT-unsafe claiming of a region.  Should only be used during single threaded
 // execution.
-inline bool ParallelCompactData::ChunkData::claim_unsafe()
+inline bool ParallelCompactData::RegionData::claim_unsafe()
 {
   if (available()) {
     _dc_and_los |= dc_claimed;
@@ -463,13 +356,13 @@
   return false;
 }
 
-inline void ParallelCompactData::ChunkData::add_live_obj(size_t words)
+inline void ParallelCompactData::RegionData::add_live_obj(size_t words)
 {
   assert(words <= (size_t)los_mask - live_obj_size(), "overflow");
   Atomic::add((int) words, (volatile int*) &_dc_and_los);
 }
 
-inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr)
+inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr)
 {
 #ifdef ASSERT
   HeapWord* tmp = _highest_ref;
@@ -479,7 +372,7 @@
 #endif  // #ifdef ASSERT
 }
 
-inline bool ParallelCompactData::ChunkData::claim()
+inline bool ParallelCompactData::RegionData::claim()
 {
   const int los = (int) live_obj_size();
   const int old = Atomic::cmpxchg(dc_claimed | los,
@@ -487,119 +380,85 @@
   return old == los;
 }
 
-inline ParallelCompactData::ChunkData*
-ParallelCompactData::chunk(size_t chunk_idx) const
+inline ParallelCompactData::RegionData*
+ParallelCompactData::region(size_t region_idx) const
 {
-  assert(chunk_idx <= chunk_count(), "bad arg");
-  return _chunk_data + chunk_idx;
+  assert(region_idx <= region_count(), "bad arg");
+  return _region_data + region_idx;
 }
 
 inline size_t
-ParallelCompactData::chunk(const ChunkData* const chunk_ptr) const
+ParallelCompactData::region(const RegionData* const region_ptr) const
 {
-  assert(chunk_ptr >= _chunk_data, "bad arg");
-  assert(chunk_ptr <= _chunk_data + chunk_count(), "bad arg");
-  return pointer_delta(chunk_ptr, _chunk_data, sizeof(ChunkData));
-}
-
-inline ParallelCompactData::BlockData*
-ParallelCompactData::block(size_t n) const {
-  assert(n < block_count(), "bad arg");
-  return _block_data + n;
+  assert(region_ptr >= _region_data, "bad arg");
+  assert(region_ptr <= _region_data + region_count(), "bad arg");
+  return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
 }
 
 inline size_t
-ParallelCompactData::chunk_offset(const HeapWord* addr) const
+ParallelCompactData::region_offset(const HeapWord* addr) const
 {
   assert(addr >= _region_start, "bad addr");
   assert(addr <= _region_end, "bad addr");
-  return (size_t(addr) & ChunkAddrOffsetMask) >> LogHeapWordSize;
+  return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
 }
 
 inline size_t
-ParallelCompactData::addr_to_chunk_idx(const HeapWord* addr) const
+ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
 {
   assert(addr >= _region_start, "bad addr");
   assert(addr <= _region_end, "bad addr");
-  return pointer_delta(addr, _region_start) >> Log2ChunkSize;
-}
-
-inline ParallelCompactData::ChunkData*
-ParallelCompactData::addr_to_chunk_ptr(const HeapWord* addr) const
-{
-  return chunk(addr_to_chunk_idx(addr));
+  return pointer_delta(addr, _region_start) >> Log2RegionSize;
 }
 
-inline HeapWord*
-ParallelCompactData::chunk_to_addr(size_t chunk) const
+inline ParallelCompactData::RegionData*
+ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const
 {
-  assert(chunk <= _chunk_count, "chunk out of range");
-  return _region_start + (chunk << Log2ChunkSize);
-}
-
-inline HeapWord*
-ParallelCompactData::chunk_to_addr(const ChunkData* chunk) const
-{
-  return chunk_to_addr(pointer_delta(chunk, _chunk_data, sizeof(ChunkData)));
+  return region(addr_to_region_idx(addr));
 }
 
 inline HeapWord*
-ParallelCompactData::chunk_to_addr(size_t chunk, size_t offset) const
+ParallelCompactData::region_to_addr(size_t region) const
 {
-  assert(chunk <= _chunk_count, "chunk out of range");
-  assert(offset < ChunkSize, "offset too big");  // This may be too strict.
-  return chunk_to_addr(chunk) + offset;
+  assert(region <= _region_count, "region out of range");
+  return _region_start + (region << Log2RegionSize);
+}
+
+inline HeapWord*
+ParallelCompactData::region_to_addr(const RegionData* region) const
+{
+  return region_to_addr(pointer_delta(region, _region_data,
+                                      sizeof(RegionData)));
 }
 
 inline HeapWord*
-ParallelCompactData::chunk_align_down(HeapWord* addr) const
+ParallelCompactData::region_to_addr(size_t region, size_t offset) const
 {
-  assert(addr >= _region_start, "bad addr");
-  assert(addr < _region_end + ChunkSize, "bad addr");
-  return (HeapWord*)(size_t(addr) & ChunkAddrMask);
+  assert(region <= _region_count, "region out of range");
+  assert(offset < RegionSize, "offset too big");  // This may be too strict.
+  return region_to_addr(region) + offset;
 }
 
 inline HeapWord*
-ParallelCompactData::chunk_align_up(HeapWord* addr) const
+ParallelCompactData::region_align_down(HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr < _region_end + RegionSize, "bad addr");
+  return (HeapWord*)(size_t(addr) & RegionAddrMask);
+}
+
+inline HeapWord*
+ParallelCompactData::region_align_up(HeapWord* addr) const
 {
   assert(addr >= _region_start, "bad addr");
   assert(addr <= _region_end, "bad addr");
-  return chunk_align_down(addr + ChunkSizeOffsetMask);
+  return region_align_down(addr + RegionSizeOffsetMask);
 }
 
 inline bool
-ParallelCompactData::is_chunk_aligned(HeapWord* addr) const
-{
-  return chunk_offset(addr) == 0;
-}
-
-inline size_t
-ParallelCompactData::block_offset(const HeapWord* addr) const
-{
-  assert(addr >= _region_start, "bad addr");
-  assert(addr <= _region_end, "bad addr");
-  return pointer_delta(addr, _region_start) & BlockOffsetMask;
-}
-
-inline size_t
-ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
+ParallelCompactData::is_region_aligned(HeapWord* addr) const
 {
-  assert(addr >= _region_start, "bad addr");
-  assert(addr <= _region_end, "bad addr");
-  return pointer_delta(addr, _region_start) >> Log2BlockSize;
-}
-
-inline ParallelCompactData::BlockData*
-ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
-{
-  return block(addr_to_block_idx(addr));
-}
-
-inline HeapWord*
-ParallelCompactData::block_to_addr(size_t block) const
-{
-  assert(block < _block_count, "block out of range");
-  return _region_start + (block << Log2BlockSize);
+  return region_offset(addr) == 0;
 }
 
 // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
@@ -687,45 +546,15 @@
   _words_remaining -= words;
 }
 
-// Closure for updating the block data during the summary phase.
-class BitBlockUpdateClosure: public ParMarkBitMapClosure {
-  // ParallelCompactData::BlockData::blk_ofs_t _live_data_left;
-  size_t    _live_data_left;
-  size_t    _cur_block;
-  HeapWord* _chunk_start;
-  HeapWord* _chunk_end;
-  size_t    _chunk_index;
-
- public:
-  BitBlockUpdateClosure(ParMarkBitMap* mbm,
-                        ParCompactionManager* cm,
-                        size_t chunk_index);
-
-  size_t cur_block() { return _cur_block; }
-  size_t chunk_index() { return _chunk_index; }
-  size_t live_data_left() { return _live_data_left; }
-  // Returns true the first bit in the current block (cur_block) is
-  // a start bit.
-  // Returns true if the current block is within the chunk for the closure;
-  bool chunk_contains_cur_block();
-
-  // Set the chunk index and related chunk values for
-  // a new chunk.
-  void reset_chunk(size_t chunk_index);
-
-  virtual IterationStatus do_addr(HeapWord* addr, size_t words);
-};
-
-// The UseParallelOldGC collector is a stop-the-world garbage
-// collector that does parts of the collection using parallel threads.
-// The collection includes the tenured generation and the young
-// generation.  The permanent generation is collected at the same
-// time as the other two generations but the permanent generation
-// is collect by a single GC thread.  The permanent generation is
-// collected serially because of the requirement that during the
-// processing of a klass AAA, any objects reference by AAA must
-// already have been processed.  This requirement is enforced by
-// a left (lower address) to right (higher address) sliding compaction.
+// The UseParallelOldGC collector is a stop-the-world garbage collector that
+// does parts of the collection using parallel threads.  The collection includes
+// the tenured generation and the young generation.  The permanent generation is
+// collected at the same time as the other two generations but the permanent
+// generation is collected by a single GC thread.  The permanent generation is
+// collected serially because of the requirement that during the processing of a
+// klass AAA, any objects referenced by AAA must already have been processed.
+// This requirement is enforced by a left (lower address) to right (higher
+// address) sliding compaction.
 //
 // There are four phases of the collection.
 //
@@ -740,81 +569,75 @@
 //      - move the objects to their destination
 //      - update some references and reinitialize some variables
 //
-// These three phases are invoked in PSParallelCompact::invoke_no_policy().
-// The marking phase is implemented in PSParallelCompact::marking_phase()
-// and does a complete marking of the heap.
-// The summary phase is implemented in PSParallelCompact::summary_phase().
-// The move and update phase is implemented in PSParallelCompact::compact().
+// These three phases are invoked in PSParallelCompact::invoke_no_policy().  The
+// marking phase is implemented in PSParallelCompact::marking_phase() and does a
+// complete marking of the heap.  The summary phase is implemented in
+// PSParallelCompact::summary_phase().  The move and update phase is implemented
+// in PSParallelCompact::compact().
 //
-// A space that is being collected is divided into chunks and with
-// each chunk is associated an object of type ParallelCompactData.
-// Each chunk is of a fixed size and typically will contain more than
-// 1 object and may have parts of objects at the front and back of the
-// chunk.
+// A space that is being collected is divided into regions and with each region
+// is associated an object of type ParallelCompactData.  Each region is of a
+// fixed size and typically will contain more than 1 object and may have parts
+// of objects at the front and back of the region.
 //
-// chunk            -----+---------------------+----------
+// region            -----+---------------------+----------
 // objects covered   [ AAA  )[ BBB )[ CCC   )[ DDD     )
 //
-// The marking phase does a complete marking of all live objects in the
-// heap.  The marking also compiles the size of the data for
-// all live objects covered by the chunk.  This size includes the
-// part of any live object spanning onto the chunk (part of AAA
-// if it is live) from the front, all live objects contained in the chunk
-// (BBB and/or CCC if they are live), and the part of any live objects
-// covered by the chunk that extends off the chunk (part of DDD if it is
-// live).  The marking phase uses multiple GC threads and marking is
-// done in a bit array of type ParMarkBitMap.  The marking of the
-// bit map is done atomically as is the accumulation of the size of the
-// live objects covered by a chunk.
+// The marking phase does a complete marking of all live objects in the heap.
+// The marking also compiles the size of the data for all live objects covered
+// by the region.  This size includes the part of any live object spanning onto
+// the region (part of AAA if it is live) from the front, all live objects
+// contained in the region (BBB and/or CCC if they are live), and the part of
+// any live objects covered by the region that extends off the region (part of
+// DDD if it is live).  The marking phase uses multiple GC threads and marking
+// is done in a bit array of type ParMarkBitMap.  The marking of the bit map is
+// done atomically as is the accumulation of the size of the live objects
+// covered by a region.
 //
-// The summary phase calculates the total live data to the left of
-// each chunk XXX.  Based on that total and the bottom of the space,
-// it can calculate the starting location of the live data in XXX.
-// The summary phase calculates for each chunk XXX quantites such as
+// The summary phase calculates the total live data to the left of each region
+// XXX.  Based on that total and the bottom of the space, it can calculate the
+// starting location of the live data in XXX.  The summary phase calculates for
+// each region XXX quantities such as
 //
-//      - the amount of live data at the beginning of a chunk from an object
-//      entering the chunk.
-//      - the location of the first live data on the chunk
-//      - a count of the number of chunks receiving live data from XXX.
+//      - the amount of live data at the beginning of a region from an object
+//        entering the region.
+//      - the location of the first live data on the region
+//      - a count of the number of regions receiving live data from XXX.
 //
 // See ParallelCompactData for precise details.  The summary phase also
-// calculates the dense prefix for the compaction.  The dense prefix
-// is a portion at the beginning of the space that is not moved.  The
-// objects in the dense prefix do need to have their object references
-// updated.  See method summarize_dense_prefix().
+// calculates the dense prefix for the compaction.  The dense prefix is a
+// portion at the beginning of the space that is not moved.  The objects in the
+// dense prefix do need to have their object references updated.  See method
+// summarize_dense_prefix().
 //
 // The summary phase is done using 1 GC thread.
 //
-// The compaction phase moves objects to their new location and updates
-// all references in the object.
+// The compaction phase moves objects to their new location and updates all
+// references in the object.
 //
-// A current exception is that objects that cross a chunk boundary
-// are moved but do not have their references updated.  References are
-// not updated because it cannot easily be determined if the klass
-// pointer KKK for the object AAA has been updated.  KKK likely resides
-// in a chunk to the left of the chunk containing AAA.  These AAA's
-// have there references updated at the end in a clean up phase.
-// See the method PSParallelCompact::update_deferred_objects().  An
-// alternate strategy is being investigated for this deferral of updating.
+// A current exception is that objects that cross a region boundary are moved
+// but do not have their references updated.  References are not updated because
+// it cannot easily be determined if the klass pointer KKK for the object AAA
+// has been updated.  KKK likely resides in a region to the left of the region
+// containing AAA.  These AAA's have their references updated at the end in a
+// clean up phase.  See the method PSParallelCompact::update_deferred_objects().
+// An alternate strategy is being investigated for this deferral of updating.
 //
-// Compaction is done on a chunk basis.  A chunk that is ready to be
-// filled is put on a ready list and GC threads take chunk off the list
-// and fill them.  A chunk is ready to be filled if it
-// empty of live objects.  Such a chunk may have been initially
-// empty (only contained
-// dead objects) or may have had all its live objects copied out already.
-// A chunk that compacts into itself is also ready for filling.  The
-// ready list is initially filled with empty chunks and chunks compacting
-// into themselves.  There is always at least 1 chunk that can be put on
-// the ready list.  The chunks are atomically added and removed from
-// the ready list.
-//
+// Compaction is done on a region basis.  A region that is ready to be filled is
+// put on a ready list and GC threads take regions off the list and fill them.  A
+// region is ready to be filled if it is empty of live objects.  Such a region may
+// have been initially empty (only contained dead objects) or may have had all
+// its live objects copied out already.  A region that compacts into itself is
+// also ready for filling.  The ready list is initially filled with empty
+// regions and regions compacting into themselves.  There is always at least 1
+// region that can be put on the ready list.  The regions are atomically added
+// and removed from the ready list.
+
 class PSParallelCompact : AllStatic {
  public:
   // Convenient access to type names.
   typedef ParMarkBitMap::idx_t idx_t;
-  typedef ParallelCompactData::ChunkData ChunkData;
-  typedef ParallelCompactData::BlockData BlockData;
+  typedef ParallelCompactData::RegionData RegionData;
 
   typedef enum {
     perm_space_id, old_space_id, eden_space_id,
@@ -977,26 +800,26 @@
   // not reclaimed).
   static double dead_wood_limiter(double density, size_t min_percent);
 
-  // Find the first (left-most) chunk in the range [beg, end) that has at least
+  // Find the first (left-most) region in the range [beg, end) that has at least
   // dead_words of dead space to the left.  The argument beg must be the first
-  // chunk in the space that is not completely live.
-  static ChunkData* dead_wood_limit_chunk(const ChunkData* beg,
-                                          const ChunkData* end,
-                                          size_t dead_words);
+  // region in the space that is not completely live.
+  static RegionData* dead_wood_limit_region(const RegionData* beg,
+                                            const RegionData* end,
+                                            size_t dead_words);
 
-  // Return a pointer to the first chunk in the range [beg, end) that is not
+  // Return a pointer to the first region in the range [beg, end) that is not
   // completely full.
-  static ChunkData* first_dead_space_chunk(const ChunkData* beg,
-                                           const ChunkData* end);
+  static RegionData* first_dead_space_region(const RegionData* beg,
+                                             const RegionData* end);
 
   // Return a value indicating the benefit or 'yield' if the compacted region
   // were to start (or equivalently if the dense prefix were to end) at the
-  // candidate chunk.  Higher values are better.
+  // candidate region.  Higher values are better.
   //
   // The value is based on the amount of space reclaimed vs. the costs of (a)
   // updating references in the dense prefix plus (b) copying objects and
   // updating references in the compacted region.
-  static inline double reclaimed_ratio(const ChunkData* const candidate,
+  static inline double reclaimed_ratio(const RegionData* const candidate,
                                        HeapWord* const bottom,
                                        HeapWord* const top,
                                        HeapWord* const new_top);
@@ -1005,9 +828,9 @@
   static HeapWord* compute_dense_prefix(const SpaceId id,
                                         bool maximum_compaction);
 
-  // Return true if dead space crosses onto the specified Chunk; bit must be the
-  // bit index corresponding to the first word of the Chunk.
-  static inline bool dead_space_crosses_boundary(const ChunkData* chunk,
+  // Return true if dead space crosses onto the specified Region; bit must be
+  // the bit index corresponding to the first word of the Region.
+  static inline bool dead_space_crosses_boundary(const RegionData* region,
                                                  idx_t bit);
 
   // Summary phase utility routine to fill dead space (if any) at the dense
@@ -1019,12 +842,6 @@
   static void summarize_space(SpaceId id, bool maximum_compaction);
   static void summary_phase(ParCompactionManager* cm, bool maximum_compaction);
 
-  static bool block_first_offset(size_t block_index, idx_t* block_offset_ptr);
-
-  // Fill in the BlockData
-  static void summarize_blocks(ParCompactionManager* cm,
-                               SpaceId first_compaction_space_id);
-
   // The space that is compacted after space_id.
   static SpaceId next_compaction_space_id(SpaceId space_id);
 
@@ -1038,16 +855,16 @@
   static void compact_perm(ParCompactionManager* cm);
   static void compact();
 
-  // Add available chunks to the stack and draining tasks to the task queue.
-  static void enqueue_chunk_draining_tasks(GCTaskQueue* q,
-                                           uint parallel_gc_threads);
+  // Add available regions to the stack and draining tasks to the task queue.
+  static void enqueue_region_draining_tasks(GCTaskQueue* q,
+                                            uint parallel_gc_threads);
 
   // Add dense prefix update tasks to the task queue.
   static void enqueue_dense_prefix_tasks(GCTaskQueue* q,
                                          uint parallel_gc_threads);
 
-  // Add chunk stealing tasks to the task queue.
-  static void enqueue_chunk_stealing_tasks(
+  // Add region stealing tasks to the task queue.
+  static void enqueue_region_stealing_tasks(
                                        GCTaskQueue* q,
                                        ParallelTaskTerminator* terminator_ptr,
                                        uint parallel_gc_threads);
@@ -1154,56 +971,56 @@
   // Move and update the live objects in the specified space.
   static void move_and_update(ParCompactionManager* cm, SpaceId space_id);
 
-  // Process the end of the given chunk range in the dense prefix.
+  // Process the end of the given region range in the dense prefix.
   // This includes saving any object not updated.
-  static void dense_prefix_chunks_epilogue(ParCompactionManager* cm,
-                                           size_t chunk_start_index,
-                                           size_t chunk_end_index,
-                                           idx_t exiting_object_offset,
-                                           idx_t chunk_offset_start,
-                                           idx_t chunk_offset_end);
+  static void dense_prefix_regions_epilogue(ParCompactionManager* cm,
+                                            size_t region_start_index,
+                                            size_t region_end_index,
+                                            idx_t exiting_object_offset,
+                                            idx_t region_offset_start,
+                                            idx_t region_offset_end);
 
-  // Update a chunk in the dense prefix.  For each live object
-  // in the chunk, update it's interior references.  For each
+  // Update a region in the dense prefix.  For each live object
+  // in the region, update its interior references.  For each
   // dead object, fill it with deadwood. Dead space at the end
-  // of a chunk range will be filled to the start of the next
-  // live object regardless of the chunk_index_end.  None of the
+  // of a region range will be filled to the start of the next
+  // live object regardless of the region_index_end.  None of the
   // objects in the dense prefix move and dead space is dead
   // (holds only dead objects that don't need any processing), so
   // dead space can be filled in any order.
   static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
                                                   SpaceId space_id,
-                                                  size_t chunk_index_start,
-                                                  size_t chunk_index_end);
+                                                  size_t region_index_start,
+                                                  size_t region_index_end);
 
   // Return the address of the count + 1st live word in the range [beg, end).
   static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);
 
   // Return the address of the word to be copied to dest_addr, which must be
-  // aligned to a chunk boundary.
+  // aligned to a region boundary.
   static HeapWord* first_src_addr(HeapWord* const dest_addr,
-                                  size_t src_chunk_idx);
+                                  size_t src_region_idx);
 
-  // Determine the next source chunk, set closure.source() to the start of the
-  // new chunk return the chunk index.  Parameter end_addr is the address one
+  // Determine the next source region, set closure.source() to the start of the
+  // new region and return the region index.  Parameter end_addr is the address one
   // beyond the end of source range just processed.  If necessary, switch to a
   // new source space and set src_space_id (in-out parameter) and src_space_top
   // (out parameter) accordingly.
-  static size_t next_src_chunk(MoveAndUpdateClosure& closure,
-                               SpaceId& src_space_id,
-                               HeapWord*& src_space_top,
-                               HeapWord* end_addr);
+  static size_t next_src_region(MoveAndUpdateClosure& closure,
+                                SpaceId& src_space_id,
+                                HeapWord*& src_space_top,
+                                HeapWord* end_addr);
 
-  // Decrement the destination count for each non-empty source chunk in the
-  // range [beg_chunk, chunk(chunk_align_up(end_addr))).
+  // Decrement the destination count for each non-empty source region in the
+  // range [beg_region, region(region_align_up(end_addr))).
   static void decrement_destination_counts(ParCompactionManager* cm,
-                                           size_t beg_chunk,
+                                           size_t beg_region,
                                            HeapWord* end_addr);
 
-  // Fill a chunk, copying objects from one or more source chunks.
-  static void fill_chunk(ParCompactionManager* cm, size_t chunk_idx);
-  static void fill_and_update_chunk(ParCompactionManager* cm, size_t chunk) {
-    fill_chunk(cm, chunk);
+  // Fill a region, copying objects from one or more source regions.
+  static void fill_region(ParCompactionManager* cm, size_t region_idx);
+  static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
+    fill_region(cm, region);
   }
 
   // Update the deferred objects in the space.
@@ -1259,7 +1076,7 @@
 #ifndef PRODUCT
   // Debugging support.
   static const char* space_names[last_space_id];
-  static void print_chunk_ranges();
+  static void print_region_ranges();
   static void print_dense_prefix_stats(const char* const algorithm,
                                        const SpaceId id,
                                        const bool maximum_compaction,
@@ -1267,7 +1084,7 @@
 #endif  // #ifndef PRODUCT
 
 #ifdef  ASSERT
-  // Verify that all the chunks have been emptied.
+  // Verify that all the regions have been emptied.
   static void verify_complete(SpaceId space_id);
 #endif  // #ifdef ASSERT
 };
@@ -1376,17 +1193,17 @@
 }
 
 inline bool
-PSParallelCompact::dead_space_crosses_boundary(const ChunkData* chunk,
+PSParallelCompact::dead_space_crosses_boundary(const RegionData* region,
                                                idx_t bit)
 {
-  assert(bit > 0, "cannot call this for the first bit/chunk");
-  assert(_summary_data.chunk_to_addr(chunk) == _mark_bitmap.bit_to_addr(bit),
+  assert(bit > 0, "cannot call this for the first bit/region");
+  assert(_summary_data.region_to_addr(region) == _mark_bitmap.bit_to_addr(bit),
          "sanity check");
 
   // Dead space crosses the boundary if (1) a partial object does not extend
-  // onto the chunk, (2) an object does not start at the beginning of the chunk,
-  // and (3) an object does not end at the end of the prior chunk.
-  return chunk->partial_obj_size() == 0 &&
+  // onto the region, (2) an object does not start at the beginning of the
+  // region, and (3) an object does not end at the end of the prior region.
+  return region->partial_obj_size() == 0 &&
     !_mark_bitmap.is_obj_beg(bit) &&
     !_mark_bitmap.is_obj_end(bit - 1);
 }
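// A minimal standalone sketch of the region arithmetic behind the inline
// conversions above (addr_to_region_idx(), region_offset(), region_to_addr(),
// region_align_down()).  It works in word units, as pointer_delta() does, and
// assumes an illustrative Log2RegionSize; the real constant is defined
// elsewhere in psParallelCompact.hpp.
#include <cassert>
#include <cstddef>

const size_t Log2RegionSize       = 9;                          // assumed: 512-word regions
const size_t RegionSize           = size_t(1) << Log2RegionSize;
const size_t RegionSizeOffsetMask = RegionSize - 1;

// word_idx is the word offset of an address from the bottom of the covered space.
inline size_t addr_to_region_idx(size_t word_idx) { return word_idx >> Log2RegionSize; }
inline size_t region_offset(size_t word_idx)      { return word_idx &  RegionSizeOffsetMask; }
inline size_t region_to_addr(size_t region)       { return region   << Log2RegionSize; }
inline size_t region_align_down(size_t word_idx)  { return word_idx & ~RegionSizeOffsetMask; }
inline bool   is_region_aligned(size_t word_idx)  { return region_offset(word_idx) == 0; }

int main() {
  const size_t word_idx = 3 * RegionSize + 17;                  // 17 words into region 3
  assert(addr_to_region_idx(word_idx) == 3);
  assert(region_offset(word_idx) == 17);
  assert(region_to_addr(3) == region_align_down(word_idx));
  assert(is_region_aligned(region_to_addr(3)));
  return 0;
}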
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -123,8 +123,6 @@
 
 void PSPermGen::precompact() {
   // Reset start array first.
-  debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {)
   _start_array.reset();
-  debug_only(})
   object_mark_sweep()->precompact();
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/coTracker.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_coTracker.cpp.incl"
+
+COTracker* COTracker::_head = NULL;
+double COTracker::_cpu_number = -1.0;
+
+void
+COTracker::resetPeriod(double now_sec, double vnow_sec) {
+  guarantee( _enabled, "invariant" );
+  _period_start_time_sec  = now_sec;
+  _period_start_vtime_sec = vnow_sec;
+}
+
+void
+COTracker::setConcOverhead(double time_stamp_sec,
+                           double conc_overhead) {
+  guarantee( _enabled, "invariant" );
+  _conc_overhead  = conc_overhead;
+  _time_stamp_sec = time_stamp_sec;
+  if (conc_overhead > 0.001)
+    _conc_overhead_seq.add(conc_overhead);
+}
+
+void
+COTracker::reset(double starting_conc_overhead) {
+  guarantee( _enabled, "invariant" );
+  double now_sec = os::elapsedTime();
+  setConcOverhead(now_sec, starting_conc_overhead);
+}
+
+void
+COTracker::start() {
+  guarantee( _enabled, "invariant" );
+  resetPeriod(os::elapsedTime(), os::elapsedVTime());
+}
+
+void
+COTracker::update(bool force_end) {
+  assert( _enabled, "invariant" );
+  double end_time_sec = os::elapsedTime();
+  double elapsed_time_sec = end_time_sec - _period_start_time_sec;
+  if (force_end || elapsed_time_sec > _update_period_sec) {
+    // reached the end of the period
+    double end_vtime_sec = os::elapsedVTime();
+    double elapsed_vtime_sec = end_vtime_sec - _period_start_vtime_sec;
+
+    double conc_overhead = elapsed_vtime_sec / elapsed_time_sec;
+
+    setConcOverhead(end_time_sec, conc_overhead);
+    resetPeriod(end_time_sec, end_vtime_sec);
+  }
+}
+
+void
+COTracker::updateForSTW(double start_sec, double end_sec) {
+  if (!_enabled)
+    return;
+
+  // During a STW pause, no concurrent GC thread has done any
+  // work. So, we can safely adjust the start of the current period by
+  // adding the duration of the STW pause to it, so that the STW pause
+  // doesn't affect the reading of the concurrent overhead (it's
+  // basically like excluding the time of the STW pause from the
+  // concurrent overhead calculation).
+
+  double stw_duration_sec = end_sec - start_sec;
+  guarantee( stw_duration_sec > 0.0, "invariant" );
+
+  if (outOfDate(start_sec))
+    _conc_overhead = 0.0;
+  else
+    _time_stamp_sec = end_sec;
+  _period_start_time_sec += stw_duration_sec;
+  _conc_overhead_seq = NumberSeq();
+
+  guarantee( os::elapsedTime() > _period_start_time_sec, "invariant" );
+}
+
+double
+COTracker::predConcOverhead() {
+  if (_enabled) {
+    // tty->print(" %1.2lf", _conc_overhead_seq.maximum());
+    return _conc_overhead_seq.maximum();
+  } else {
+    // tty->print(" DD");
+    return 0.0;
+  }
+}
+
+void
+COTracker::resetPred() {
+  _conc_overhead_seq = NumberSeq();
+}
+
+COTracker::COTracker(int group)
+    : _enabled(false),
+      _group(group),
+      _period_start_time_sec(-1.0),
+      _period_start_vtime_sec(-1.0),
+      _conc_overhead(-1.0),
+      _time_stamp_sec(-1.0),
+      _next(NULL) {
+  // GCOverheadReportingPeriodMS indicates how frequently the
+  // concurrent overhead will be recorded by the GC Overhead
+  // Reporter. We want to take readings less often than that. If we
+  // took readings more often than that, some of them might be lost.
+  _update_period_sec = ((double) GCOverheadReportingPeriodMS) / 1000.0 * 1.25;
+  _next = _head;
+  _head = this;
+
+  if (_cpu_number < 0.0)
+    _cpu_number = (double) os::processor_count();
+}
+
+// statics
+
+void
+COTracker::updateAllForSTW(double start_sec, double end_sec) {
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    curr->updateForSTW(start_sec, end_sec);
+  }
+}
+
+double
+COTracker::totalConcOverhead(double now_sec) {
+  double total_conc_overhead = 0.0;
+
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    double conc_overhead = curr->concOverhead(now_sec);
+    total_conc_overhead += conc_overhead;
+  }
+
+  return total_conc_overhead;
+}
+
+double
+COTracker::totalConcOverhead(double now_sec,
+                             size_t group_num,
+                             double* co_per_group) {
+  double total_conc_overhead = 0.0;
+
+  for (size_t i = 0; i < group_num; ++i)
+    co_per_group[i] = 0.0;
+
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    size_t group = curr->_group;
+    assert( 0 <= group && group < group_num, "invariant" );
+    double conc_overhead = curr->concOverhead(now_sec);
+
+    co_per_group[group] += conc_overhead;
+    total_conc_overhead += conc_overhead;
+  }
+
+  return total_conc_overhead;
+}
+
+double
+COTracker::totalPredConcOverhead() {
+  double total_pred_conc_overhead = 0.0;
+  for (COTracker* curr = _head; curr != NULL; curr = curr->_next) {
+    total_pred_conc_overhead += curr->predConcOverhead();
+    curr->resetPred();
+  }
+  return total_pred_conc_overhead / _cpu_number;
+}
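// A small self-contained sketch of the arithmetic used by COTracker::update()
// above: the vtime consumed during a period divided by the wall-clock length
// of that period is the per-CPU overhead, and dividing by the processor count
// (as concOverhead() in the accompanying header does) gives the machine-wide
// overhead.  The numbers below are made up for illustration.
#include <cstdio>

int main() {
  const double elapsed_time_sec  = 0.125;   // wall-clock length of the period
  const double elapsed_vtime_sec = 0.050;   // vtime the concurrent thread used
  const double cpu_number        = 8.0;     // assumed os::processor_count()

  const double conc_cpu_overhead = elapsed_vtime_sec / elapsed_time_sec;  // 0.40 of one CPU
  const double conc_overhead     = conc_cpu_overhead / cpu_number;        // 0.05 of the machine

  std::printf("per-CPU overhead %.2f, machine-wide overhead %.3f\n",
              conc_cpu_overhead, conc_overhead);
  return 0;
}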
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/coTracker.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// COTracker keeps track of the concurrent overhead of a GC thread.
+
+// A thread that needs to be tracked must, itself, start up its
+// tracker with the start() method and then call the update() method
+// at regular intervals. What the tracker does is to calculate the
+// concurrent overhead of a process at a given update period. The
+// tracker starts and when it detects that it has exceeded the given
+// period, it calculates the duration of the period in wall-clock time
+// and the duration of the period in vtime (i.e. how much time the
+// concurrent processes really took up during this period). The ratio
+// of the latter over the former is the concurrent overhead of that
+// process for that period over a single CPU. This overhead is stored
+// on the tracker, "timestamped" with the wall-clock time of the end
+// of the period. When the concurrent overhead of this process needs
+// to be queried, this last "reading" provides a good approximation
+// (we assume that the concurrent overhead of a particular thread
+// stays largely constant over time). The timestamp is necessary to
+// detect when the process has stopped working and the recorded
+// reading hasn't been updated for some time.
+
+// Each concurrent GC thread is considered to be part of a "group"
+// (i.e. any available concurrent marking threads are part of the
+// "concurrent marking thread group"). A COTracker is associated with
+// a single group at construction-time. It's up to each collector to
+// decide how groups will be mapped to such an id (ids should start
+// from 0 and be consecutive; there's a hardcoded max group num
+// defined on the GCOverheadTracker class). The notion of a group has
+// been introduced to be able to identify how much overhead was
+// imposed by each group, instead of getting a single value that
+// covers all concurrent overhead.
+
+class COTracker {
+private:
+  // It indicates whether this tracker is enabled or not. When the
+  // tracker is disabled, then it returns 0.0 as the latest concurrent
+  // overhead and several methods (reset, start, and update) are not
+  // supposed to be called on it. This enabling / disabling facility
+  // is really provided to make it a bit more explicit in the code when a
+  // particular tracker of a process that doesn't run all the time
+  // (e.g. concurrent marking) is supposed to be used and when it's not.
+  bool               _enabled;
+
+  // The ID of the group associated with this tracker.
+  int                _group;
+
+  // The update period of the tracker. A new value for the concurrent
+  // overhead of the associated process will be made at intervals no
+  // smaller than this.
+  double             _update_period_sec;
+
+  // The start times (both wall-clock time and vtime) of the current
+  // interval.
+  double             _period_start_time_sec;
+  double             _period_start_vtime_sec;
+
+  // Number seq of the concurrent overhead readings within a period
+  NumberSeq          _conc_overhead_seq;
+
+  // The latest reading of the concurrent overhead (over a single CPU)
+  // imposed by the associated concurrent thread, made available at
+  // the indicated wall-clock time.
+  double             _conc_overhead;
+  double             _time_stamp_sec;
+
+  // The number of CPUs that the host machine has (for convenience
+  // really, as we'd have to keep translating it into a double)
+  static double      _cpu_number;
+
+  // Fields that keep a list of all trackers created. This is useful,
+  // since it allows us to sum up the concurrent overhead without
+  // having to write code for a specific collector to broadcast a
+  // request to all its concurrent processes.
+  COTracker*         _next;
+  static COTracker*  _head;
+
+  // It indicates that a new period is starting by updating the
+  // _period_start_time_sec and _period_start_vtime_sec fields.
+  void resetPeriod(double now_sec, double vnow_sec);
+  // It updates the latest concurrent overhead reading, taken at a
+  // given wall-clock time.
+  void setConcOverhead(double time_stamp_sec, double conc_overhead);
+
+  // It determines whether the time stamp of the latest concurrent
+  // overhead reading is out of date or not.
+  bool outOfDate(double now_sec) {
+    // The latest reading is considered out of date if it was taken
+    // more than 1.2x the update period ago.
+    return (now_sec - _time_stamp_sec) > 1.2 * _update_period_sec;
+  }
+
+public:
+  // The constructor which associates the tracker with a group ID.
+  COTracker(int group);
+
+  // Methods to enable / disable the tracker and query whether it is enabled.
+  void enable()  { _enabled = true;  }
+  void disable() { _enabled = false; }
+  bool enabled() { return _enabled;  }
+
+  // It resets the tracker and sets the concurrent overhead reading to be
+  // the given parameter and the associated time stamp to be now.
+  void reset(double starting_conc_overhead = 0.0);
+  // The tracker starts tracking. It should only be called from the
+  // concurrent thread that is tracked by this tracker.
+  void start();
+  // It updates the tracker and, if the current period is longer than
+  // the update period, the concurrent overhead reading will be
+  // updated. force_end being true indicates that it's the last call
+  // to update() by this process before the tracker is disabled (the
+  // tracker can be re-enabled later if necessary).  It should only be
+  // called from the concurrent thread that is tracked by this tracker
+  // and while the thread has joined the STS.
+  void update(bool force_end = false);
+  // It adjusts the contents of the tracker to take into account a STW
+  // pause.
+  void updateForSTW(double start_sec, double end_sec);
+
+  // It returns the last concurrent overhead reading over a single
+  // CPU. If the reading is out of date, or the tracker is disabled,
+  // it returns 0.0.
+  double concCPUOverhead(double now_sec) {
+    if (!_enabled || outOfDate(now_sec))
+      return 0.0;
+    else
+      return _conc_overhead;
+  }
+
+  // It returns the last concurrent overhead reading over all CPUs
+  // that the host machine has. If the reading is out of date, or the
+  // tracker is disabled, it returns 0.0.
+  double concOverhead(double now_sec) {
+    return concCPUOverhead(now_sec) / _cpu_number;
+  }
+
+  double predConcOverhead();
+
+  void resetPred();
+
+  // statics
+
+  // It notifies all trackers about a STW pause.
+  static void updateAllForSTW(double start_sec, double end_sec);
+
+  // It returns the sum of the concurrent overhead readings of all
+  // available (and enabled) trackers for the given time stamp. The
+  // overhead is over all the CPUs of the host machine.
+
+  static double totalConcOverhead(double now_sec);
+  // Like the previous method, but it also sums up the overheads per
+  // group number. The length of the co_per_group array must be at
+  // least as large as group_num.
+  static double totalConcOverhead(double now_sec,
+                                  size_t group_num,
+                                  double* co_per_group);
+
+  static double totalPredConcOverhead();
+};
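// A hedged usage sketch of the protocol described in the block comment above,
// as it might look inside a concurrent GC thread's work loop.  It assumes the
// COTracker class declared in this header; the group id, the loop structure
// and the helpers more_work_to_do() / do_one_marking_step() are hypothetical
// and only illustrate the call order (start() once from the tracked thread,
// update() periodically, update(true) before going idle).
class HypotheticalMarkThread {
  COTracker _co_tracker;
 public:
  HypotheticalMarkThread() : _co_tracker(0 /* assumed group id */) { }

  void run_service() {
    _co_tracker.enable();
    _co_tracker.start();              // must be called by the tracked thread itself
    while (more_work_to_do()) {
      do_one_marking_step();          // hypothetical unit of concurrent work
      _co_tracker.update();           // refreshes the reading once per update period
    }
    _co_tracker.update(true);         // force a final reading before going idle
    _co_tracker.disable();
  }

 private:
  bool more_work_to_do();             // hypothetical
  void do_one_marking_step();         // hypothetical
};
// Around a stop-the-world pause, the VM side would call
// COTracker::updateAllForSTW(start_sec, end_sec) so that pause time is not
// counted against the concurrent overhead.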
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,314 @@
+/*
+ * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// CopyrightVersion 1.2
+
+# include "incls/_precompiled.incl"
+# include "incls/_concurrentGCThread.cpp.incl"
+
+bool ConcurrentGCThread::_should_terminate    = false;
+bool ConcurrentGCThread::_has_terminated      = false;
+int  ConcurrentGCThread::_CGC_flag            = CGC_nil;
+
+SuspendibleThreadSet ConcurrentGCThread::_sts;
+
+ConcurrentGCThread::ConcurrentGCThread() {
+  _sts.initialize();
+};
+
+void ConcurrentGCThread::stopWorldAndDo(VoidClosure* op) {
+  MutexLockerEx x(Heap_lock,
+                  Mutex::_no_safepoint_check_flag);
+  // warning("CGC: about to try stopping world");
+  SafepointSynchronize::begin();
+  // warning("CGC: successfully stopped world");
+  op->do_void();
+  SafepointSynchronize::end();
+  // warning("CGC: successfully restarted world");
+}
+
+void ConcurrentGCThread::safepoint_synchronize() {
+  _sts.suspend_all();
+}
+
+void ConcurrentGCThread::safepoint_desynchronize() {
+  _sts.resume_all();
+}
+
+void ConcurrentGCThread::create_and_start() {
+  if (os::create_thread(this, os::cgc_thread)) {
+    // XXX: need to set this to low priority
+    // unless "agressive mode" set; priority
+    // should be just less than that of VMThread.
+    os::set_priority(this, NearMaxPriority);
+    if (!_should_terminate && !DisableStartThread) {
+      os::start_thread(this);
+    }
+  }
+}
+
+void ConcurrentGCThread::initialize_in_thread() {
+  this->record_stack_base_and_size();
+  this->initialize_thread_local_storage();
+  this->set_active_handles(JNIHandleBlock::allocate_block());
+  // From this time Thread::current() should be working.
+  assert(this == Thread::current(), "just checking");
+}
+
+void ConcurrentGCThread::wait_for_universe_init() {
+  MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+  while (!is_init_completed() && !_should_terminate) {
+    CGC_lock->wait(Mutex::_no_safepoint_check_flag, 200);
+  }
+}
+
+void ConcurrentGCThread::terminate() {
+  // Signal that it is terminated
+  {
+    MutexLockerEx mu(Terminator_lock,
+                     Mutex::_no_safepoint_check_flag);
+    _has_terminated = true;
+    Terminator_lock->notify();
+  }
+
+  // Thread destructor usually does this.
+  ThreadLocalStorage::set_thread(NULL);
+}
+
+
+void SuspendibleThreadSet::initialize_work() {
+  MutexLocker x(STS_init_lock);
+  if (!_initialized) {
+    _m             = new Monitor(Mutex::leaf,
+                                 "SuspendibleThreadSetLock", true);
+    _async         = 0;
+    _async_stop    = false;
+    _async_stopped = 0;
+    _initialized   = true;
+  }
+}
+
+void SuspendibleThreadSet::join() {
+  initialize();
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag);
+  _async++;
+  assert(_async > 0, "Huh.");
+}
+
+void SuspendibleThreadSet::leave() {
+  assert(_initialized, "Must be initialized.");
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  _async--;
+  assert(_async >= 0, "Huh.");
+  if (_async_stop) _m->notify_all();
+}
+
+void SuspendibleThreadSet::yield(const char* id) {
+  assert(_initialized, "Must be initialized.");
+  if (_async_stop) {
+    MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+    if (_async_stop) {
+      _async_stopped++;
+      assert(_async_stopped > 0, "Huh.");
+      if (_async_stopped == _async) {
+        if (ConcGCYieldTimeout > 0) {
+          double now = os::elapsedTime();
+          guarantee((now - _suspend_all_start) * 1000.0 <
+                    (double)ConcGCYieldTimeout,
+                    "Long delay; whodunit?");
+        }
+      }
+      _m->notify_all();
+      while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag);
+      _async_stopped--;
+      assert(_async >= 0, "Huh");
+      _m->notify_all();
+    }
+  }
+}
+
+void SuspendibleThreadSet::suspend_all() {
+  initialize();  // If necessary.
+  if (ConcGCYieldTimeout > 0) {
+    _suspend_all_start = os::elapsedTime();
+  }
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  assert(!_async_stop, "Only one at a time.");
+  _async_stop = true;
+  while (_async_stopped < _async) _m->wait(Mutex::_no_safepoint_check_flag);
+}
+
+void SuspendibleThreadSet::resume_all() {
+  assert(_initialized, "Must be initialized.");
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
+  assert(_async_stopped == _async, "Huh.");
+  _async_stop = false;
+  _m->notify_all();
+}
+
+static void _sltLoop(JavaThread* thread, TRAPS) {
+  SurrogateLockerThread* slt = (SurrogateLockerThread*)thread;
+  slt->loop();
+}
+
+SurrogateLockerThread::SurrogateLockerThread() :
+  JavaThread(&_sltLoop),
+  _monitor(Mutex::nonleaf, "SLTMonitor"),
+  _buffer(empty)
+{}
+
+SurrogateLockerThread* SurrogateLockerThread::make(TRAPS) {
+  klassOop k =
+    SystemDictionary::resolve_or_fail(vmSymbolHandles::java_lang_Thread(),
+                                      true, CHECK_NULL);
+  instanceKlassHandle klass (THREAD, k);
+  instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_NULL);
+
+  const char thread_name[] = "Surrogate Locker Thread (CMS)";
+  Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL);
+
+  // Initialize thread_oop to put it into the system threadGroup
+  Handle thread_group (THREAD, Universe::system_thread_group());
+  JavaValue result(T_VOID);
+  JavaCalls::call_special(&result, thread_oop,
+                          klass,
+                          vmSymbolHandles::object_initializer_name(),
+                          vmSymbolHandles::threadgroup_string_void_signature(),
+                          thread_group,
+                          string,
+                          CHECK_NULL);
+
+  SurrogateLockerThread* res;
+  {
+    MutexLocker mu(Threads_lock);
+    res = new SurrogateLockerThread();
+
+    // At this point it may be possible that no osthread was created for the
+    // JavaThread due to lack of memory. We would have to throw an exception
+    // in that case. However, since this must work and we do not allow
+    // exceptions anyway, check and abort if this fails.
+    if (res == NULL || res->osthread() == NULL) {
+      vm_exit_during_initialization("java.lang.OutOfMemoryError",
+                                    "unable to create new native thread");
+    }
+    java_lang_Thread::set_thread(thread_oop(), res);
+    java_lang_Thread::set_priority(thread_oop(), NearMaxPriority);
+    java_lang_Thread::set_daemon(thread_oop());
+
+    res->set_threadObj(thread_oop());
+    Threads::add(res);
+    Thread::start(res);
+  }
+  os::yield(); // This seems to help with initial start-up of SLT
+  return res;
+}
+
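+// manipulatePLL() is the hand-off from a concurrent GC thread to the SLT:
+// the caller deposits a message in _buffer and then waits on _monitor until
+// loop() has consumed the message and reset _buffer to empty.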
+void SurrogateLockerThread::manipulatePLL(SLT_msg_type msg) {
+  MutexLockerEx x(&_monitor, Mutex::_no_safepoint_check_flag);
+  assert(_buffer == empty, "Should be empty");
+  assert(msg != empty, "empty message");
+  _buffer = msg;
+  while (_buffer != empty) {
+    _monitor.notify();
+    _monitor.wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+// ======= Surrogate Locker Thread =============
+
+void SurrogateLockerThread::loop() {
+  BasicLock pll_basic_lock;
+  SLT_msg_type msg;
+  debug_only(unsigned int owned = 0;)
+
+  while (/* !isTerminated() */ 1) {
+    {
+      MutexLocker x(&_monitor);
+      // Since we are a JavaThread, we can't be here at a safepoint.
+      assert(!SafepointSynchronize::is_at_safepoint(),
+             "SLT is a JavaThread");
+      // wait for msg buffer to become non-empty
+      while (_buffer == empty) {
+        _monitor.notify();
+        _monitor.wait();
+      }
+      msg = _buffer;
+    }
+    switch(msg) {
+      case acquirePLL: {
+        instanceRefKlass::acquire_pending_list_lock(&pll_basic_lock);
+        debug_only(owned++;)
+        break;
+      }
+      case releaseAndNotifyPLL: {
+        assert(owned > 0, "Don't have PLL");
+        instanceRefKlass::release_and_notify_pending_list_lock(&pll_basic_lock);
+        debug_only(owned--;)
+        break;
+      }
+      case empty:
+      default: {
+        guarantee(false,"Unexpected message in _buffer");
+        break;
+      }
+    }
+    {
+      MutexLocker x(&_monitor);
+      // Since we are a JavaThread, we can't be here at a safepoint.
+      assert(!SafepointSynchronize::is_at_safepoint(),
+             "SLT is a JavaThread");
+      _buffer = empty;
+      _monitor.notify();
+    }
+  }
+  assert(!_monitor.owned_by_self(), "Should unlock before exit.");
+}
+
+
+// ===== STS Access From Outside CGCT =====
+
+void ConcurrentGCThread::stsYield(const char* id) {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  _sts.yield(id);
+}
+
+bool ConcurrentGCThread::stsShouldYield() {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  return _sts.should_yield();
+}
+
+void ConcurrentGCThread::stsJoin() {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  _sts.join();
+}
+
+void ConcurrentGCThread::stsLeave() {
+  assert( Thread::current()->is_ConcurrentGC_thread(),
+          "only a conc GC thread can call this" );
+  _sts.leave();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class VoidClosure;
+
+// A SuspendibleThreadSet is (obviously) a set of threads that can be
+// suspended.  A thread can join and later leave the set, and periodically
+// yield.  If some thread (not in the set) requests, via suspend_all, that
+// the threads be suspended, then the requesting thread is blocked until
+// all the threads in the set have yielded or left the set.  (Threads may
+// not enter the set when an attempted suspension is in progress.)  The
+// suspending thread later calls resume_all, allowing the suspended threads
+// to continue.
+
+class SuspendibleThreadSet {
+  Monitor* _m;
+  int      _async;
+  bool     _async_stop;
+  int      _async_stopped;
+  bool     _initialized;
+  double   _suspend_all_start;
+
+  void initialize_work();
+
+ public:
+  SuspendibleThreadSet() : _initialized(false) {}
+
+  // Add the current thread to the set.  May block if a suspension
+  // is in progress.
+  void join();
+  // Removes the current thread from the set.
+  void leave();
+  // Returns "true" iff an suspension is in progress.
+  bool should_yield() { return _async_stop; }
+  // Suspends the current thread if a suspension is in progress (for
+  // the duration of the suspension.)
+  void yield(const char* id);
+  // Return when all threads in the set are suspended.
+  void suspend_all();
+  // Allow suspended threads to resume.
+  void resume_all();
+  // Redundant initializations okay.
+  void initialize() {
+    // Double-check dirty read idiom.
+    if (!_initialized) initialize_work();
+  }
+};
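+
+// Illustrative usage sketch (hypothetical, not part of the VM sources):
+// a concurrent worker brackets its work with join()/leave() and yields
+// periodically, while a requesting thread brackets a pause with
+// suspend_all()/resume_all().  The helper names below are placeholders.
+//
+//   static SuspendibleThreadSet sts;        // shared set (assumed name)
+//
+//   void concurrent_worker() {
+//     sts.join();                           // may block while a suspension is pending
+//     while (more_work()) {                 // more_work() is hypothetical
+//       do_a_step();                        // do_a_step() is hypothetical
+//       if (sts.should_yield()) sts.yield("worker");
+//     }
+//     sts.leave();
+//   }
+//
+//   void requester() {
+//     sts.suspend_all();                    // returns once all joined threads have yielded
+//     // ... examine or modify state shared with the workers ...
+//     sts.resume_all();
+//   }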
+
+
+class ConcurrentGCThread: public NamedThread {
+  friend class VMStructs;
+
+protected:
+  static bool _should_terminate;
+  static bool _has_terminated;
+
+  enum CGC_flag_type {
+    CGC_nil           = 0x0,
+    CGC_dont_suspend  = 0x1,
+    CGC_CGC_safepoint = 0x2,
+    CGC_VM_safepoint  = 0x4
+  };
+
+  static int _CGC_flag;
+
+  static bool CGC_flag_is_set(int b)       { return (_CGC_flag & b) != 0; }
+  static int set_CGC_flag(int b)           { return _CGC_flag |= b; }
+  static int reset_CGC_flag(int b)         { return _CGC_flag &= ~b; }
+
+  void stopWorldAndDo(VoidClosure* op);
+
+  // All instances share this one set.
+  static SuspendibleThreadSet _sts;
+
+  // Create and start the thread (setting its priority high).
+  void create_and_start();
+
+  // Do initialization steps in the thread: record stack base and size,
+  // init thread local storage, set JNI handle block.
+  void initialize_in_thread();
+
+  // Wait until Universe::is_fully_initialized();
+  void wait_for_universe_init();
+
+  // Record that the current thread is terminating, and will do no more
+  // concurrent work.
+  void terminate();
+
+public:
+  // Constructor
+
+  ConcurrentGCThread();
+  ~ConcurrentGCThread() {} // Exists to call NamedThread destructor.
+
+  // Tester
+  bool is_ConcurrentGC_thread() const          { return true;       }
+
+  static void safepoint_synchronize();
+  static void safepoint_desynchronize();
+
+  // All overrides should probably call _sts.yield(), but we allow
+  // overriding so that subclasses can pass distinguishing debugging
+  // messages.  The default does nothing.
+  virtual void yield() {}
+
+  bool should_yield() { return _sts.should_yield(); }
+
+  // These are prefixed with "sts" since this class already has
+  // (non-static) yield() and should_yield() methods, and the prefix is
+  // an easy way to differentiate the two sets.
+  static void stsYield(const char* id);
+  static bool stsShouldYield();
+  static void stsJoin();
+  static void stsLeave();
+
+};
+
+// The SurrogateLockerThread is used by concurrent GC threads for
+// manipulating Java monitors, in particular, currently for
+// manipulating the pending_list_lock. XXX
+class SurrogateLockerThread: public JavaThread {
+  friend class VMStructs;
+ public:
+  enum SLT_msg_type {
+    empty = 0,           // no message
+    acquirePLL,          // acquire pending list lock
+    releaseAndNotifyPLL  // notify and release pending list lock
+  };
+ private:
+  // the following are shared with the CMSThread
+  SLT_msg_type  _buffer;  // communication buffer
+  Monitor       _monitor; // monitor controlling buffer
+  BasicLock     _basicLock; // used for PLL locking
+
+ public:
+  static SurrogateLockerThread* make(TRAPS);
+
+  SurrogateLockerThread();
+
+  bool is_hidden_from_external_view() const     { return true; }
+
+  void loop(); // main method
+
+  void manipulatePLL(SLT_msg_type msg);
+
+};
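+
+// Illustrative usage sketch (hypothetical, not part of the VM sources):
+// a concurrent collector creates the SLT once and then asks it to take
+// and release the pending list lock on its behalf, since only a
+// JavaThread may hold that Java-level lock.
+//
+//   SurrogateLockerThread* slt = SurrogateLockerThread::make(THREAD);
+//   slt->manipulatePLL(SurrogateLockerThread::acquirePLL);
+//   // ... work that requires the pending list lock ...
+//   slt->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);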
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_gcOverheadReporter.cpp.incl"
+
+class COReportingThread : public ConcurrentGCThread {
+private:
+  GCOverheadReporter* _reporter;
+
+public:
+  COReportingThread(GCOverheadReporter* reporter) : _reporter(reporter) {
+    guarantee( _reporter != NULL, "precondition" );
+    create_and_start();
+  }
+
+  virtual void run() {
+    initialize_in_thread();
+    wait_for_universe_init();
+
+    int period_ms = GCOverheadReportingPeriodMS;
+
+    while ( true ) {
+      os::sleep(Thread::current(), period_ms, false);
+
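+      // Join the suspendible thread set so that a suspend_all() request
+      // (e.g. issued before a pause) waits for, or excludes, this
+      // sampling step.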
+      _sts.join();
+      double now_sec = os::elapsedTime();
+      _reporter->collect_and_record_conc_overhead(now_sec);
+      _sts.leave();
+    }
+
+    terminate();
+  }
+};
+
+GCOverheadReporter* GCOverheadReporter::_reporter = NULL;
+
+GCOverheadReporter::GCOverheadReporter(size_t group_num,
+                                       const char* group_names[],
+                                       size_t length)
+    : _group_num(group_num), _prev_end_sec(0.0) {
+  guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum,
+             "precondition" );
+
+  _base = NEW_C_HEAP_ARRAY(GCOverheadReporterEntry, length);
+  _top  = _base + length;
+  _curr = _base;
+
+  for (size_t i = 0; i < group_num; ++i) {
+    guarantee( group_names[i] != NULL, "precondition" );
+    _group_names[i] = group_names[i];
+  }
+}
+
+void
+GCOverheadReporter::add(double start_sec, double end_sec,
+                        double* conc_overhead,
+                        double stw_overhead) {
+  assert( _curr <= _top, "invariant" );
+
+  if (_curr == _top) {
+    guarantee( false, "trace full" );
+    return;
+  }
+
+  _curr->_start_sec       = start_sec;
+  _curr->_end_sec         = end_sec;
+  for (size_t i = 0; i < _group_num; ++i) {
+    _curr->_conc_overhead[i] =
+      (conc_overhead != NULL) ? conc_overhead[i] : 0.0;
+  }
+  _curr->_stw_overhead    = stw_overhead;
+
+  ++_curr;
+}
+
+void
+GCOverheadReporter::collect_and_record_conc_overhead(double end_sec) {
+  double start_sec = _prev_end_sec;
+  guarantee( end_sec > start_sec, "invariant" );
+
+  double conc_overhead[MaxGCOverheadGroupNum];
+  COTracker::totalConcOverhead(end_sec, _group_num, conc_overhead);
+  add_conc_overhead(start_sec, end_sec, conc_overhead);
+  _prev_end_sec = end_sec;
+}
+
+void
+GCOverheadReporter::record_stw_start(double start_sec) {
+  guarantee( start_sec > _prev_end_sec, "invariant" );
+  collect_and_record_conc_overhead(start_sec);
+}
+
+void
+GCOverheadReporter::record_stw_end(double end_sec) {
+  double start_sec = _prev_end_sec;
+  COTracker::updateAllForSTW(start_sec, end_sec);
+  add_stw_overhead(start_sec, end_sec, 1.0);
+
+  _prev_end_sec = end_sec;
+}
+
+void
+GCOverheadReporter::print() const {
+  tty->print_cr("");
+  tty->print_cr("GC Overhead (%d entries)", _curr - _base);
+  tty->print_cr("");
+  GCOverheadReporterEntry* curr = _base;
+  while (curr < _curr) {
+    double total = curr->_stw_overhead;
+    for (size_t i = 0; i < _group_num; ++i)
+      total += curr->_conc_overhead[i];
+
+    tty->print("OVERHEAD %12.8lf %12.8lf ",
+               curr->_start_sec, curr->_end_sec);
+
+    for (size_t i = 0; i < _group_num; ++i)
+      tty->print("%s %12.8lf ", _group_names[i], curr->_conc_overhead[i]);
+
+    tty->print_cr("STW %12.8lf TOT %12.8lf", curr->_stw_overhead, total);
+    ++curr;
+  }
+  tty->print_cr("");
+}
+
+// statics
+
+void
+GCOverheadReporter::initGCOverheadReporter(size_t group_num,
+                                           const char* group_names[]) {
+  guarantee( _reporter == NULL, "should only be called once" );
+  guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum,
+             "precondition" );
+  guarantee( group_names != NULL, "pre-condition" );
+
+  if (GCOverheadReporting) {
+    _reporter = new GCOverheadReporter(group_num, group_names);
+    new COReportingThread(_reporter);
+  }
+}
+
+void
+GCOverheadReporter::recordSTWStart(double start_sec) {
+  if (_reporter != NULL)
+    _reporter->record_stw_start(start_sec);
+}
+
+void
+GCOverheadReporter::recordSTWEnd(double end_sec) {
+  if (_reporter != NULL)
+    _reporter->record_stw_end(end_sec);
+}
+
+void
+GCOverheadReporter::printGCOverhead() {
+  if (_reporter != NULL)
+    _reporter->print();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Keeps track of the GC overhead (both concurrent and STW). It stores
+// it in a large array and then prints it to tty at the end of the
+// execution.
+
+// See coTracker.hpp for an explanation of what groups are.
+
+// Let's set a maximum number of concurrent overhead groups, to
+// statically allocate any arrays we need and not to have to
+// malloc/free them. This is just a bit more convenient.
+enum {
+  MaxGCOverheadGroupNum = 4
+};
+
+typedef struct {
+  double _start_sec;
+  double _end_sec;
+
+  double _conc_overhead[MaxGCOverheadGroupNum];
+  double _stw_overhead;
+} GCOverheadReporterEntry;
+
+class GCOverheadReporter {
+  friend class COReportingThread;
+
+private:
+  enum PrivateConstants {
+    DefaultReporterLength = 128 * 1024
+  };
+
+  // Reference to the single instance of this class.
+  static GCOverheadReporter* _reporter;
+
+  // These three references point to the array that contains the GC
+  // overhead entries (_base is the base of the array, _top is the
+  // address past the last entry of the array, _curr is the next
+  // entry to be used).
+  GCOverheadReporterEntry* _base;
+  GCOverheadReporterEntry* _top;
+  GCOverheadReporterEntry* _curr;
+
+  // The number of concurrent overhead groups.
+  size_t _group_num;
+
+  // The wall-clock time of the end of the last recorded period of GC
+  // overhead.
+  double _prev_end_sec;
+
+  // Names for the concurrent overhead groups.
+  const char* _group_names[MaxGCOverheadGroupNum];
+
+  // Add a new entry to the large array. conc_overhead being NULL is
+  // equivalent to an array full of 0.0s. conc_overhead should have a
+  // length of at least _group_num.
+  void add(double start_sec, double end_sec,
+           double* conc_overhead,
+           double stw_overhead);
+
+  // Add an entry that represents concurrent GC overhead.
+  // conc_overhead must be at least of length _group_num.
+  // conc_overhead being NULL is equivalent to an array full of 0.0s.
+  void add_conc_overhead(double start_sec, double end_sec,
+                         double* conc_overhead) {
+    add(start_sec, end_sec, conc_overhead, 0.0);
+  }
+
+  // Add an entry that represents STW GC overhead.
+  void add_stw_overhead(double start_sec, double end_sec,
+                        double stw_overhead) {
+    add(start_sec, end_sec, NULL, stw_overhead);
+  }
+
+  // It records the start of a STW pause (i.e. it records the
+  // concurrent overhead up to that point)
+  void record_stw_start(double start_sec);
+
+  // It records the end of a STW pause (i.e. it records the overhead
+  // associated with the pause and adjusts all the trackers to reflect
+  // the pause)
+  void record_stw_end(double end_sec);
+
+  // It queries all the trackers of their concurrent overhead and
+  // records it.
+  void collect_and_record_conc_overhead(double end_sec);
+
+  // It prints the contents of the GC overhead array
+  void print() const;
+
+
+  // Constructor. The same preconditions for group_num and group_names
+  // from initGCOverheadReporter apply here too.
+  GCOverheadReporter(size_t group_num,
+                     const char* group_names[],
+                     size_t length = DefaultReporterLength);
+
+public:
+
+  // statics
+
+  // It initialises the GCOverheadReporter and launches the concurrent
+  // overhead reporting thread. Both actions happen only if the
+  // GCOverheadReporting parameter is set. The length of the
+  // group_names array should be >= group_num and group_num should be
+  // <= MaxGCOverheadGroupNum. Entries group_names[0..group_num-1]
+  // should not be NULL.
+  static void initGCOverheadReporter(size_t group_num,
+                                     const char* group_names[]);
+
+  // The following three are provided for convenience and they are
+  // wrappers around record_stw_start(start_sec), record_stw_end(end_sec),
+  // and print(). Each of these checks whether GC overhead reporting
+  // is on (i.e. _reporter != NULL) and, if it is, calls the
+  // corresponding method. This saves repeating the check at every
+  // call site.
+  static void recordSTWStart(double start_sec);
+  static void recordSTWEnd(double end_sec);
+  static void printGCOverhead();
+};
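+
+// Illustrative usage sketch (hypothetical, not part of the VM sources):
+// the reporter is initialized once with the names of the concurrent
+// overhead groups, STW pauses are bracketed by the record calls, and the
+// accumulated trace is printed at VM exit.  The group names below are
+// placeholders.
+//
+//   static const char* names[] = { "CM", "Sweep" };
+//   GCOverheadReporter::initGCOverheadReporter(2, names);
+//   ...
+//   GCOverheadReporter::recordSTWStart(os::elapsedTime());
+//   // ... stop-the-world pause ...
+//   GCOverheadReporter::recordSTWEnd(os::elapsedTime());
+//   ...
+//   GCOverheadReporter::printGCOverhead();  // typically at VM shutdown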
--- a/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -50,7 +50,8 @@
   size_t capacity_in_bytes() const            { return capacity_in_words() * HeapWordSize; }
 
   // Size computations.  Sizes are in heapwords.
-  size_t capacity_in_words() const            { return pointer_delta(end(), bottom()); }
+  size_t capacity_in_words() const                { return pointer_delta(end(), bottom()); }
+  virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); }
 
   // Iteration.
   virtual void oop_iterate(OopClosure* cl);
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -23,13 +23,6 @@
  */
 
 inline void MarkSweep::mark_object(oop obj) {
-#ifndef SERIALGC
-  if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) {
-    assert(PSParallelCompact::mark_bitmap()->is_marked(obj),
-           "Should be marked in the marking bitmap");
-  }
-#endif // SERIALGC
-
   // some marks may contain information we need to preserve so we store them away
   // and overwrite the mark.  We'll restore it at the end of markSweep.
   markOop mark = obj->mark();
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -181,6 +181,25 @@
   return lgrp_spaces()->at(i)->space()->free_in_bytes();
 }
 
+
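+// Return the capacity of the locality group the given thread allocates
+// from: an even share of the space if the thread has not yet been
+// assigned a group, or 0 if the group has no space in this heap.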
+size_t MutableNUMASpace::capacity_in_words(Thread* thr) const {
+  guarantee(thr != NULL, "No thread");
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1) {
+    if (lgrp_spaces()->length() > 0) {
+      return capacity_in_words() / lgrp_spaces()->length();
+    } else {
+      assert(false, "There should be at least one locality group");
+      return 0;
+    }
+  }
+  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
+  if (i == -1) {
+    return 0;
+  }
+  return lgrp_spaces()->at(i)->space()->capacity_in_words();
+}
+
 // Check if the NUMA topology has changed. Add and remove spaces if needed.
 // The update can be forced by setting the force parameter equal to true.
 bool MutableNUMASpace::update_layout(bool force) {
@@ -372,6 +391,8 @@
 }
 
 // Produce a new chunk size. page_size() aligned.
+// This function is expected to be called on a sequence of i's from 0 to
+// lgrp_spaces()->length().
 size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) {
   size_t pages_available = base_space_size();
   for (int j = 0; j < i; j++) {
@@ -386,7 +407,7 @@
   size_t chunk_size = 0;
   if (alloc_rate > 0) {
     LGRPSpace *ls = lgrp_spaces()->at(i);
-    chunk_size = (size_t)(ls->alloc_rate()->average() * pages_available / alloc_rate) * page_size();
+    chunk_size = (size_t)(ls->alloc_rate()->average() / alloc_rate * pages_available) * page_size();
   }
   chunk_size = MAX2(chunk_size, page_size());
 
@@ -722,7 +743,8 @@
     i = os::random() % lgrp_spaces()->length();
   }
 
-  MutableSpace *s = lgrp_spaces()->at(i)->space();
+  LGRPSpace* ls = lgrp_spaces()->at(i);
+  MutableSpace *s = ls->space();
   HeapWord *p = s->allocate(size);
 
   if (p != NULL) {
@@ -743,6 +765,9 @@
       *(int*)i = 0;
     }
   }
+  if (p == NULL) {
+    ls->set_allocation_failed();
+  }
   return p;
 }
 
@@ -761,7 +786,8 @@
   if (i == -1) {
     i = os::random() % lgrp_spaces()->length();
   }
-  MutableSpace *s = lgrp_spaces()->at(i)->space();
+  LGRPSpace *ls = lgrp_spaces()->at(i);
+  MutableSpace *s = ls->space();
   HeapWord *p = s->cas_allocate(size);
   if (p != NULL) {
     size_t remainder = pointer_delta(s->end(), p + size);
@@ -790,6 +816,9 @@
       *(int*)i = 0;
     }
   }
+  if (p == NULL) {
+    ls->set_allocation_failed();
+  }
   return p;
 }
 
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -60,6 +60,7 @@
     MutableSpace* _space;
     MemRegion _invalid_region;
     AdaptiveWeightedAverage *_alloc_rate;
+    bool _allocation_failed;
 
     struct SpaceStats {
       size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
@@ -81,7 +82,7 @@
     char* last_page_scanned()            { return _last_page_scanned; }
     void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
    public:
-    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) {
+    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
       _space = new MutableSpace();
       _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
     }
@@ -103,8 +104,21 @@
       return *(int*)lgrp_id_value == p->lgrp_id();
     }
 
+    // Report a failed allocation.
+    void set_allocation_failed() { _allocation_failed = true;  }
+
     void sample() {
-      alloc_rate()->sample(space()->used_in_bytes());
+      // If there was a failed allocation, make the allocation rate equal
+      // to the size of the whole chunk. This ensures that the adaptation
+      // process makes progress.
+      size_t alloc_rate_sample;
+      if (_allocation_failed) {
+        alloc_rate_sample = space()->capacity_in_bytes();
+        _allocation_failed = false;
+      } else {
+        alloc_rate_sample = space()->used_in_bytes();
+      }
+      alloc_rate()->sample(alloc_rate_sample);
     }
 
     MemRegion invalid_region() const                { return _invalid_region;      }
@@ -190,6 +204,9 @@
   virtual void ensure_parsability();
   virtual size_t used_in_words() const;
   virtual size_t free_in_words() const;
+
+  using MutableSpace::capacity_in_words;
+  virtual size_t capacity_in_words(Thread* thr) const;
   virtual size_t tlab_capacity(Thread* thr) const;
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
 
--- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -74,6 +74,7 @@
   // If the GC count has changed someone beat us to the collection
   // Get the Heap_lock after the pending_list_lock.
   Heap_lock->lock();
+
   // Check invocations
   if (skip_operation()) {
     // skip collection
@@ -82,6 +83,8 @@
     _prologue_succeeded = false;
   } else {
     _prologue_succeeded = true;
+    SharedHeap* sh = SharedHeap::heap();
+    if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = true;
   }
   return _prologue_succeeded;
 }
@@ -90,6 +93,8 @@
 void VM_GC_Operation::doit_epilogue() {
   assert(Thread::current()->is_Java_thread(), "just checking");
   // Release the Heap_lock first.
+  SharedHeap* sh = SharedHeap::heap();
+  if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = false;
   Heap_lock->unlock();
   release_and_notify_pending_list_lock();
 }
@@ -148,12 +153,27 @@
 void VM_GenCollectForPermanentAllocation::doit() {
   JvmtiGCForAllocationMarker jgcm;
   notify_gc_begin(true);
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  GCCauseSetter gccs(gch, _gc_cause);
-  gch->do_full_collection(gch->must_clear_all_soft_refs(),
-                          gch->n_gens() - 1);
-  _res = gch->perm_gen()->allocate(_size, false);
-  assert(gch->is_in_reserved_or_null(_res), "result not in heap");
+  SharedHeap* heap = (SharedHeap*)Universe::heap();
+  GCCauseSetter gccs(heap, _gc_cause);
+  switch (heap->kind()) {
+    case (CollectedHeap::GenCollectedHeap): {
+      GenCollectedHeap* gch = (GenCollectedHeap*)heap;
+      gch->do_full_collection(gch->must_clear_all_soft_refs(),
+                              gch->n_gens() - 1);
+      break;
+    }
+#ifndef SERIALGC
+    case (CollectedHeap::G1CollectedHeap): {
+      G1CollectedHeap* g1h = (G1CollectedHeap*)heap;
+      g1h->do_full_collection(_gc_cause == GCCause::_last_ditch_collection);
+      break;
+    }
+#endif // SERIALGC
+    default:
+      ShouldNotReachHere();
+  }
+  _res = heap->perm_gen()->allocate(_size, false);
+  assert(heap->is_in_reserved_or_null(_res), "result not in heap");
   if (_res == NULL && GC_locker::is_active_and_needs_gc()) {
     set_gc_locked();
   }
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -138,13 +138,6 @@
   return new_obj;
 }
 
-bool CollectedHeap::can_elide_permanent_oop_store_barriers() const {
-  // %%% This needs refactoring.  (It was gating logic from the server compiler.)
-  guarantee(kind() < CollectedHeap::G1CollectedHeap, "");
-  return !UseConcMarkSweepGC;
-}
-
-
 HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
   guarantee(false, "thread-local allocation buffers not supported");
   return NULL;
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -364,10 +364,8 @@
   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
   // via a TLAB up to the first subsequent safepoint.
-  virtual bool can_elide_tlab_store_barriers() const {
-    guarantee(kind() < CollectedHeap::G1CollectedHeap, "else change or refactor this");
-    return true;
-  }
+  virtual bool can_elide_tlab_store_barriers() const = 0;
+
   // If a compiler is eliding store barriers for TLAB-allocated objects,
   // there is probably a corresponding slow path which can produce
   // an object allocated anywhere.  The compiler's runtime support
@@ -379,12 +377,10 @@
   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap?  Applies when the compiler
   // is storing x to the heap, where x->is_perm() is true.
-  virtual bool can_elide_permanent_oop_store_barriers() const;
+  virtual bool can_elide_permanent_oop_store_barriers() const = 0;
 
   // Does this heap support heap inspection (+PrintClassHistogram?)
-  virtual bool supports_heap_inspection() const {
-    return false;   // Until RFE 5023697 is implemented
-  }
+  virtual bool supports_heap_inspection() const = 0;
 
   // Perform a collection of the heap; intended for use in implementing
   // "System.gc".  This probably implies as full a collection as the
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -122,7 +122,7 @@
       return result;
     }
   }
-  bool gc_overhead_limit_was_exceeded;
+  bool gc_overhead_limit_was_exceeded = false;
   result = Universe::heap()->mem_allocate(size,
                                           is_noref,
                                           false,
--- a/hotspot/src/share/vm/gc_interface/gcCause.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/gc_interface/gcCause.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -60,6 +60,8 @@
     _old_generation_too_full_to_scavenge,
     _adaptive_size_policy,
 
+    _g1_inc_collection_pause, _g1_pop_region_collection_pause,
+
     _last_ditch_collection,
     _last_gc_cause
   };
@@ -68,12 +70,14 @@
     return (cause == GCCause::_java_lang_system_gc ||
             cause == GCCause::_jvmti_force_gc);
   }
+
   inline static bool is_serviceability_requested_gc(GCCause::Cause
                                                              cause) {
     return (cause == GCCause::_jvmti_force_gc ||
             cause == GCCause::_heap_inspection ||
             cause == GCCause::_heap_dump);
   }
+
   // Return a string describing the GCCause.
   static const char* to_string(GCCause::Cause cause);
   // Return true if the GCCause is for a full collection.
--- a/hotspot/src/share/vm/includeDB_compiler1	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_compiler1	Wed Jul 05 16:43:17 2017 +0200
@@ -36,6 +36,9 @@
 c1_CFGPrinter.hpp                       c1_Compilation.hpp
 c1_CFGPrinter.hpp                       c1_Instruction.hpp
 
+cardTableModRefBS.cpp                   c1_LIR.hpp
+cardTableModRefBS.cpp                   c1_LIRGenerator.hpp
+
 c1_Canonicalizer.cpp                    c1_Canonicalizer.hpp
 c1_Canonicalizer.cpp                    c1_InstructionPrinter.hpp
 c1_Canonicalizer.cpp                    ciArray.hpp
@@ -55,6 +58,7 @@
 c1_CodeStubs_<arch>.cpp                 c1_LIRAssembler.hpp
 c1_CodeStubs_<arch>.cpp                 c1_MacroAssembler.hpp
 c1_CodeStubs_<arch>.cpp                 c1_Runtime1.hpp
+c1_CodeStubs_<arch>.cpp                 g1SATBCardTableModRefBS.hpp
 c1_CodeStubs_<arch>.cpp                 nativeInst_<arch>.hpp
 c1_CodeStubs_<arch>.cpp                 sharedRuntime.hpp
 c1_CodeStubs_<arch>.cpp                 vmreg_<arch>.inline.hpp
@@ -141,6 +145,7 @@
 c1_globals_<os_family>.hpp              globalDefinitions.hpp
 c1_globals_<os_family>.hpp              macros.hpp
 
+c1_GraphBuilder.cpp                     bitMap.inline.hpp
 c1_GraphBuilder.cpp                     bytecode.hpp
 c1_GraphBuilder.cpp                     c1_CFGPrinter.hpp
 c1_GraphBuilder.cpp                     c1_Canonicalizer.hpp
@@ -158,6 +163,7 @@
 c1_GraphBuilder.hpp                     ciMethodData.hpp
 c1_GraphBuilder.hpp                     ciStreams.hpp
 
+c1_IR.cpp                               bitMap.inline.hpp
 c1_IR.cpp                               c1_Compilation.hpp
 c1_IR.cpp                               c1_FrameMap.hpp
 c1_IR.cpp                               c1_GraphBuilder.hpp
@@ -232,20 +238,22 @@
 
 c1_LIRAssembler_<arch>.hpp              generate_platform_dependent_include
 
-c1_LIRGenerator.cpp                    c1_Compilation.hpp
-c1_LIRGenerator.cpp                    c1_FrameMap.hpp
-c1_LIRGenerator.cpp                    c1_Instruction.hpp
-c1_LIRGenerator.cpp                    c1_LIRAssembler.hpp
-c1_LIRGenerator.cpp                    c1_LIRGenerator.hpp
-c1_LIRGenerator.cpp                    c1_ValueStack.hpp
-c1_LIRGenerator.cpp                    ciArrayKlass.hpp
-c1_LIRGenerator.cpp                    ciInstance.hpp
-c1_LIRGenerator.cpp                    sharedRuntime.hpp
+c1_LIRGenerator.cpp                     bitMap.inline.hpp
+c1_LIRGenerator.cpp                     c1_Compilation.hpp
+c1_LIRGenerator.cpp                     c1_FrameMap.hpp
+c1_LIRGenerator.cpp                     c1_Instruction.hpp
+c1_LIRGenerator.cpp                     c1_LIRAssembler.hpp
+c1_LIRGenerator.cpp                     c1_LIRGenerator.hpp
+c1_LIRGenerator.cpp                     c1_ValueStack.hpp
+c1_LIRGenerator.cpp                     ciArrayKlass.hpp
+c1_LIRGenerator.cpp                     ciInstance.hpp
+c1_LIRGenerator.cpp                     heapRegion.hpp
+c1_LIRGenerator.cpp                     sharedRuntime.hpp
 
-c1_LIRGenerator.hpp                    c1_Instruction.hpp
-c1_LIRGenerator.hpp                    c1_LIR.hpp
-c1_LIRGenerator.hpp                    ciMethodData.hpp
-c1_LIRGenerator.hpp                    sizes.hpp
+c1_LIRGenerator.hpp                     c1_Instruction.hpp
+c1_LIRGenerator.hpp                     c1_LIR.hpp
+c1_LIRGenerator.hpp                     ciMethodData.hpp
+c1_LIRGenerator.hpp                     sizes.hpp
 
 c1_LIRGenerator_<arch>.cpp             c1_Compilation.hpp
 c1_LIRGenerator_<arch>.cpp             c1_FrameMap.hpp
@@ -260,6 +268,7 @@
 c1_LIRGenerator_<arch>.cpp             sharedRuntime.hpp
 c1_LIRGenerator_<arch>.cpp             vmreg_<arch>.inline.hpp
 
+c1_LinearScan.cpp                       bitMap.inline.hpp
 c1_LinearScan.cpp                       c1_CFGPrinter.hpp
 c1_LinearScan.cpp                       c1_Compilation.hpp
 c1_LinearScan.cpp                       c1_FrameMap.hpp
@@ -276,6 +285,7 @@
 c1_LinearScan.hpp                       c1_LIR.hpp
 c1_LinearScan.hpp                       c1_LIRGenerator.hpp
 
+c1_LinearScan_<arch>.cpp                bitMap.inline.hpp
 c1_LinearScan_<arch>.cpp                c1_Instruction.hpp
 c1_LinearScan_<arch>.cpp                c1_LinearScan.hpp
 
@@ -298,6 +308,7 @@
 
 c1_MacroAssembler_<arch>.hpp            generate_platform_dependent_include
 
+c1_Optimizer.cpp                        bitMap.inline.hpp
 c1_Optimizer.cpp                        c1_Canonicalizer.hpp
 c1_Optimizer.cpp                        c1_Optimizer.hpp
 c1_Optimizer.cpp                        c1_ValueMap.hpp
@@ -363,6 +374,7 @@
 c1_Runtime1_<arch>.cpp                  vframeArray.hpp
 c1_Runtime1_<arch>.cpp                  vmreg_<arch>.inline.hpp
 
+c1_ValueMap.cpp                         bitMap.inline.hpp
 c1_ValueMap.cpp                         c1_Canonicalizer.hpp
 c1_ValueMap.cpp                         c1_IR.hpp
 c1_ValueMap.cpp                         c1_ValueMap.hpp
@@ -433,4 +445,3 @@
 top.hpp                                 c1_globals.hpp
 
 vmStructs.hpp                           c1_Runtime1.hpp
-
--- a/hotspot/src/share/vm/includeDB_compiler2	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_compiler2	Wed Jul 05 16:43:17 2017 +0200
@@ -461,10 +461,13 @@
 graphKit.cpp                            addnode.hpp
 graphKit.cpp                            barrierSet.hpp
 graphKit.cpp                            cardTableModRefBS.hpp
+graphKit.cpp                            g1SATBCardTableModRefBS.hpp
 graphKit.cpp                            collectedHeap.hpp
 graphKit.cpp                            compileLog.hpp
 graphKit.cpp                            deoptimization.hpp
 graphKit.cpp                            graphKit.hpp
+graphKit.cpp                            heapRegion.hpp
+graphKit.cpp                            idealKit.hpp
 graphKit.cpp                            locknode.hpp
 graphKit.cpp                            machnode.hpp
 graphKit.cpp                            parse.hpp
@@ -484,6 +487,7 @@
 idealKit.cpp                            callnode.hpp
 idealKit.cpp                            cfgnode.hpp
 idealKit.cpp                            idealKit.hpp
+idealKit.cpp                            runtime.hpp
 
 idealKit.hpp                            connode.hpp
 idealKit.hpp                            mulnode.hpp
@@ -582,6 +586,7 @@
 loopTransform.cpp                       addnode.hpp
 loopTransform.cpp                       allocation.inline.hpp
 loopTransform.cpp                       connode.hpp
+loopTransform.cpp                       compileLog.hpp
 loopTransform.cpp                       divnode.hpp
 loopTransform.cpp                       loopnode.hpp
 loopTransform.cpp                       mulnode.hpp
@@ -597,6 +602,7 @@
 loopnode.cpp                            allocation.inline.hpp
 loopnode.cpp                            callnode.hpp
 loopnode.cpp                            ciMethodData.hpp
+loopnode.cpp                            compileLog.hpp
 loopnode.cpp                            connode.hpp
 loopnode.cpp                            divnode.hpp
 loopnode.cpp                            loopnode.hpp
@@ -915,9 +921,11 @@
 runtime.cpp                             connode.hpp
 runtime.cpp                             copy.hpp
 runtime.cpp                             fprofiler.hpp
+runtime.cpp                             g1SATBCardTableModRefBS.hpp
 runtime.cpp                             gcLocker.inline.hpp
 runtime.cpp                             graphKit.hpp
 runtime.cpp                             handles.inline.hpp
+runtime.cpp                             heapRegion.hpp
 runtime.cpp                             icBuffer.hpp
 runtime.cpp                             interfaceSupport.hpp
 runtime.cpp                             interpreter.hpp
--- a/hotspot/src/share/vm/includeDB_core	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_core	Wed Jul 05 16:43:17 2017 +0200
@@ -288,6 +288,10 @@
 attachListener.hpp                      debug.hpp
 attachListener.hpp                      ostream.hpp
 
+barrierSet.cpp                          barrierSet.hpp
+barrierSet.cpp                          collectedHeap.hpp
+barrierSet.cpp                          universe.hpp
+
 barrierSet.hpp                          memRegion.hpp
 barrierSet.hpp                          oopsHierarchy.hpp
 
@@ -295,7 +299,7 @@
 barrierSet.inline.hpp                   cardTableModRefBS.hpp
 
 bcEscapeAnalyzer.cpp                    bcEscapeAnalyzer.hpp
-bcEscapeAnalyzer.cpp                    bitMap.hpp
+bcEscapeAnalyzer.cpp                    bitMap.inline.hpp
 bcEscapeAnalyzer.cpp                    bytecode.hpp
 bcEscapeAnalyzer.cpp                    ciConstant.hpp
 bcEscapeAnalyzer.cpp                    ciField.hpp
@@ -320,13 +324,12 @@
 biasedLocking.hpp                       growableArray.hpp
 biasedLocking.hpp                       handles.hpp
 
-bitMap.cpp                              bitMap.hpp
+bitMap.cpp                              allocation.inline.hpp
 bitMap.cpp                              bitMap.inline.hpp
 bitMap.cpp                              copy.hpp
 bitMap.cpp                              os_<os_family>.inline.hpp
 
 bitMap.hpp                              allocation.hpp
-bitMap.hpp                              ostream.hpp
 bitMap.hpp                              top.hpp
 
 bitMap.inline.hpp                       atomic.hpp
@@ -645,6 +648,7 @@
 ciMethod.cpp                            abstractCompiler.hpp
 ciMethod.cpp                            allocation.inline.hpp
 ciMethod.cpp                            bcEscapeAnalyzer.hpp
+ciMethod.cpp                            bitMap.inline.hpp
 ciMethod.cpp                            ciCallProfile.hpp
 ciMethod.cpp                            ciExceptionHandler.hpp
 ciMethod.cpp                            ciInstanceKlass.hpp
@@ -1759,7 +1763,7 @@
 
 genRemSet.hpp                           oop.hpp
 
-generateOopMap.cpp                      bitMap.hpp
+generateOopMap.cpp                      bitMap.inline.hpp
 generateOopMap.cpp                      bytecodeStream.hpp
 generateOopMap.cpp                      generateOopMap.hpp
 generateOopMap.cpp                      handles.inline.hpp
@@ -1808,6 +1812,8 @@
 generation.inline.hpp                   generation.hpp
 generation.inline.hpp                   space.hpp
 
+genOopClosures.hpp                      oop.hpp
+
 generationSpec.cpp                      compactPermGen.hpp
 generationSpec.cpp                      defNewGeneration.hpp
 generationSpec.cpp                      filemap.hpp
@@ -2219,6 +2225,11 @@
 invocationCounter.hpp                   exceptions.hpp
 invocationCounter.hpp                   handles.hpp
 
+intHisto.cpp                            intHisto.hpp
+
+intHisto.hpp                            allocation.hpp
+intHisto.hpp                            growableArray.hpp
+
 iterator.cpp                            iterator.hpp
 iterator.cpp                            oop.inline.hpp
 
@@ -2818,6 +2829,7 @@
 methodKlass.hpp                         methodOop.hpp
 
 methodLiveness.cpp                      allocation.inline.hpp
+methodLiveness.cpp                      bitMap.inline.hpp
 methodLiveness.cpp                      bytecode.hpp
 methodLiveness.cpp                      bytecodes.hpp
 methodLiveness.cpp                      ciMethod.hpp
@@ -2964,6 +2976,11 @@
 nmethod.hpp                             codeBlob.hpp
 nmethod.hpp                             pcDesc.hpp
 
+numberSeq.cpp                           debug.hpp
+numberSeq.cpp                           numberSeq.hpp
+numberSeq.cpp                           globalDefinitions.hpp
+numberSeq.cpp                           allocation.inline.hpp
+
 objArrayKlass.cpp                       collectedHeap.inline.hpp
 objArrayKlass.cpp                       copy.hpp
 objArrayKlass.cpp                       genOopClosures.inline.hpp
@@ -3406,8 +3423,6 @@
 referencePolicy.cpp                     referencePolicy.hpp
 referencePolicy.cpp                     universe.hpp
 
-referencePolicy.hpp                     oop.hpp
-
 referenceProcessor.cpp                  collectedHeap.hpp
 referenceProcessor.cpp                  collectedHeap.inline.hpp
 referenceProcessor.cpp                  java.hpp
@@ -3758,6 +3773,8 @@
 specialized_oop_closures.cpp            ostream.hpp
 specialized_oop_closures.cpp            specialized_oop_closures.hpp
 
+specialized_oop_closures.hpp            atomic.hpp
+
 stackMapFrame.cpp                       globalDefinitions.hpp
 stackMapFrame.cpp                       handles.inline.hpp
 stackMapFrame.cpp                       oop.inline.hpp
@@ -4000,7 +4017,6 @@
 
 taskqueue.hpp                           allocation.hpp
 taskqueue.hpp                           allocation.inline.hpp
-taskqueue.hpp                           debug.hpp
 taskqueue.hpp                           mutex.hpp
 taskqueue.hpp                           orderAccess_<os_arch>.inline.hpp
 
@@ -4038,6 +4054,7 @@
 
 templateInterpreterGenerator_<arch>.hpp generate_platform_dependent_include
 
+templateTable.cpp                       collectedHeap.hpp
 templateTable.cpp                       templateTable.hpp
 templateTable.cpp                       timer.hpp
 
@@ -4542,6 +4559,7 @@
 vm_operations.cpp                       compilerOracle.hpp
 vm_operations.cpp                       deoptimization.hpp
 vm_operations.cpp                       interfaceSupport.hpp
+vm_operations.cpp                       isGCActiveMark.hpp
 vm_operations.cpp                       resourceArea.hpp
 vm_operations.cpp                       threadService.hpp
 vm_operations.cpp                       thread_<os_family>.inline.hpp
--- a/hotspot/src/share/vm/includeDB_features	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_features	Wed Jul 05 16:43:17 2017 +0200
@@ -99,6 +99,7 @@
 heapDumper.cpp                          reflectionUtils.hpp
 heapDumper.cpp                          symbolTable.hpp
 heapDumper.cpp                          systemDictionary.hpp
+heapDumper.cpp                          threadService.hpp
 heapDumper.cpp                          universe.hpp
 heapDumper.cpp                          vframe.hpp
 heapDumper.cpp                          vmGCOperations.hpp
--- a/hotspot/src/share/vm/includeDB_gc_parallel	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_gc_parallel	Wed Jul 05 16:43:17 2017 +0200
@@ -21,6 +21,10 @@
 // have any questions.
 //  
 
+assembler_<arch>.cpp                    g1SATBCardTableModRefBS.hpp
+assembler_<arch>.cpp                    g1CollectedHeap.inline.hpp
+assembler_<arch>.cpp                    heapRegion.hpp
+
 collectorPolicy.cpp                     cmsAdaptiveSizePolicy.hpp
 collectorPolicy.cpp                     cmsGCAdaptivePolicyCounters.hpp
 
@@ -37,6 +41,9 @@
 
 heapInspection.cpp                      parallelScavengeHeap.hpp
 
+instanceKlass.cpp                       heapRegionSeq.inline.hpp
+instanceKlass.cpp                       g1CollectedHeap.inline.hpp
+instanceKlass.cpp                       g1OopClosures.inline.hpp
 instanceKlass.cpp                       oop.pcgc.inline.hpp
 instanceKlass.cpp                       psPromotionManager.inline.hpp
 instanceKlass.cpp                       psScavenge.inline.hpp
@@ -48,6 +55,9 @@
 instanceKlassKlass.cpp                  psScavenge.inline.hpp
 instanceKlassKlass.cpp                  parOopClosures.inline.hpp
 
+instanceRefKlass.cpp                    heapRegionSeq.inline.hpp
+instanceRefKlass.cpp                    g1CollectedHeap.inline.hpp
+instanceRefKlass.cpp                    g1OopClosures.inline.hpp
 instanceRefKlass.cpp                    oop.pcgc.inline.hpp
 instanceRefKlass.cpp                    psPromotionManager.inline.hpp
 instanceRefKlass.cpp                    psScavenge.inline.hpp
@@ -70,6 +80,7 @@
 
 memoryService.cpp                       cmsPermGen.hpp
 memoryService.cpp                       concurrentMarkSweepGeneration.hpp
+memoryService.cpp                       g1CollectedHeap.inline.hpp
 memoryService.cpp                       parNewGeneration.hpp
 memoryService.cpp                       parallelScavengeHeap.hpp
 memoryService.cpp                       psMemoryPool.hpp
@@ -80,6 +91,9 @@
 methodDataKlass.cpp                     oop.pcgc.inline.hpp
 methodDataKlass.cpp                     psScavenge.inline.hpp
 
+objArrayKlass.cpp                       heapRegionSeq.inline.hpp
+objArrayKlass.cpp                       g1CollectedHeap.inline.hpp
+objArrayKlass.cpp                       g1OopClosures.inline.hpp
 objArrayKlass.cpp                       oop.pcgc.inline.hpp
 objArrayKlass.cpp                       psPromotionManager.inline.hpp
 objArrayKlass.cpp                       psScavenge.inline.hpp
@@ -122,6 +136,9 @@
 thread.cpp                              concurrentMarkSweepThread.hpp
 thread.cpp                              pcTasks.hpp
 
+thread.hpp                              dirtyCardQueue.hpp
+thread.hpp                              satbQueue.hpp
+
 universe.cpp                            parallelScavengeHeap.hpp
 universe.cpp                            cmsCollectorPolicy.hpp
 universe.cpp                            cmsAdaptiveSizePolicy.hpp
--- a/hotspot/src/share/vm/includeDB_jvmti	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_jvmti	Wed Jul 05 16:43:17 2017 +0200
@@ -209,6 +209,7 @@
 jvmtiManageCapabilities.hpp             allocation.hpp
 jvmtiManageCapabilities.hpp             jvmti.h
 
+jvmtiRedefineClasses.cpp                bitMap.inline.hpp
 jvmtiRedefineClasses.cpp                codeCache.hpp
 jvmtiRedefineClasses.cpp                deoptimization.hpp
 jvmtiRedefineClasses.cpp                gcLocker.hpp
--- a/hotspot/src/share/vm/interpreter/templateTable.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/interpreter/templateTable.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -172,6 +172,7 @@
 
 Template*                  TemplateTable::_desc;
 InterpreterMacroAssembler* TemplateTable::_masm;
+BarrierSet*                TemplateTable::_bs;
 
 
 void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(), char filler) {
@@ -244,6 +245,8 @@
   // Initialize table
   TraceTime timer("TemplateTable initialization", TraceStartupTime);
 
+  _bs = Universe::heap()->barrier_set();
+
   // For better readability
   const char _    = ' ';
   const int  ____ = 0;
--- a/hotspot/src/share/vm/interpreter/templateTable.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/interpreter/templateTable.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -82,6 +82,7 @@
   static Template*       _desc;                  // the current template to be generated
   static Bytecodes::Code bytecode()              { return _desc->bytecode(); }
 
+  static BarrierSet*     _bs;                    // Cache the barrier set.
  public:
   //%note templates_1
   static InterpreterMacroAssembler* _masm;       // the assembler used when generating templates
--- a/hotspot/src/share/vm/memory/allocation.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/allocation.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -338,6 +338,12 @@
       DEBUG_ONLY(((ResourceObj *)res)->_allocation = RESOURCE_AREA;)
       return res;
   }
+  void* operator new(size_t size, void* where, allocation_type type) {
+      void* res = where;
+      // Set allocation type in the resource object
+      DEBUG_ONLY(((ResourceObj *)res)->_allocation = type;)
+      return res;
+  }
   void  operator delete(void* p);
 };
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/memory/barrierSet.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,36 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_barrierSet.cpp.incl"
+
+// count is in HeapWords
+void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) {
+   Universe::heap()->barrier_set()->write_ref_array_pre(MemRegion(start, start + count));
+}
+
+// count is in HeapWords
+void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) {
+   Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, start + count));
+}
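+
+// Illustrative call sequence (hypothetical, not part of the VM sources):
+// generated code or a runtime stub is expected to bracket a bulk oop-array
+// store with the pre and post barriers above; dst and count are
+// placeholders.
+//
+//   BarrierSet::static_write_ref_array_pre(dst, count);   // e.g. SATB pre-barrier
+//   // ... store count HeapWords of oops into [dst, dst + count) ...
+//   BarrierSet::static_write_ref_array_post(dst, count);  // dirty cards / post-barrier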
--- a/hotspot/src/share/vm/memory/barrierSet.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/barrierSet.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -32,6 +32,8 @@
     ModRef,
     CardTableModRef,
     CardTableExtension,
+    G1SATBCT,
+    G1SATBCTLogging,
     Other,
     Uninit
   };
@@ -42,14 +44,16 @@
 
 public:
 
+  BarrierSet() { _kind = Uninit; }
   // To get around prohibition on RTTI.
-  virtual BarrierSet::Name kind() { return _kind; }
+  BarrierSet::Name kind() { return _kind; }
   virtual bool is_a(BarrierSet::Name bsn) = 0;
 
   // These operations indicate what kind of barriers the BarrierSet has.
   virtual bool has_read_ref_barrier() = 0;
   virtual bool has_read_prim_barrier() = 0;
   virtual bool has_write_ref_barrier() = 0;
+  virtual bool has_write_ref_pre_barrier() = 0;
   virtual bool has_write_prim_barrier() = 0;
 
   // These functions indicate whether a particular access of the given
@@ -57,7 +61,8 @@
   virtual bool read_ref_needs_barrier(void* field) = 0;
   virtual bool read_prim_needs_barrier(HeapWord* field, size_t bytes) = 0;
   virtual bool write_ref_needs_barrier(void* field, oop new_val) = 0;
-  virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, juint val1, juint val2) = 0;
+  virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes,
+                                        juint val1, juint val2) = 0;
 
   // The first four operations provide a direct implementation of the
   // barrier set.  An interpreter loop, for example, could call these
@@ -75,6 +80,13 @@
   // (For efficiency reasons, this operation is specialized for certain
   // barrier types.  Semantically, it should be thought of as a call to the
   // virtual "_work" function below, which must implement the barrier.)
+  // First the pre-write versions...
+  inline void write_ref_field_pre(void* field, oop new_val);
+protected:
+  virtual void write_ref_field_pre_work(void* field, oop new_val) {};
+public:
+
+  // ...then the post-write version.
   inline void write_ref_field(void* field, oop new_val);
 protected:
   virtual void write_ref_field_work(void* field, oop new_val) = 0;
@@ -92,6 +104,7 @@
   // the particular barrier.
   virtual bool has_read_ref_array_opt() = 0;
   virtual bool has_read_prim_array_opt() = 0;
+  virtual bool has_write_ref_array_pre_opt() { return true; }
   virtual bool has_write_ref_array_opt() = 0;
   virtual bool has_write_prim_array_opt() = 0;
 
@@ -104,7 +117,13 @@
   virtual void read_ref_array(MemRegion mr) = 0;
   virtual void read_prim_array(MemRegion mr) = 0;
 
+  virtual void write_ref_array_pre(MemRegion mr) {}
   inline void write_ref_array(MemRegion mr);
+
+  // Static versions, suitable for calling from generated code.
+  static void static_write_ref_array_pre(HeapWord* start, size_t count);
+  static void static_write_ref_array_post(HeapWord* start, size_t count);
+
 protected:
   virtual void write_ref_array_work(MemRegion mr) = 0;
 public:
@@ -120,33 +139,6 @@
   virtual void write_region_work(MemRegion mr) = 0;
 public:
 
-  // The remaining sets of operations are called by compilers or other code
-  // generators to insert barriers into generated code.  There may be
-  // several such code generators; the signatures of these
-  // barrier-generating functions may differ from generator to generator.
-  // There will be a set of four function signatures for each code
-  // generator, which accomplish the generation of barriers of the four
-  // kinds listed above.
-
-#ifdef TBD
-  // Generates code to invoke the barrier, if any, necessary when reading
-  // the ref field at "offset" in "obj".
-  virtual void gen_read_ref_field() = 0;
-
-  // Generates code to invoke the barrier, if any, necessary when reading
-  // the primitive field of "bytes" bytes at offset" in "obj".
-  virtual void gen_read_prim_field() = 0;
-
-  // Generates code to invoke the barrier, if any, necessary when writing
-  // "new_val" into the ref field at "offset" in "obj".
-  virtual void gen_write_ref_field() = 0;
-
-  // Generates code to invoke the barrier, if any, necessary when writing
-  // the "bytes"-byte value "new_val" into the primitive field at "offset"
-  // in "obj".
-  virtual void gen_write_prim_field() = 0;
-#endif
-
   // Some barrier sets create tables whose elements correspond to parts of
   // the heap; the CardTableModRefBS is an example.  Such barrier sets will
   // normally reserve space for such tables, and commit parts of the table
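
With the pre-/post-write split above, a reference store in runtime code is conceptually three steps: pre-barrier, the store itself, post-barrier. A hedged standalone sketch of that sequence (stub class, not the real BarrierSet API):

    // Stand-in barrier set; the real calls are BarrierSet::write_ref_field_pre
    // and BarrierSet::write_ref_field as declared above.
    struct StubBarrierSet {
      void write_ref_field_pre(void** field, void* new_val) { (void)field; (void)new_val; /* e.g. record the old *field (SATB) */ }
      void write_ref_field(void** field, void* new_val)     { (void)field; (void)new_val; /* e.g. dirty the card covering field */ }
    };

    void store_oop(StubBarrierSet* bs, void** field, void* new_val) {
      bs->write_ref_field_pre(field, new_val);  // before the old value is overwritten
      *field = new_val;                         // the actual reference store
      bs->write_ref_field(field, new_val);      // after the new value is in place
    }
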
--- a/hotspot/src/share/vm/memory/barrierSet.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/barrierSet.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -26,6 +26,14 @@
 // performance-critical calls when the barrier is the most common
 // card-table kind.
 
+void BarrierSet::write_ref_field_pre(void* field, oop new_val) {
+  if (kind() == CardTableModRef) {
+    ((CardTableModRefBS*)this)->inline_write_ref_field_pre(field, new_val);
+  } else {
+    write_ref_field_pre_work(field, new_val);
+  }
+}
+
 void BarrierSet::write_ref_field(void* field, oop new_val) {
   if (kind() == CardTableModRef) {
     ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val);
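
The inline wrappers above test the non-virtual kind() tag so that the common card-table case is handled with a cast and an inline call instead of a virtual dispatch. The same shape as a standalone sketch (names are illustrative, not the HotSpot ones):

    enum Kind { CardTableKind, OtherKind };

    struct Barrier {
      Kind _kind;
      explicit Barrier(Kind k) : _kind(k) {}
      Kind kind() const { return _kind; }               // non-virtual tag, cheap to test inline
      virtual void write_ref_field_work(void** f, void* v) = 0;
      inline void write_ref_field(void** f, void* v);   // fast-path dispatcher
      virtual ~Barrier() {}
    };

    struct CardTableBarrier : Barrier {
      CardTableBarrier() : Barrier(CardTableKind) {}
      void inline_write_ref_field(void** f, void* v) { (void)f; (void)v; /* dirty the card */ }
      virtual void write_ref_field_work(void** f, void* v) { inline_write_ref_field(f, v); }
    };

    inline void Barrier::write_ref_field(void** f, void* v) {
      if (kind() == CardTableKind) {
        static_cast<CardTableBarrier*>(this)->inline_write_ref_field(f, v);  // no virtual call
      } else {
        write_ref_field_work(f, v);                     // general (virtual) path
      }
    }
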
--- a/hotspot/src/share/vm/memory/blockOffsetTable.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/blockOffsetTable.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -184,7 +184,7 @@
     "Offset card has an unexpected value");
   size_t start_card_for_region = start_card;
   u_char offset = max_jubyte;
-  for (int i = 0; i <= N_powers-1; i++) {
+  for (int i = 0; i < N_powers; i++) {
     // -1 so that the card with the actual offset is counted.  Another -1
     // so that the reach ends in this region and not at the start
     // of the next.
--- a/hotspot/src/share/vm/memory/blockOffsetTable.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/blockOffsetTable.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -214,6 +214,7 @@
 //////////////////////////////////////////////////////////////////////////
 class BlockOffsetArray: public BlockOffsetTable {
   friend class VMStructs;
+  friend class G1BlockOffsetArray; // temp. until we restructure and cleanup
  protected:
   // The following enums are used by do_block_internal() below
   enum Action {
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -344,6 +344,17 @@
 }
 
 
+bool CardTableModRefBS::claim_card(size_t card_index) {
+  jbyte val = _byte_map[card_index];
+  if (val != claimed_card_val()) {
+    jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val);
+    if (res == val)
+      return true;
+    else return false;
+  }
+  return false;
+}
+
 void CardTableModRefBS::non_clean_card_iterate(Space* sp,
                                                MemRegion mr,
                                                DirtyCardToOopClosure* dcto_cl,
@@ -443,7 +454,7 @@
   }
 }
 
-void CardTableModRefBS::invalidate(MemRegion mr) {
+void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) {
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (!mri.is_empty()) dirty_MemRegion(mri);
@@ -471,11 +482,15 @@
   }
 }
 
+void CardTableModRefBS::dirty(MemRegion mr) {
+  jbyte* first = byte_for(mr.start());
+  jbyte* last  = byte_after(mr.last());
+  memset(first, dirty_card, last-first);
+}
+
 // NOTES:
 // (1) Unlike mod_oop_in_space_iterate() above, dirty_card_iterate()
 //     iterates over dirty cards ranges in increasing address order.
-// (2) Unlike, e.g., dirty_card_range_after_preclean() below,
-//     this method does not make the dirty cards prelceaned.
 void CardTableModRefBS::dirty_card_iterate(MemRegion mr,
                                            MemRegionClosure* cl) {
   for (int i = 0; i < _cur_covered_regions; i++) {
@@ -501,7 +516,9 @@
   }
 }
 
-MemRegion CardTableModRefBS::dirty_card_range_after_preclean(MemRegion mr) {
+MemRegion CardTableModRefBS::dirty_card_range_after_reset(MemRegion mr,
+                                                          bool reset,
+                                                          int reset_val) {
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (!mri.is_empty()) {
@@ -518,8 +535,10 @@
                dirty_cards++, next_entry++);
           MemRegion cur_cards(addr_for(cur_entry),
                               dirty_cards*card_size_in_words);
-          for (size_t i = 0; i < dirty_cards; i++) {
-             cur_entry[i] = precleaned_card;
+          if (reset) {
+            for (size_t i = 0; i < dirty_cards; i++) {
+              cur_entry[i] = reset_val;
+            }
           }
           return cur_cards;
         }
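
claim_card() above races threads to flip a card-table entry to the claimed value with a single compare-and-swap; only the winner returns true, so at most one thread goes on to process the card. A standalone sketch of the same protocol using std::atomic (the HotSpot code uses Atomic::cmpxchg directly on the byte map):

    #include <atomic>
    #include <cstdint>
    #include <cstddef>

    const int8_t claimed_card = 3;   // value taken from the enum in cardTableModRefBS.hpp

    // One byte per card; returns true only for the thread that wins the claim.
    bool claim_card(std::atomic<int8_t>* byte_map, std::size_t card_index) {
      int8_t val = byte_map[card_index].load(std::memory_order_relaxed);
      if (val != claimed_card) {
        // Succeeds only if no other thread changed the entry since we read it.
        return byte_map[card_index].compare_exchange_strong(val, claimed_card);
      }
      return false;  // already claimed by another thread
    }
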
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -54,6 +54,7 @@
     clean_card                  = -1,
     dirty_card                  =  0,
     precleaned_card             =  1,
+    claimed_card                =  3,
     last_card                   =  4,
     CT_MR_BS_last_reserved      = 10
   };
@@ -150,17 +151,6 @@
     return byte_for(p) + 1;
   }
 
-  // Mapping from card marking array entry to address of first word
-  HeapWord* addr_for(const jbyte* p) const {
-    assert(p >= _byte_map && p < _byte_map + _byte_map_size,
-           "out of bounds access to card marking array");
-    size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte));
-    HeapWord* result = (HeapWord*) (delta << card_shift);
-    assert(_whole_heap.contains(result),
-           "out of bounds accessor from card marking array");
-    return result;
-  }
-
   // Iterate over the portion of the card-table which covers the given
   // region mr in the given space and apply cl to any dirty sub-regions
   // of mr. cl and dcto_cl must either be the same closure or cl must
@@ -263,16 +253,22 @@
     card_size_in_words          = card_size / sizeof(HeapWord)
   };
 
+  static int clean_card_val()      { return clean_card; }
+  static int dirty_card_val()      { return dirty_card; }
+  static int claimed_card_val()    { return claimed_card; }
+  static int precleaned_card_val() { return precleaned_card; }
+
   // For RTTI simulation.
-  BarrierSet::Name kind() { return BarrierSet::CardTableModRef; }
   bool is_a(BarrierSet::Name bsn) {
-    return bsn == BarrierSet::CardTableModRef || bsn == BarrierSet::ModRef;
+    return bsn == BarrierSet::CardTableModRef || ModRefBarrierSet::is_a(bsn);
   }
 
   CardTableModRefBS(MemRegion whole_heap, int max_covered_regions);
 
   // *** Barrier set functions.
 
+  bool has_write_ref_pre_barrier() { return false; }
+
   inline bool write_ref_needs_barrier(void* field, oop new_val) {
     // Note that this assumes the perm gen is the highest generation
     // in the address space
@@ -315,11 +311,33 @@
 
   // *** Card-table-barrier-specific things.
 
+  inline void inline_write_ref_field_pre(void* field, oop newVal) {}
+
   inline void inline_write_ref_field(void* field, oop newVal) {
     jbyte* byte = byte_for(field);
     *byte = dirty_card;
   }
 
+  // These are used by G1, when it uses the card table as a temporary data
+  // structure for card claiming.
+  bool is_card_dirty(size_t card_index) {
+    return _byte_map[card_index] == dirty_card_val();
+  }
+
+  void mark_card_dirty(size_t card_index) {
+    _byte_map[card_index] = dirty_card_val();
+  }
+
+  bool is_card_claimed(size_t card_index) {
+    return _byte_map[card_index] == claimed_card_val();
+  }
+
+  bool claim_card(size_t card_index);
+
+  bool is_card_clean(size_t card_index) {
+    return _byte_map[card_index] == clean_card_val();
+  }
+
   // Card marking array base (adjusted for heap low boundary)
   // This would be the 0th element of _byte_map, if the heap started at 0x0.
   // But since the heap starts at some higher address, this points to somewhere
@@ -344,8 +362,9 @@
   }
 
   // ModRefBS functions.
-  void invalidate(MemRegion mr);
+  virtual void invalidate(MemRegion mr, bool whole_heap = false);
   void clear(MemRegion mr);
+  void dirty(MemRegion mr);
   void mod_oop_in_space_iterate(Space* sp, OopClosure* cl,
                                 bool clear = false,
                                 bool before_save_marks = false);
@@ -375,18 +394,39 @@
 
   static uintx ct_max_alignment_constraint();
 
-  // Apply closure cl to the dirty cards lying completely
-  // within MemRegion mr, setting the cards to precleaned.
-  void      dirty_card_iterate(MemRegion mr, MemRegionClosure* cl);
+  // Apply closure "cl" to the dirty cards containing some part of
+  // MemRegion "mr".
+  void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl);
 
   // Return the MemRegion corresponding to the first maximal run
-  // of dirty cards lying completely within MemRegion mr, after
-  // marking those cards precleaned.
-  MemRegion dirty_card_range_after_preclean(MemRegion mr);
+  // of dirty cards lying completely within MemRegion mr.
+  // If reset is "true", then sets those card table entries to the given
+  // value.
+  MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset,
+                                         int reset_val);
 
   // Set all the dirty cards in the given region to precleaned state.
   void preclean_dirty_cards(MemRegion mr);
 
+  // Provide read-only access to the card table array.
+  const jbyte* byte_for_const(const void* p) const {
+    return byte_for(p);
+  }
+  const jbyte* byte_after_const(const void* p) const {
+    return byte_after(p);
+  }
+
+  // Mapping from card marking array entry to address of first word
+  HeapWord* addr_for(const jbyte* p) const {
+    assert(p >= _byte_map && p < _byte_map + _byte_map_size,
+           "out of bounds access to card marking array");
+    size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte));
+    HeapWord* result = (HeapWord*) (delta << card_shift);
+    assert(_whole_heap.contains(result),
+           "out of bounds accessor from card marking array");
+    return result;
+  }
+
   // Mapping from address to card marking array index.
   int index_for(void* p) {
     assert(_whole_heap.contains(p),
@@ -402,6 +442,7 @@
   static size_t par_chunk_heapword_alignment() {
     return CardsPerStrideChunk * card_size_in_words;
   }
+
 };
 
 class CardTableRS;
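
addr_for() and index_for() above are inverse mappings between heap addresses and card-table entries; each card covers 2^card_shift bytes (card_shift is typically 9, i.e. 512-byte cards). The real table biases byte_map_base so the shift works from the heap's low boundary; in this standalone sketch the bias is folded into an explicit heap_base parameter:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    const int card_shift = 9;                          // 512-byte cards (typical value)

    // Heap address -> card index, relative to the covered heap's base.
    std::size_t index_for(const void* heap_base, const void* p) {
      return (uintptr_t(p) - uintptr_t(heap_base)) >> card_shift;
    }

    // Card index -> first heap address covered by that card.
    const void* addr_for(const void* heap_base, std::size_t card_index) {
      return (const void*)(uintptr_t(heap_base) + (card_index << card_shift));
    }

    static void self_check() {
      char heap[4 * 512];
      assert(index_for(heap, heap + 513) == 1);        // the second card starts at offset 512
      assert(addr_for(heap, 1) == (const void*)(heap + 512));
    }
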
--- a/hotspot/src/share/vm/memory/cardTableRS.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/cardTableRS.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -27,10 +27,25 @@
 
 CardTableRS::CardTableRS(MemRegion whole_heap,
                          int max_covered_regions) :
-  GenRemSet(&_ct_bs),
-  _ct_bs(whole_heap, max_covered_regions),
-  _cur_youngergen_card_val(youngergenP1_card)
+  GenRemSet(),
+  _cur_youngergen_card_val(youngergenP1_card),
+  _regions_to_iterate(max_covered_regions - 1)
 {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    if (G1RSBarrierUseQueue) {
+      _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap,
+                                                  max_covered_regions);
+    } else {
+      _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions);
+    }
+  } else {
+    _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
+  }
+#else
+  _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
+#endif
+  set_bs(_ct_bs);
   _last_cur_val_in_gen = new jbyte[GenCollectedHeap::max_gens + 1];
   if (_last_cur_val_in_gen == NULL) {
     vm_exit_during_initialization("Could not last_cur_val_in_gen array.");
@@ -38,20 +53,19 @@
   for (int i = 0; i < GenCollectedHeap::max_gens + 1; i++) {
     _last_cur_val_in_gen[i] = clean_card_val();
   }
-  _ct_bs.set_CTRS(this);
+  _ct_bs->set_CTRS(this);
 }
 
 void CardTableRS::resize_covered_region(MemRegion new_region) {
-  _ct_bs.resize_covered_region(new_region);
+  _ct_bs->resize_covered_region(new_region);
 }
 
 jbyte CardTableRS::find_unused_youngergenP_card_value() {
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
   for (jbyte v = youngergenP1_card;
        v < cur_youngergen_and_prev_nonclean_card;
        v++) {
     bool seen = false;
-    for (int g = 0; g < gch->n_gens()+1; g++) {
+    for (int g = 0; g < _regions_to_iterate; g++) {
       if (_last_cur_val_in_gen[g] == v) {
         seen = true;
         break;
@@ -221,11 +235,11 @@
 
 void CardTableRS::younger_refs_in_space_iterate(Space* sp,
                                                 OopsInGenClosure* cl) {
-  DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs.precision(),
+  DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs->precision(),
                                                    cl->gen_boundary());
   ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
 
-  _ct_bs.non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
+  _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
                                 dcto_cl, &clear_cl, false);
 }
 
@@ -549,7 +563,7 @@
 
   if (ch->kind() == CollectedHeap::GenCollectedHeap) {
     GenCollectedHeap::heap()->generation_iterate(&blk, false);
-    _ct_bs.verify();
+    _ct_bs->verify();
 
     // If the old gen collections also collect perm, then we are only
     // interested in perm-to-young pointers, not perm-to-old pointers.
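
The constructor above now chooses the concrete barrier set at runtime from the GC flags (and heap-allocates it) instead of embedding a CardTableModRefBSForCTRS by value. The selection logic, reduced to a standalone sketch with stub classes in place of the real ones:

    struct BS                { virtual ~BS() {} };
    struct PlainCardTableBS  : BS {};   // stands in for CardTableModRefBSForCTRS
    struct G1SATBCardTableBS : BS {};   // stands in for G1SATBCardTableModRefBS
    struct G1SATBLoggingBS   : BS {};   // stands in for G1SATBCardTableLoggingModRefBS

    BS* make_barrier_set(bool use_g1, bool g1_rs_barrier_use_queue) {
      if (use_g1) {
        if (g1_rs_barrier_use_queue) return new G1SATBLoggingBS();
        return new G1SATBCardTableBS();
      }
      return new PlainCardTableBS();
    }
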
--- a/hotspot/src/share/vm/memory/cardTableRS.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/cardTableRS.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -44,7 +44,7 @@
     return CardTableModRefBS::card_is_dirty_wrt_gen_iter(cv);
   }
 
-  CardTableModRefBSForCTRS _ct_bs;
+  CardTableModRefBSForCTRS* _ct_bs;
 
   virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl);
 
@@ -73,6 +73,8 @@
 
   jbyte _cur_youngergen_card_val;
 
+  int _regions_to_iterate;
+
   jbyte cur_youngergen_card_val() {
     return _cur_youngergen_card_val;
   }
@@ -96,7 +98,7 @@
 
   CardTableRS* as_CardTableRS() { return this; }
 
-  CardTableModRefBS* ct_bs() { return &_ct_bs; }
+  CardTableModRefBS* ct_bs() { return _ct_bs; }
 
   // Override.
   void prepare_for_younger_refs_iterate(bool parallel);
@@ -107,7 +109,7 @@
   void younger_refs_iterate(Generation* g, OopsInGenClosure* blk);
 
   void inline_write_ref_field_gc(void* field, oop new_val) {
-    jbyte* byte = _ct_bs.byte_for(field);
+    jbyte* byte = _ct_bs->byte_for(field);
     *byte = youngergen_card;
   }
   void write_ref_field_gc_work(void* field, oop new_val) {
@@ -122,25 +124,27 @@
   void resize_covered_region(MemRegion new_region);
 
   bool is_aligned(HeapWord* addr) {
-    return _ct_bs.is_card_aligned(addr);
+    return _ct_bs->is_card_aligned(addr);
   }
 
   void verify();
   void verify_aligned_region_empty(MemRegion mr);
 
-  void clear(MemRegion mr) { _ct_bs.clear(mr); }
+  void clear(MemRegion mr) { _ct_bs->clear(mr); }
   void clear_into_younger(Generation* gen, bool clear_perm);
 
-  void invalidate(MemRegion mr) { _ct_bs.invalidate(mr); }
+  void invalidate(MemRegion mr, bool whole_heap = false) {
+    _ct_bs->invalidate(mr, whole_heap);
+  }
   void invalidate_or_clear(Generation* gen, bool younger, bool perm);
 
   static uintx ct_max_alignment_constraint() {
     return CardTableModRefBS::ct_max_alignment_constraint();
   }
 
-  jbyte* byte_for(void* p)     { return _ct_bs.byte_for(p); }
-  jbyte* byte_after(void* p)   { return _ct_bs.byte_after(p); }
-  HeapWord* addr_for(jbyte* p) { return _ct_bs.addr_for(p); }
+  jbyte* byte_for(void* p)     { return _ct_bs->byte_for(p); }
+  jbyte* byte_after(void* p)   { return _ct_bs->byte_after(p); }
+  HeapWord* addr_for(jbyte* p) { return _ct_bs->addr_for(p); }
 
   bool is_prev_nonclean_card_val(jbyte v) {
     return
--- a/hotspot/src/share/vm/memory/collectorPolicy.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/collectorPolicy.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -31,11 +31,11 @@
   if (PermSize > MaxPermSize) {
     MaxPermSize = PermSize;
   }
-  PermSize = align_size_down(PermSize, min_alignment());
+  PermSize = MAX2(min_alignment(), align_size_down_(PermSize, min_alignment()));
   MaxPermSize = align_size_up(MaxPermSize, max_alignment());
 
-  MinPermHeapExpansion = align_size_down(MinPermHeapExpansion, min_alignment());
-  MaxPermHeapExpansion = align_size_down(MaxPermHeapExpansion, min_alignment());
+  MinPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MinPermHeapExpansion, min_alignment()));
+  MaxPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MaxPermHeapExpansion, min_alignment()));
 
   MinHeapDeltaBytes = align_size_up(MinHeapDeltaBytes, min_alignment());
 
@@ -55,25 +55,21 @@
 
 void CollectorPolicy::initialize_size_info() {
   // User inputs from -mx and ms are aligned
-  _initial_heap_byte_size = align_size_up(Arguments::initial_heap_size(),
-                                          min_alignment());
-  set_min_heap_byte_size(align_size_up(Arguments::min_heap_size(),
-                                          min_alignment()));
-  set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment()));
-
-  // Check validity of heap parameters from launcher
+  set_initial_heap_byte_size(Arguments::initial_heap_size());
   if (initial_heap_byte_size() == 0) {
     set_initial_heap_byte_size(NewSize + OldSize);
-  } else {
-    Universe::check_alignment(initial_heap_byte_size(), min_alignment(),
-                            "initial heap");
   }
+  set_initial_heap_byte_size(align_size_up(_initial_heap_byte_size,
+                                           min_alignment()));
+
+  set_min_heap_byte_size(Arguments::min_heap_size());
   if (min_heap_byte_size() == 0) {
     set_min_heap_byte_size(NewSize + OldSize);
-  } else {
-    Universe::check_alignment(min_heap_byte_size(), min_alignment(),
-                            "initial heap");
   }
+  set_min_heap_byte_size(align_size_up(_min_heap_byte_size,
+                                       min_alignment()));
+
+  set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment()));
 
   // Check heap parameter properties
   if (initial_heap_byte_size() < M) {
@@ -121,8 +117,6 @@
                                            int max_covered_regions) {
   switch (rem_set_name()) {
   case GenRemSet::CardTable: {
-    if (barrier_set_name() != BarrierSet::CardTableModRef)
-      vm_exit_during_initialization("Mismatch between RS and BS.");
     CardTableRS* res = new CardTableRS(whole_heap, max_covered_regions);
     return res;
   }
@@ -345,7 +339,7 @@
 
     // At this point all three sizes have been checked against the
     // maximum sizes but have not been checked for consistency
-    // amoung the three.
+    // among the three.
 
     // Final check min <= initial <= max
     set_min_gen0_size(MIN2(_min_gen0_size, _max_gen0_size));
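
The flag adjustment above now clamps the rounded-down sizes to at least one alignment unit, so a PermSize (or expansion increment) smaller than min_alignment can no longer be rounded down to zero. The arithmetic as a standalone sketch:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    // Power-of-two alignment helpers, mirroring align_size_down/align_size_up.
    std::size_t align_size_down(std::size_t size, std::size_t alignment) {
      return size & ~(alignment - 1);
    }
    std::size_t align_size_up(std::size_t size, std::size_t alignment) {
      return (size + alignment - 1) & ~(alignment - 1);
    }

    void example() {
      const std::size_t min_alignment = 64 * 1024;   // illustrative value only
      std::size_t perm_size = 10 * 1024;             // smaller than one alignment unit
      // Plain round-down would give 0; the MAX2-style clamp keeps one full unit.
      perm_size = std::max(min_alignment, align_size_down(perm_size, min_alignment));
      assert(perm_size == min_alignment);
    }
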
--- a/hotspot/src/share/vm/memory/collectorPolicy.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/collectorPolicy.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -39,10 +39,12 @@
 // Forward declarations.
 class GenCollectorPolicy;
 class TwoGenerationCollectorPolicy;
+class AdaptiveSizePolicy;
 #ifndef SERIALGC
 class ConcurrentMarkSweepPolicy;
+class G1CollectorPolicy;
 #endif // SERIALGC
-class AdaptiveSizePolicy;
+
 class GCPolicyCounters;
 class PermanentGenerationSpec;
 class MarkSweepPolicy;
@@ -55,7 +57,7 @@
   // Requires that the concrete subclass sets the alignment constraints
   // before calling.
   virtual void initialize_flags();
-  virtual void initialize_size_info() = 0;
+  virtual void initialize_size_info();
   // Initialize "_permanent_generation" to a spec for the given kind of
   // Perm Gen.
   void initialize_perm_generation(PermGen::Name pgnm);
@@ -91,17 +93,18 @@
   enum Name {
     CollectorPolicyKind,
     TwoGenerationCollectorPolicyKind,
-    TrainPolicyKind,
     ConcurrentMarkSweepPolicyKind,
-    ASConcurrentMarkSweepPolicyKind
+    ASConcurrentMarkSweepPolicyKind,
+    G1CollectorPolicyKind
   };
 
   // Identification methods.
-  virtual GenCollectorPolicy*           as_generation_policy()          { return NULL; }
+  virtual GenCollectorPolicy*           as_generation_policy()            { return NULL; }
   virtual TwoGenerationCollectorPolicy* as_two_generation_policy()        { return NULL; }
   virtual MarkSweepPolicy*              as_mark_sweep_policy()            { return NULL; }
 #ifndef SERIALGC
   virtual ConcurrentMarkSweepPolicy*    as_concurrent_mark_sweep_policy() { return NULL; }
+  virtual G1CollectorPolicy*            as_g1_policy()                    { return NULL; }
 #endif // SERIALGC
   // Note that these are not virtual.
   bool is_generation_policy()            { return as_generation_policy() != NULL; }
@@ -109,10 +112,13 @@
   bool is_mark_sweep_policy()            { return as_mark_sweep_policy() != NULL; }
 #ifndef SERIALGC
   bool is_concurrent_mark_sweep_policy() { return as_concurrent_mark_sweep_policy() != NULL; }
+  bool is_g1_policy()                    { return as_g1_policy() != NULL; }
 #else  // SERIALGC
   bool is_concurrent_mark_sweep_policy() { return false; }
+  bool is_g1_policy()                    { return false; }
 #endif // SERIALGC
 
+
   virtual PermanentGenerationSpec *permanent_generation() {
     assert(_permanent_generation != NULL, "Sanity check");
     return _permanent_generation;
--- a/hotspot/src/share/vm/memory/compactingPermGenGen.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/compactingPermGenGen.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -100,7 +100,7 @@
 
   enum {
     vtbl_list_size = 16, // number of entries in the shared space vtable list.
-    num_virtuals = 100   // number of virtual methods in Klass (or
+    num_virtuals = 200   // number of virtual methods in Klass (or
                          // subclass) objects, or greater.
   };
 
--- a/hotspot/src/share/vm/memory/dump.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/dump.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -818,6 +818,40 @@
 // across the space while doing this, as that causes the vtables to be
 // patched, undoing our useful work.  Instead, iterate to make a list,
 // then use the list to do the fixing.
+//
+// Our constructed vtables:
+// Dump time:
+//  1. init_self_patching_vtbl_list: table of pointers to current virtual method addrs
+//  2. generate_vtable_methods: create jump table, appended to above vtbl_list
+//  3. PatchKlassVtables: for Klass list, patch the vtable entry to point to jump table
+//     rather than to current vtbl
+// Table layout: NOTE FIXED SIZE
+//   1. vtbl pointers
+//   2. #Klass X #virtual methods per Klass
+//   1 entry for each, in the order:
+//   Klass1:method1 entry, Klass1:method2 entry, ... Klass1:method<num_virtuals> entry
+//   Klass2:method1 entry, Klass2:method2 entry, ... Klass2:method<num_virtuals> entry
+//   ...
+//   Klass<vtbl_list_size>:method1 entry, Klass<vtbl_list_size>:method2 entry,
+//       ... Klass<vtbl_list_size>:method<num_virtuals> entry
+//  Sample entry: (Sparc):
+//   save(sp, -256, sp)
+//   ba,pt common_code
+//   mov XXX, %L0       %L0 gets: Klass index <<8 + method index (note: max method index 255)
+//
+// Restore time:
+//   1. initialize_oops: reserve space for table
+//   2. init_self_patching_vtbl_list: update pointers to NEW virtual method addrs in text
+//
+// Execution time:
+//   First virtual method call for any object of these Klass types:
+//   1. object->klass->klass_part
+//   2. vtable entry for that klass_part points to the jump table entries
+//   3. branches to common_code with %O0/klass_part, %L0: Klass index <<8 + method index
+//   4. common_code:
+//      Get address of new vtbl pointer for this Klass from updated table
+//      Update new vtbl pointer in the Klass: future virtual calls go direct
+//      Jump to method, using new vtbl pointer and method index
 
 class PatchKlassVtables: public ObjectClosure {
 private:
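
Each jump-table stub described above encodes its position as (Klass index << 8) + method index, which common_code decodes to find the updated vtable pointer for that Klass. The encode/decode arithmetic as a standalone sketch, using the vtbl_list_size and num_virtuals constants from compactingPermGenGen.hpp:

    #include <cassert>

    const int vtbl_list_size = 16;    // Klass vtables in the shared-space list
    const int num_virtuals   = 200;   // max virtual methods per Klass (see compactingPermGenGen.hpp)

    // Encoded into each stub: Klass index in the high bits, method index in the low 8.
    int encode(int klass_index, int method_index) {
      assert(klass_index < vtbl_list_size && method_index <= 255);  // "max method index 255"
      return (klass_index << 8) + method_index;
    }

    int klass_index_of(int encoded)  { return encoded >> 8; }
    int method_index_of(int encoded) { return encoded & 0xff; }

    // The fixed-size table holds one entry per (Klass, method) pair, in row order.
    int table_slot(int klass_index, int method_index) {
      return klass_index * num_virtuals + method_index;
    }
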
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -252,6 +252,21 @@
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
   virtual HeapWord* allocate_new_tlab(size_t size);
 
+  // Can a compiler initialize a new object without store barriers?
+  // This permission only extends from the creation of a new object
+  // via a TLAB up to the first subsequent safepoint.
+  virtual bool can_elide_tlab_store_barriers() const {
+    return true;
+  }
+
+  // Can a compiler elide a store barrier when it writes
+  // a permanent oop into the heap?  Applies when the compiler
+  // is storing x to the heap, where x->is_perm() is true.
+  virtual bool can_elide_permanent_oop_store_barriers() const {
+    // CMS needs to see all, even intra-generational, ref updates.
+    return !UseConcMarkSweepGC;
+  }
+
   // The "requestor" generation is performing some garbage collection
   // action for which it would be useful to have scratch space.  The
   // requestor promises to allocate no more than "max_alloc_words" in any
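
can_elide_tlab_store_barriers() above tells a compiler that initializing stores into an object freshly allocated in a TLAB need no write barrier until the next safepoint. A hedged standalone sketch (stub policy class, not the real compiler interface) of how a code generator might consult such a predicate:

    // Stand-in for the heap policy queried by the code generator.
    struct HeapPolicyStub {
      bool can_elide_tlab_store_barriers() const { return true; }  // as GenCollectedHeap answers above
    };

    enum StoreKind { RawStore, StoreWithBarrier };

    // Initializing stores to a new TLAB object, before any safepoint, may skip
    // the barrier when the heap permits it; everything else keeps the barrier.
    StoreKind choose_store(const HeapPolicyStub& heap, bool store_targets_new_tlab_object) {
      if (store_targets_new_tlab_object && heap.can_elide_tlab_store_barriers()) {
        return RawStore;
      }
      return StoreWithBarrier;
    }
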
--- a/hotspot/src/share/vm/memory/genMarkSweep.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/genMarkSweep.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -191,8 +191,10 @@
 
 void GenMarkSweep::deallocate_stacks() {
 
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->release_scratch();
+  if (!UseG1GC) {
+    GenCollectedHeap* gch = GenCollectedHeap::heap();
+    gch->release_scratch();
+  }
 
   if (_preserved_oop_stack) {
     delete _preserved_mark_stack;
--- a/hotspot/src/share/vm/memory/genMarkSweep.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/genMarkSweep.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -24,6 +24,7 @@
 
 class GenMarkSweep : public MarkSweep {
   friend class VM_MarkSweep;
+  friend class G1MarkSweep;
  public:
   static void invoke_at_safepoint(int level, ReferenceProcessor* rp,
                                   bool clear_all_softrefs);
--- a/hotspot/src/share/vm/memory/genOopClosures.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/genOopClosures.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -56,6 +56,9 @@
   // pointers must call the method below.
   template <class T> void do_barrier(T* p);
 
+  // Version for use by closures that may be called in parallel code.
+  void par_do_barrier(oop* p);
+
  public:
   OopsInGenClosure() : OopClosure(NULL),
     _orig_gen(NULL), _gen(NULL), _gen_boundary(NULL), _rs(NULL) {};
--- a/hotspot/src/share/vm/memory/genOopClosures.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/genOopClosures.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -48,6 +48,16 @@
   }
 }
 
+inline void OopsInGenClosure::par_do_barrier(oop* p) {
+  assert(generation()->is_in_reserved(p), "expected ref in generation");
+  oop obj = *p;
+  assert(obj != NULL, "expected non-null object");
+  // If p points to a younger generation, mark the card.
+  if ((HeapWord*)obj < gen_boundary()) {
+    rs()->write_ref_field_gc_par(p, obj);
+  }
+}
+
 // NOTE! Any changes made here should also be made
 // in FastScanClosure::do_oop_work()
 template <class T> inline void ScanClosure::do_oop_work(T* p) {
--- a/hotspot/src/share/vm/memory/genRemSet.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/genRemSet.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -42,6 +42,7 @@
   };
 
   GenRemSet(BarrierSet * bs) : _bs(bs) {}
+  GenRemSet() : _bs(NULL) {}
 
   virtual Name rs_kind() = 0;
 
@@ -53,6 +54,9 @@
   // Return the barrier set associated with "this."
   BarrierSet* bs() { return _bs; }
 
+  // Set the barrier set.
+  void set_bs(BarrierSet* bs) { _bs = bs; }
+
   // Do any (sequential) processing necessary to prepare for (possibly
   // "parallel", if that arg is true) calls to younger_refs_iterate.
   virtual void prepare_for_younger_refs_iterate(bool parallel) = 0;
@@ -116,7 +120,10 @@
 
   // Informs the RS that refs in the given "mr" may have changed
   // arbitrarily, and therefore may contain old-to-young pointers.
-  virtual void invalidate(MemRegion mr) = 0;
+  // If "whole heap" is true, then this invalidation is part of an
+  // invalidation of the whole heap, which an implementation might
+  // handle differently than that of a sub-part of the heap.
+  virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0;
 
   // Informs the RS that refs in this generation
   // may have changed arbitrarily, and therefore may contain
--- a/hotspot/src/share/vm/memory/heapInspection.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/heapInspection.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -238,11 +238,14 @@
   HeapWord* ref;
 
   CollectedHeap* heap = Universe::heap();
+  bool is_shared_heap = false;
   switch (heap->kind()) {
+    case CollectedHeap::G1CollectedHeap:
     case CollectedHeap::GenCollectedHeap: {
-      GenCollectedHeap* gch = (GenCollectedHeap*)heap;
-      gch->gc_prologue(false /* !full */); // get any necessary locks
-      ref = gch->perm_gen()->used_region().start();
+      is_shared_heap = true;
+      SharedHeap* sh = (SharedHeap*)heap;
+      sh->gc_prologue(false /* !full */); // get any necessary locks, etc.
+      ref = sh->perm_gen()->used_region().start();
       break;
     }
 #ifndef SERIALGC
@@ -284,9 +287,9 @@
   }
   st->flush();
 
-  if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-    GenCollectedHeap* gch = GenCollectedHeap::heap();
-    gch->gc_epilogue(false /* !full */); // release all acquired locks
+  if (is_shared_heap) {
+    SharedHeap* sh = (SharedHeap*)heap;
+    sh->gc_epilogue(false /* !full */); // release all acquired locks, etc.
   }
 }
 
--- a/hotspot/src/share/vm/memory/iterator.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/iterator.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -26,9 +26,23 @@
 
 class ReferenceProcessor;
 
+// Closure provides abortability.
+
+class Closure : public StackObj {
+ protected:
+  bool _abort;
+  void set_abort() { _abort = true; }
+ public:
+  Closure() : _abort(false) {}
+  // A subtype can use this mechanism to indicate to some iterator mapping
+  // functions that the iteration should cease.
+  bool abort() { return _abort; }
+  void clear_abort() { _abort = false; }
+};
+
 // OopClosure is used for iterating through roots (oop*)
 
-class OopClosure : public StackObj {
+class OopClosure : public Closure {
  public:
   ReferenceProcessor* _ref_processor;
   OopClosure(ReferenceProcessor* rp) : _ref_processor(rp) { }
@@ -55,11 +69,16 @@
   Prefetch::style prefetch_style() { // Note that this is non-virtual.
     return Prefetch::do_none;
   }
+
+  // True iff this closure may be safely applied more than once to an oop
+  // location without an intervening "major reset" (like the end of a GC).
+  virtual bool idempotent() { return false; }
+  virtual bool apply_to_weak_ref_discovered_field() { return false; }
 };
 
 // ObjectClosure is used for iterating through an object space
 
-class ObjectClosure : public StackObj {
+class ObjectClosure : public Closure {
  public:
   // Called for each object.
   virtual void do_object(oop obj) = 0;
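
The new Closure base class above gives every closure an abort flag that iteration code can poll between elements. A standalone sketch of the idiom: a closure that stops an object walk after a fixed number of objects (stand-in types, not the HotSpot oop machinery):

    #include <cstddef>

    class Closure {
     protected:
      bool _abort;
      void set_abort() { _abort = true; }
     public:
      Closure() : _abort(false) {}
      bool abort() const { return _abort; }
      void clear_abort() { _abort = false; }
    };

    class ObjectClosure : public Closure {
     public:
      virtual void do_object(void* obj) = 0;
      virtual ~ObjectClosure() {}
    };

    // Visits at most 'limit' objects, then asks the iterator to stop.
    class FirstNObjectsClosure : public ObjectClosure {
      std::size_t _seen, _limit;
     public:
      explicit FirstNObjectsClosure(std::size_t limit) : _seen(0), _limit(limit) {}
      virtual void do_object(void* /*obj*/) { if (++_seen >= _limit) set_abort(); }
    };

    // A cooperating iterator checks abort() between objects.
    void iterate(void** objs, std::size_t n, ObjectClosure* cl) {
      for (std::size_t i = 0; i < n && !cl->abort(); ++i) cl->do_object(objs[i]);
    }
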
--- a/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -31,6 +31,13 @@
 
 class ModRefBarrierSet: public BarrierSet {
 public:
+
+  ModRefBarrierSet() { _kind = BarrierSet::ModRef; }
+
+  bool is_a(BarrierSet::Name bsn) {
+    return bsn == BarrierSet::ModRef;
+  }
+
   // Barriers only on ref writes.
   bool has_read_ref_barrier() { return false; }
   bool has_read_prim_barrier() { return false; }
@@ -85,8 +92,10 @@
                                         bool clear = false,
                                         bool before_save_marks = false) = 0;
 
-  // Causes all refs in "mr" to be assumed to be modified.
-  virtual void invalidate(MemRegion mr) = 0;
+  // Causes all refs in "mr" to be assumed to be modified.  If "whole_heap"
+  // is true, the caller asserts that the entire heap is being invalidated,
+  // which may admit an optimized implementation for some barriers.
+  virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0;
 
   // The caller guarantees that "mr" contains no references.  (Perhaps its
   // objects have been moved elsewhere.)
--- a/hotspot/src/share/vm/memory/referenceProcessor.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -91,7 +91,8 @@
                                          bool               mt_discovery,
                                          BoolObjectClosure* is_alive_non_header,
                                          int                parallel_gc_threads,
-                                         bool               mt_processing) {
+                                         bool               mt_processing,
+                                         bool               dl_needs_barrier) {
   int mt_degree = 1;
   if (parallel_gc_threads > 1) {
     mt_degree = parallel_gc_threads;
@@ -99,7 +100,8 @@
   ReferenceProcessor* rp =
     new ReferenceProcessor(span, atomic_discovery,
                            mt_discovery, mt_degree,
-                           mt_processing && (parallel_gc_threads > 0));
+                           mt_processing && (parallel_gc_threads > 0),
+                           dl_needs_barrier);
   if (rp == NULL) {
     vm_exit_during_initialization("Could not allocate ReferenceProcessor object");
   }
@@ -111,10 +113,13 @@
                                        bool      atomic_discovery,
                                        bool      mt_discovery,
                                        int       mt_degree,
-                                       bool      mt_processing) :
+                                       bool      mt_processing,
+                                       bool      discovered_list_needs_barrier)  :
   _discovering_refs(false),
   _enqueuing_is_done(false),
   _is_alive_non_header(NULL),
+  _discovered_list_needs_barrier(discovered_list_needs_barrier),
+  _bs(NULL),
   _processing_is_mt(mt_processing),
   _next_id(0)
 {
@@ -135,6 +140,10 @@
         _discoveredSoftRefs[i].set_head(sentinel_ref());
     _discoveredSoftRefs[i].set_length(0);
   }
+  // If we do barriers, cache a copy of the barrier set.
+  if (discovered_list_needs_barrier) {
+    _bs = Universe::heap()->barrier_set();
+  }
 }
 
 #ifndef PRODUCT
@@ -727,10 +736,15 @@
   refs_list.set_length(0);
 }
 
-void
-ReferenceProcessor::abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]) {
-  for (int i = 0; i < _num_q; i++) {
-    abandon_partial_discovered_list(refs_lists[i]);
+void ReferenceProcessor::abandon_partial_discovery() {
+  // loop over the lists
+  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+    if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) {
+      gclog_or_tty->print_cr(
+        "\nAbandoning %s discovered list",
+        list_name(i));
+    }
+    abandon_partial_discovered_list(_discoveredSoftRefs[i]);
   }
 }
 
@@ -994,7 +1008,16 @@
   assert(_discovery_is_mt, "!_discovery_is_mt should have been handled by caller");
   // First we must make sure this object is only enqueued once. CAS in a non null
   // discovered_addr.
-  oop retest = oopDesc::atomic_compare_exchange_oop(refs_list.head(), discovered_addr,
+  oop current_head = refs_list.head();
+
+  // Note: In the case of G1, this pre-barrier is strictly
+  // not necessary because the only case we are interested in
+  // here is when *discovered_addr is NULL, so this will expand to
+  // nothing. As a result, I am just manually eliding this out for G1.
+  if (_discovered_list_needs_barrier && !UseG1GC) {
+    _bs->write_ref_field_pre((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+  }
+  oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr,
                                                     NULL);
   if (retest == NULL) {
     // This thread just won the right to enqueue the object.
@@ -1002,6 +1025,10 @@
     // is necessary.
     refs_list.set_head(obj);
     refs_list.set_length(refs_list.length() + 1);
+    if (_discovered_list_needs_barrier) {
+      _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+    }
+
   } else {
     // If retest was non NULL, another thread beat us to it:
     // The reference has already been discovered...
@@ -1073,8 +1100,8 @@
     }
   }
 
-  HeapWord* discovered_addr = java_lang_ref_Reference::discovered_addr(obj);
-  oop  discovered = java_lang_ref_Reference::discovered(obj);
+  HeapWord* const discovered_addr = java_lang_ref_Reference::discovered_addr(obj);
+  const oop  discovered = java_lang_ref_Reference::discovered(obj);
   assert(discovered->is_oop_or_null(), "bad discovered field");
   if (discovered != NULL) {
     // The reference has already been discovered...
@@ -1094,7 +1121,7 @@
       // discovered twice except by concurrent collectors that potentially
       // trace the same Reference object twice.
       assert(UseConcMarkSweepGC,
-             "Only possible with a concurrent collector");
+             "Only possible with an incremental-update concurrent collector");
       return true;
     }
   }
@@ -1122,12 +1149,24 @@
     return false;   // nothing special needs to be done
   }
 
-  // We do a raw store here, the field will be visited later when
-  // processing the discovered references.
   if (_discovery_is_mt) {
     add_to_discovered_list_mt(*list, obj, discovered_addr);
   } else {
-    oop_store_raw(discovered_addr, list->head());
+    // If "_discovered_list_needs_barrier", we do write barriers when
+    // updating the discovered reference list.  Otherwise, we do a raw store
+    // here: the field will be visited later when processing the discovered
+    // references.
+    oop current_head = list->head();
+    // As in the case further above, since we are over-writing a NULL
+    // pre-value, we can safely elide the pre-barrier here for the case of G1.
+    assert(discovered == NULL, "control point invariant");
+    if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1
+      _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+    }
+    oop_store_raw(discovered_addr, current_head);
+    if (_discovered_list_needs_barrier) {
+      _bs->write_ref_field((oop*)discovered_addr, current_head);
+    }
     list->set_head(obj);
     list->set_length(list->length() + 1);
   }
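
The discovery paths above bracket the store of the old list head into the Reference's discovered field with write_ref_field_pre/write_ref_field when _discovered_list_needs_barrier is set, and elide the pre-barrier because the field is known to be NULL at that point (nothing for an SATB collector to record). A standalone sketch of that pattern with a stub barrier set:

    #include <cassert>
    #include <cstddef>

    struct StubBarrierSet {                       // stand-in, not the HotSpot BarrierSet
      void write_ref_field_pre(void** field, void* new_val) { (void)field; (void)new_val; }
      void write_ref_field(void** field, void* new_val)     { (void)field; (void)new_val; }
    };

    // Link a newly discovered reference onto a list whose current head is
    // 'current_head' by storing the head into the reference's discovered field.
    void link_discovered(StubBarrierSet* bs, bool needs_barrier,
                         void** discovered_addr, void* current_head) {
      assert(*discovered_addr == NULL);           // discovery only stores over a NULL field,
                                                  // so the pre-barrier can safely be elided
      *discovered_addr = current_head;            // raw store of the list link
      if (needs_barrier) {
        bs->write_ref_field(discovered_addr, current_head);  // post-barrier still required
      }
    }
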
--- a/hotspot/src/share/vm/memory/referenceProcessor.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/referenceProcessor.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -54,6 +54,14 @@
   bool        _discovery_is_atomic;   // if discovery is atomic wrt
                                       // other collectors in configuration
   bool        _discovery_is_mt;       // true if reference discovery is MT.
+  // If true, setting "next" field of a discovered refs list requires
+  // write barrier(s).  (Must be true if used in a collector in which
+  // elements of a discovered list may be moved during discovery: for
+  // example, a collector like Garbage-First that moves objects during a
+  // long-term concurrent marking phase that does weak reference
+  // discovery.)
+  bool        _discovered_list_needs_barrier;
+  BarrierSet* _bs;                    // Cached copy of BarrierSet.
   bool        _enqueuing_is_done;     // true if all weak references enqueued
   bool        _processing_is_mt;      // true during phases when
                                       // reference processing is MT.
@@ -196,7 +204,6 @@
   void verify_ok_to_handle_reflists() PRODUCT_RETURN;
 
   void abandon_partial_discovered_list(DiscoveredList& refs_list);
-  void abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]);
 
   // Calculate the number of jni handles.
   unsigned int count_jni_refs();
@@ -217,6 +224,8 @@
     _discovery_is_atomic(true),
     _enqueuing_is_done(false),
     _discovery_is_mt(false),
+    _discovered_list_needs_barrier(false),
+    _bs(NULL),
     _is_alive_non_header(NULL),
     _num_q(0),
     _processing_is_mt(false),
@@ -224,8 +233,10 @@
   {}
 
   ReferenceProcessor(MemRegion span, bool atomic_discovery,
-                     bool mt_discovery, int mt_degree = 1,
-                     bool mt_processing = false);
+                     bool mt_discovery,
+                     int mt_degree = 1,
+                     bool mt_processing = false,
+                     bool discovered_list_needs_barrier = false);
 
   // Allocates and initializes a reference processor.
   static ReferenceProcessor* create_ref_processor(
@@ -234,8 +245,8 @@
     bool               mt_discovery,
     BoolObjectClosure* is_alive_non_header = NULL,
     int                parallel_gc_threads = 1,
-    bool               mt_processing = false);
-
+    bool               mt_processing = false,
+    bool               discovered_list_needs_barrier = false);
   // RefDiscoveryPolicy values
   enum {
     ReferenceBasedDiscovery = 0,
@@ -296,6 +307,11 @@
   // Enqueue references at end of GC (called by the garbage collector)
   bool enqueue_discovered_references(AbstractRefProcTaskExecutor* task_executor = NULL);
 
+  // If a discovery is in progress that is being superseded, abandon it: all
+  // the discovered lists will be empty, and all the objects on them will
+  // have NULL discovered fields.  Must be called only at a safepoint.
+  void abandon_partial_discovery();
+
   // debugging
   void verify_no_references_recorded() PRODUCT_RETURN;
   static void verify();
--- a/hotspot/src/share/vm/memory/sharedHeap.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/sharedHeap.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -57,15 +57,24 @@
   }
   _sh = this;  // ch is static, should be set only once.
   if ((UseParNewGC ||
-      (UseConcMarkSweepGC && CMSParallelRemarkEnabled)) &&
+      (UseConcMarkSweepGC && CMSParallelRemarkEnabled) ||
+       UseG1GC) &&
       ParallelGCThreads > 0) {
-    _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, true);
+    _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads,
+                            /* are_GC_task_threads */true,
+                            /* are_ConcurrentGC_threads */false);
     if (_workers == NULL) {
       vm_exit_during_initialization("Failed necessary allocation.");
     }
   }
 }
 
+bool SharedHeap::heap_lock_held_for_gc() {
+  Thread* t = Thread::current();
+  return    Heap_lock->owned_by_self()
+         || (   (t->is_GC_task_thread() ||  t->is_VM_thread())
+             && _thread_holds_heap_lock_for_gc);
+}
 
 void SharedHeap::set_par_threads(int t) {
   _n_par_threads = t;
@@ -280,10 +289,11 @@
 }
 
 // Some utilities.
-void SharedHeap::print_size_transition(size_t bytes_before,
+void SharedHeap::print_size_transition(outputStream* out,
+                                       size_t bytes_before,
                                        size_t bytes_after,
                                        size_t capacity) {
-  tty->print(" %d%s->%d%s(%d%s)",
+  out->print(" %d%s->%d%s(%d%s)",
              byte_size_in_proper_unit(bytes_before),
              proper_unit_for_byte_size(bytes_before),
              byte_size_in_proper_unit(bytes_after),
--- a/hotspot/src/share/vm/memory/sharedHeap.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/sharedHeap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -44,6 +44,9 @@
 class SharedHeap : public CollectedHeap {
   friend class VMStructs;
 
+  friend class VM_GC_Operation;
+  friend class VM_CGC_Operation;
+
 private:
   // For claiming strong_roots tasks.
   SubTasksDone* _process_strong_tasks;
@@ -82,6 +85,14 @@
   // function.
   SharedHeap(CollectorPolicy* policy_);
 
+  // Returns true if the calling thread holds the heap lock,
+  // or the calling thread is a par gc thread and the heap_lock is held
+  // by the vm thread doing a gc operation.
+  bool heap_lock_held_for_gc();
+  // True if the heap_lock is held by a non-gc thread invoking a gc
+  // operation.
+  bool _thread_holds_heap_lock_for_gc;
+
 public:
   static SharedHeap* heap() { return _sh; }
 
@@ -97,8 +108,8 @@
 
   void set_perm(PermGen* perm_gen) { _perm_gen = perm_gen; }
 
-  // A helper function that fills an allocated-but-not-yet-initialized
-  // region with a garbage object.
+  // A helper function that fills a region of the heap
+  // with a single object.
   static void fill_region_with_object(MemRegion mr);
 
   // Minimum garbage fill object size
@@ -214,13 +225,12 @@
   // "SharedHeap" can use in the implementation of its virtual
   // functions.
 
-protected:
+public:
 
   // Do anything common to GC's.
   virtual void gc_prologue(bool full) = 0;
   virtual void gc_epilogue(bool full) = 0;
 
-public:
   //
   // New methods from CollectedHeap
   //
@@ -266,7 +276,8 @@
   }
 
   // Some utilities.
-  void print_size_transition(size_t bytes_before,
+  void print_size_transition(outputStream* out,
+                             size_t bytes_before,
                              size_t bytes_after,
                              size_t capacity);
 };
--- a/hotspot/src/share/vm/memory/space.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/space.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -105,7 +105,7 @@
          "Only ones we deal with for now.");
 
   assert(_precision != CardTableModRefBS::ObjHeadPreciseArray ||
-         _last_bottom == NULL ||
+         _cl->idempotent() || _last_bottom == NULL ||
          top <= _last_bottom,
          "Not decreasing");
   NOT_PRODUCT(_last_bottom = mr.start());
@@ -144,7 +144,14 @@
     walk_mem_region(mr, bottom_obj, top);
   }
 
-  _min_done = bottom;
+  // An idempotent closure might be applied in any order, so we don't
+  // record a _min_done for it.
+  if (!_cl->idempotent()) {
+    _min_done = bottom;
+  } else {
+    assert(_min_done == _last_explicit_min_done,
+           "Don't update _min_done for idempotent cl");
+  }
 }
 
 DirtyCardToOopClosure* Space::new_dcto_cl(OopClosure* cl,
@@ -250,7 +257,8 @@
   }
 }
 
-ContiguousSpace::ContiguousSpace(): CompactibleSpace(), _top(NULL) {
+ContiguousSpace::ContiguousSpace(): CompactibleSpace(), _top(NULL),
+    _concurrent_iteration_safe_limit(NULL) {
   _mangler = new GenSpaceMangler(this);
 }
 
@@ -263,17 +271,17 @@
                                  bool mangle_space)
 {
   CompactibleSpace::initialize(mr, clear_space, mangle_space);
-  _concurrent_iteration_safe_limit = top();
+  set_concurrent_iteration_safe_limit(top());
 }
 
 void ContiguousSpace::clear(bool mangle_space) {
   set_top(bottom());
   set_saved_mark();
-  Space::clear(mangle_space);
+  CompactibleSpace::clear(mangle_space);
 }
 
 bool Space::is_in(const void* p) const {
-  HeapWord* b = block_start(p);
+  HeapWord* b = block_start_const(p);
   return b != NULL && block_is_obj(b);
 }
 
@@ -342,8 +350,13 @@
                                   bool clear_space,
                                   bool mangle_space) {
   Space::initialize(mr, clear_space, mangle_space);
+  set_compaction_top(bottom());
+  _next_compaction_space = NULL;
+}
+
+void CompactibleSpace::clear(bool mangle_space) {
+  Space::clear(mangle_space);
   _compaction_top = bottom();
-  _next_compaction_space = NULL;
 }
 
 HeapWord* CompactibleSpace::forward(oop q, size_t size,
@@ -520,8 +533,8 @@
   }
   guarantee(p == top(), "end of last object must match end of space");
   if (top() != end()) {
-    guarantee(top() == block_start(end()-1) &&
-              top() == block_start(top()),
+    guarantee(top() == block_start_const(end()-1) &&
+              top() == block_start_const(top()),
               "top should be start of unallocated block, if it exists");
   }
 }
@@ -753,7 +766,7 @@
 #undef ContigSpace_OOP_SINCE_SAVE_MARKS_DEFN
 
 // Very general, slow implementation.
-HeapWord* ContiguousSpace::block_start(const void* p) const {
+HeapWord* ContiguousSpace::block_start_const(const void* p) const {
   assert(MemRegion(bottom(), end()).contains(p), "p not in space");
   if (p >= top()) {
     return top();
@@ -957,7 +970,8 @@
     // For a sampling of objects in the space, find it using the
     // block offset table.
     if (blocks == BLOCK_SAMPLE_INTERVAL) {
-      guarantee(p == block_start(p + (size/2)), "check offset computation");
+      guarantee(p == block_start_const(p + (size/2)),
+                "check offset computation");
       blocks = 0;
     } else {
       blocks++;
--- a/hotspot/src/share/vm/memory/space.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/space.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -105,7 +105,7 @@
   virtual void set_bottom(HeapWord* value) { _bottom = value; }
   virtual void set_end(HeapWord* value)    { _end = value; }
 
-  HeapWord* saved_mark_word() const  { return _saved_mark_word; }
+  virtual HeapWord* saved_mark_word() const  { return _saved_mark_word; }
   void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; }
 
   MemRegionClosure* preconsumptionDirtyCardClosure() const {
@@ -131,9 +131,15 @@
     return MemRegion(bottom(), saved_mark_word());
   }
 
-  // Initialization.  These may be run to reset an existing
-  // Space.
+  // Initialization.
+  // "initialize" should be called once on a space, before it is used for
+  // any purpose.  The "mr" arguments gives the bounds of the space, and
+  // the "clear_space" argument should be true unless the memory in "mr" is
+  // known to be zeroed.
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+
+  // The "clear" method must be called on a region that may have
+  // had allocation performed in it, but is now to be considered empty.
   virtual void clear(bool mangle_space);
 
   // For detecting GC bugs.  Should only be called at GC boundaries, since
@@ -218,7 +224,13 @@
   // "block" that contains "p".  We say "block" instead of "object" since
   // some heaps may not pack objects densely; a chunk may either be an
   // object or a non-object.  If "p" is not in the space, return NULL.
-  virtual HeapWord* block_start(const void* p) const = 0;
+  virtual HeapWord* block_start_const(const void* p) const = 0;
+
+  // The non-const version may have benevolent side effects on the data
+  // structure supporting these calls, possibly speeding up future calls.
+  // The default implementation, however, is simply to call the const
+  // version.
+  inline virtual HeapWord* block_start(const void* p);
 
   // Requires "addr" to be the start of a chunk, and returns its size.
   // "addr + size" is required to be the start of a new chunk, or the end
@@ -284,12 +296,13 @@
   CardTableModRefBS::PrecisionStyle _precision;
   HeapWord* _boundary;          // If non-NULL, process only non-NULL oops
                                 // pointing below boundary.
-  HeapWord* _min_done;                // ObjHeadPreciseArray precision requires
+  HeapWord* _min_done;          // ObjHeadPreciseArray precision requires
                                 // a downwards traversal; this is the
                                 // lowest location already done (or,
                                 // alternatively, the lowest address that
                                 // shouldn't be done again.  NULL means infinity.)
   NOT_PRODUCT(HeapWord* _last_bottom;)
+  NOT_PRODUCT(HeapWord* _last_explicit_min_done;)
 
   // Get the actual top of the area on which the closure will
   // operate, given where the top is assumed to be (the end of the
@@ -313,13 +326,15 @@
                         HeapWord* boundary) :
     _sp(sp), _cl(cl), _precision(precision), _boundary(boundary),
     _min_done(NULL) {
-    NOT_PRODUCT(_last_bottom = NULL;)
+    NOT_PRODUCT(_last_bottom = NULL);
+    NOT_PRODUCT(_last_explicit_min_done = NULL);
   }
 
   void do_MemRegion(MemRegion mr);
 
   void set_min_done(HeapWord* min_done) {
     _min_done = min_done;
+    NOT_PRODUCT(_last_explicit_min_done = _min_done);
   }
 #ifndef PRODUCT
   void set_last_bottom(HeapWord* last_bottom) {
@@ -356,7 +371,11 @@
   CompactibleSpace* _next_compaction_space;
 
 public:
+  CompactibleSpace() :
+   _compaction_top(NULL), _next_compaction_space(NULL) {}
+
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void clear(bool mangle_space);
 
   // Used temporarily during a compaction phase to hold the value
   // top should have when compaction is complete.
@@ -513,7 +532,7 @@
       /* prefetch beyond q */                                                \
       Prefetch::write(q, interval);                                          \
       /* size_t size = oop(q)->size();  changing this for cms for perm gen */\
-      size_t size = block_size(q);                                             \
+      size_t size = block_size(q);                                           \
       compact_top = cp->space->forward(oop(q), size, cp, compact_top);       \
       q += size;                                                             \
       end_of_live = q;                                                       \
@@ -577,156 +596,158 @@
   cp->space->set_compaction_top(compact_top);                                \
 }
 
-#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) {                                \
-  /* adjust all the interior pointers to point at the new locations of objects        \
-   * Used by MarkSweep::mark_sweep_phase3() */                                        \
+#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) {                             \
+  /* adjust all the interior pointers to point at the new locations of objects  \
+   * Used by MarkSweep::mark_sweep_phase3() */                                  \
                                                                                 \
-  HeapWord* q = bottom();                                                        \
-  HeapWord* t = _end_of_live;  /* Established by "prepare_for_compaction". */        \
+  HeapWord* q = bottom();                                                       \
+  HeapWord* t = _end_of_live;  /* Established by "prepare_for_compaction". */   \
                                                                                 \
-  assert(_first_dead <= _end_of_live, "Stands to reason, no?");                        \
+  assert(_first_dead <= _end_of_live, "Stands to reason, no?");                 \
                                                                                 \
-  if (q < t && _first_dead > q &&                                                \
+  if (q < t && _first_dead > q &&                                               \
       !oop(q)->is_gc_marked()) {                                                \
     /* we have a chunk of the space which hasn't moved and we've                \
      * reinitialized the mark word during the previous pass, so we can't        \
-     * use is_gc_marked for the traversal. */                                        \
+     * use is_gc_marked for the traversal. */                                   \
     HeapWord* end = _first_dead;                                                \
                                                                                 \
-    while (q < end) {                                                                \
-      /* I originally tried to conjoin "block_start(q) == q" to the                \
-       * assertion below, but that doesn't work, because you can't                \
-       * accurately traverse previous objects to get to the current one                \
-       * after their pointers (including pointers into permGen) have been        \
-       * updated, until the actual compaction is done.  dld, 4/00 */                \
-      assert(block_is_obj(q),                                                        \
-             "should be at block boundaries, and should be looking at objs");        \
+    while (q < end) {                                                           \
+      /* I originally tried to conjoin "block_start(q) == q" to the             \
+       * assertion below, but that doesn't work, because you can't              \
+       * accurately traverse previous objects to get to the current one         \
+       * after their pointers (including pointers into permGen) have been       \
+       * updated, until the actual compaction is done.  dld, 4/00 */            \
+      assert(block_is_obj(q),                                                   \
+             "should be at block boundaries, and should be looking at objs");   \
                                                                                 \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));     \
                                                                                 \
-      /* point all the oops to the new location */                                \
-      size_t size = oop(q)->adjust_pointers();                                        \
-      size = adjust_obj_size(size);                                                \
+      /* point all the oops to the new location */                              \
+      size_t size = oop(q)->adjust_pointers();                                  \
+      size = adjust_obj_size(size);                                             \
                                                                                 \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());           \
-                                                                                      \
+                                                                                \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));     \
-                                                                                      \
+                                                                                \
       q += size;                                                                \
-    }                                                                                \
+    }                                                                           \
                                                                                 \
-    if (_first_dead == t) {                                                        \
-      q = t;                                                                        \
-    } else {                                                                        \
-      /* $$$ This is funky.  Using this to read the previously written                \
-       * LiveRange.  See also use below. */                                        \
+    if (_first_dead == t) {                                                     \
+      q = t;                                                                    \
+    } else {                                                                    \
+      /* $$$ This is funky.  Using this to read the previously written          \
+       * LiveRange.  See also use below. */                                     \
       q = (HeapWord*)oop(_first_dead)->mark()->decode_pointer();                \
-    }                                                                                \
-  }                                                                                \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
   const intx interval = PrefetchScanIntervalInBytes;                            \
                                                                                 \
-  debug_only(HeapWord* prev_q = NULL);                                                \
-  while (q < t) {                                                                \
-    /* prefetch beyond q */                                                        \
+  debug_only(HeapWord* prev_q = NULL);                                          \
+  while (q < t) {                                                               \
+    /* prefetch beyond q */                                                     \
     Prefetch::write(q, interval);                                               \
-    if (oop(q)->is_gc_marked()) {                                                \
-      /* q is alive */                                                                \
+    if (oop(q)->is_gc_marked()) {                                               \
+      /* q is alive */                                                          \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));     \
-      /* point all the oops to the new location */                                \
-      size_t size = oop(q)->adjust_pointers();                                        \
-      size = adjust_obj_size(size);                                                \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());                \
+      /* point all the oops to the new location */                              \
+      size_t size = oop(q)->adjust_pointers();                                  \
+      size = adjust_obj_size(size);                                             \
+      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());           \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));     \
-      debug_only(prev_q = q);                                                        \
+      debug_only(prev_q = q);                                                   \
       q += size;                                                                \
-    } else {                                                                        \
-      /* q is not a live object, so its mark should point at the next                \
-       * live object */                                                                \
-      debug_only(prev_q = q);                                                        \
-      q = (HeapWord*) oop(q)->mark()->decode_pointer();                                \
-      assert(q > prev_q, "we should be moving forward through memory");                \
-    }                                                                                \
-  }                                                                                \
+    } else {                                                                    \
+      /* q is not a live object, so its mark should point at the next           \
+       * live object */                                                         \
+      debug_only(prev_q = q);                                                   \
+      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
+      assert(q > prev_q, "we should be moving forward through memory");         \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
-  assert(q == t, "just checking");                                                \
+  assert(q == t, "just checking");                                              \
 }
 
-#define SCAN_AND_COMPACT(obj_size) {                                                \
+#define SCAN_AND_COMPACT(obj_size) {                                            \
   /* Copy all live objects to their new location                                \
-   * Used by MarkSweep::mark_sweep_phase4() */                                        \
+   * Used by MarkSweep::mark_sweep_phase4() */                                  \
                                                                                 \
-  HeapWord*       q = bottom();                                                        \
-  HeapWord* const t = _end_of_live;                                                \
-  debug_only(HeapWord* prev_q = NULL);                                                \
+  HeapWord*       q = bottom();                                                 \
+  HeapWord* const t = _end_of_live;                                             \
+  debug_only(HeapWord* prev_q = NULL);                                          \
                                                                                 \
-  if (q < t && _first_dead > q &&                                                \
+  if (q < t && _first_dead > q &&                                               \
       !oop(q)->is_gc_marked()) {                                                \
-    debug_only(                                                                        \
+    debug_only(                                                                 \
     /* we have a chunk of the space which hasn't moved and we've reinitialized  \
      * the mark word during the previous pass, so we can't use is_gc_marked for \
      * the traversal. */                                                        \
-    HeapWord* const end = _first_dead;                                                \
-                                                                                      \
-    while (q < end) {                                                                \
+    HeapWord* const end = _first_dead;                                          \
+                                                                                \
+    while (q < end) {                                                           \
       size_t size = obj_size(q);                                                \
       assert(!oop(q)->is_gc_marked(),                                           \
              "should be unmarked (special dense prefix handling)");             \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q));        \
-      debug_only(prev_q = q);                                                        \
+      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q));       \
+      debug_only(prev_q = q);                                                   \
       q += size;                                                                \
-    }                                                                                \
-    )  /* debug_only */                                                                \
-                                                                                      \
-    if (_first_dead == t) {                                                        \
-      q = t;                                                                        \
-    } else {                                                                        \
-      /* $$$ Funky */                                                                 \
-      q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer();                \
-    }                                                                                \
-  }                                                                                \
+    }                                                                           \
+    )  /* debug_only */                                                         \
+                                                                                \
+    if (_first_dead == t) {                                                     \
+      q = t;                                                                    \
+    } else {                                                                    \
+      /* $$$ Funky */                                                           \
+      q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer();               \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
-  const intx scan_interval = PrefetchScanIntervalInBytes;                        \
-  const intx copy_interval = PrefetchCopyIntervalInBytes;                        \
-  while (q < t) {                                                                \
-    if (!oop(q)->is_gc_marked()) {                                                \
-      /* mark is pointer to next marked oop */                                        \
-      debug_only(prev_q = q);                                                        \
-      q = (HeapWord*) oop(q)->mark()->decode_pointer();                                \
-      assert(q > prev_q, "we should be moving forward through memory");                \
-    } else {                                                                        \
-      /* prefetch beyond q */                                                        \
+  const intx scan_interval = PrefetchScanIntervalInBytes;                       \
+  const intx copy_interval = PrefetchCopyIntervalInBytes;                       \
+  while (q < t) {                                                               \
+    if (!oop(q)->is_gc_marked()) {                                              \
+      /* mark is pointer to next marked oop */                                  \
+      debug_only(prev_q = q);                                                   \
+      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
+      assert(q > prev_q, "we should be moving forward through memory");         \
+    } else {                                                                    \
+      /* prefetch beyond q */                                                   \
       Prefetch::read(q, scan_interval);                                         \
                                                                                 \
       /* size and destination */                                                \
       size_t size = obj_size(q);                                                \
       HeapWord* compaction_top = (HeapWord*)oop(q)->forwardee();                \
                                                                                 \
-      /* prefetch beyond compaction_top */                                        \
+      /* prefetch beyond compaction_top */                                      \
       Prefetch::write(compaction_top, copy_interval);                           \
                                                                                 \
-      /* copy object and reinit its mark */                                        \
+      /* copy object and reinit its mark */                                     \
       VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size,            \
                                                             compaction_top));   \
-      assert(q != compaction_top, "everything in this pass should be moving");        \
-      Copy::aligned_conjoint_words(q, compaction_top, size);                        \
-      oop(compaction_top)->init_mark();                                                \
-      assert(oop(compaction_top)->klass() != NULL, "should have a class");        \
+      assert(q != compaction_top, "everything in this pass should be moving");  \
+      Copy::aligned_conjoint_words(q, compaction_top, size);                    \
+      oop(compaction_top)->init_mark();                                         \
+      assert(oop(compaction_top)->klass() != NULL, "should have a class");      \
                                                                                 \
-      debug_only(prev_q = q);                                                        \
+      debug_only(prev_q = q);                                                   \
       q += size;                                                                \
-    }                                                                                \
-  }                                                                                \
+    }                                                                           \
+  }                                                                             \
                                                                                 \
+  /* Let's remember if we were empty before we did the compaction. */           \
+  bool was_empty = used_region().is_empty();                                    \
   /* Reset space after compaction is complete */                                \
-  reset_after_compaction();                                                        \
+  reset_after_compaction();                                                     \
   /* We do this clear, below, since it has overloaded meanings for some */      \
   /* space subtypes.  For example, OffsetTableContigSpace's that were   */      \
   /* compacted into will have had their offset table thresholds updated */      \
   /* continuously, but those that weren't need to have their thresholds */      \
   /* re-initialized.  Also mangles unused area for debugging.           */      \
-  if (is_empty()) {                                                             \
-    clear(SpaceDecorator::Mangle);                                              \
+  if (used_region().is_empty()) {                                               \
+    if (!was_empty) clear(SpaceDecorator::Mangle);                              \
   } else {                                                                      \
     if (ZapUnusedHeapArea) mangle_unused_area();                                \
   }                                                                             \
@@ -752,20 +773,18 @@
   inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value);
 
  public:
-
   ContiguousSpace();
   ~ContiguousSpace();
 
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void clear(bool mangle_space);
 
   // Accessors
   HeapWord* top() const            { return _top;    }
   void set_top(HeapWord* value)    { _top = value; }
 
-  void set_saved_mark()       { _saved_mark_word = top();    }
-  void reset_saved_mark()     { _saved_mark_word = bottom(); }
-
-  virtual void clear(bool mangle_space);
+  virtual void set_saved_mark()    { _saved_mark_word = top();    }
+  void reset_saved_mark()          { _saved_mark_word = bottom(); }
 
   WaterMark bottom_mark()     { return WaterMark(this, bottom()); }
   WaterMark top_mark()        { return WaterMark(this, top()); }
@@ -874,7 +893,7 @@
   virtual void object_iterate_from(WaterMark mark, ObjectClosure* blk);
 
   // Very inefficient implementation.
-  virtual HeapWord* block_start(const void* p) const;
+  virtual HeapWord* block_start_const(const void* p) const;
   size_t block_size(const HeapWord* p) const;
   // If a block is in the allocated area, it is an object.
   bool block_is_obj(const HeapWord* p) const { return p < top(); }
@@ -979,7 +998,8 @@
   HeapWord* _soft_end;
 
  public:
-  EdenSpace(DefNewGeneration* gen) : _gen(gen) { _soft_end = NULL; }
+  EdenSpace(DefNewGeneration* gen) :
+   _gen(gen), _soft_end(NULL) {}
 
   // Get/set just the 'soft' limit.
   HeapWord* soft_end()               { return _soft_end; }
@@ -1033,7 +1053,7 @@
 
   void clear(bool mangle_space);
 
-  inline HeapWord* block_start(const void* p) const;
+  inline HeapWord* block_start_const(const void* p) const;
 
   // Add offset table update.
   virtual inline HeapWord* allocate(size_t word_size);
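The SCAN_AND_COMPACT change above only pays for the clear/mangle work when this compaction is what emptied the space; a space that was already empty beforehand is left alone. A minimal standalone sketch of that decision, using an invented ToySpace in place of the real Space/SpaceDecorator classes:

// Sketch only: used_words/mangled are stand-ins for the real space state.
#include <cstdio>

struct ToySpace {
  int used_words;          // words still occupied by live objects
  bool mangled;            // debug-only: unused area filled with a pattern

  bool is_empty() const { return used_words == 0; }

  void reset_after_compaction(int live_words) {
    // Compaction leaves only the live data behind.
    used_words = live_words;
  }

  void clear_and_mangle() {
    used_words = 0;
    mangled = true;          // the work we want to avoid repeating
    std::printf("cleared and mangled\n");
  }

  void compact(int live_words) {
    bool was_empty = is_empty();        // remember the state *before* reset
    reset_after_compaction(live_words);
    if (is_empty()) {
      // Only clear/mangle if this compaction newly emptied the space;
      // an already-empty space was cleared earlier and needs no extra work.
      if (!was_empty) clear_and_mangle();
    }
  }
};

int main() {
  ToySpace s = {100, false};
  s.compact(0);   // becomes empty now  -> clears and mangles once
  s.compact(0);   // was already empty  -> skips the redundant clear
  return 0;
}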
--- a/hotspot/src/share/vm/memory/space.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/space.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -22,6 +22,10 @@
  *
  */
 
+inline HeapWord* Space::block_start(const void* p) {
+  return block_start_const(p);
+}
+
 inline HeapWord* OffsetTableContigSpace::allocate(size_t size) {
   HeapWord* res = ContiguousSpace::allocate(size);
   if (res != NULL) {
@@ -50,7 +54,8 @@
   return res;
 }
 
-inline HeapWord* OffsetTableContigSpace::block_start(const void* p) const {
+inline HeapWord*
+OffsetTableContigSpace::block_start_const(const void* p) const {
   return _offsets.block_start(p);
 }
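The rename above makes the virtual lookup a const method (block_start_const) and leaves block_start as a thin non-const forwarder on Space. A small sketch of that const/non-const forwarding shape; ToySpace/ToyContigSpace are illustrative names, not the real classes:

#include <cassert>

typedef unsigned long HeapWord;   // stand-in for HotSpot's HeapWord

class ToySpace {
 public:
  virtual ~ToySpace() {}
  // Subclasses override only the const query...
  virtual HeapWord* block_start_const(const void* p) const = 0;
  // ...while callers holding a non-const pointer use this forwarder.
  HeapWord* block_start(const void* p) { return block_start_const(p); }
};

class ToyContigSpace : public ToySpace {
  HeapWord* _bottom;
 public:
  explicit ToyContigSpace(HeapWord* bottom) : _bottom(bottom) {}
  virtual HeapWord* block_start_const(const void*) const { return _bottom; }
};

int main() {
  HeapWord heap[4] = {0, 0, 0, 0};
  ToyContigSpace sp(heap);
  assert(sp.block_start(&heap[2]) == heap);   // forwards to the const version
  return 0;
}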
 
--- a/hotspot/src/share/vm/memory/specialized_oop_closures.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/specialized_oop_closures.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -59,6 +59,12 @@
 // This is split into several because of a Visual C++ 6.0 compiler bug
 // where very long macros cause the compiler to crash
 
+// Some other heap might define further specialized closures.
+#ifndef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
+#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
+        /* None */
+#endif
+
 #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_S(f)       \
   f(ScanClosure,_nv)                                    \
   f(FastScanClosure,_nv)                                \
@@ -77,7 +83,7 @@
   SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_P(f)
 
 #ifndef SERIALGC
-#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f)       \
+#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f)       \
   f(MarkRefsIntoAndScanClosure,_nv)                     \
   f(Par_MarkRefsIntoAndScanClosure,_nv)                 \
   f(PushAndMarkClosure,_nv)                             \
@@ -85,11 +91,13 @@
   f(PushOrMarkClosure,_nv)                              \
   f(Par_PushOrMarkClosure,_nv)                          \
   f(CMSKeepAliveClosure,_nv)                            \
-  f(CMSInnerParMarkAndPushClosure,_nv)
+  f(CMSInnerParMarkAndPushClosure,_nv)                  \
+  FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f)
 #else  // SERIALGC
-#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f)
+#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f)
 #endif // SERIALGC
 
+
 // We separate these out, because sometime the general one has
 // a different definition from the specialized ones, and sometimes it
 // doesn't.
@@ -98,8 +106,8 @@
   f(OopClosure,_v)                                      \
   SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(f)
 
-#define ALL_OOP_OOP_ITERATE_CLOSURES_3(f)               \
-  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f)
+#define ALL_OOP_OOP_ITERATE_CLOSURES_2(f)               \
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f)
 
 #ifndef SERIALGC
 // This macro applies an argument macro to all OopClosures for which we
@@ -125,6 +133,13 @@
 // The "root_class" is the most general class to define; this may be
 // "OopClosure" in some applications and "OopsInGenClosure" in others.
 
+
+// Some other heap might define further specialized closures.
+#ifndef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
+#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) \
+        /* None */
+#endif
+
 #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_S(f) \
   f(ScanClosure,_nv)                                     \
   f(FastScanClosure,_nv)
@@ -132,7 +147,8 @@
 #ifndef SERIALGC
 #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) \
   f(ParScanWithBarrierClosure,_nv)                       \
-  f(ParScanWithoutBarrierClosure,_nv)
+  f(ParScanWithoutBarrierClosure,_nv)                    \
+  FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f)
 #else  // SERIALGC
 #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f)
 #endif // SERIALGC
@@ -179,13 +195,15 @@
 
 #if ENABLE_SPECIALIZATION_STATS
 private:
-  static int _numCallsAll;
+  static bool _init;
+  static bool _wrapped;
+  static jint _numCallsAll;
 
-  static int _numCallsTotal[NUM_Kinds];
-  static int _numCalls_nv[NUM_Kinds];
+  static jint _numCallsTotal[NUM_Kinds];
+  static jint _numCalls_nv[NUM_Kinds];
 
-  static int _numDoOopCallsTotal[NUM_Kinds];
-  static int _numDoOopCalls_nv[NUM_Kinds];
+  static jint _numDoOopCallsTotal[NUM_Kinds];
+  static jint _numDoOopCalls_nv[NUM_Kinds];
 public:
 #endif
   static void clear()  PRODUCT_RETURN;
@@ -203,22 +221,22 @@
 #if ENABLE_SPECIALIZATION_STATS
 
 inline void SpecializationStats::record_call() {
-  _numCallsAll++;;
+  Atomic::inc(&_numCallsAll);
 }
 inline void SpecializationStats::record_iterate_call_v(Kind k) {
-  _numCallsTotal[k]++;
+  Atomic::inc(&_numCallsTotal[k]);
 }
 inline void SpecializationStats::record_iterate_call_nv(Kind k) {
-  _numCallsTotal[k]++;
-  _numCalls_nv[k]++;
+  Atomic::inc(&_numCallsTotal[k]);
+  Atomic::inc(&_numCalls_nv[k]);
 }
 
 inline void SpecializationStats::record_do_oop_call_v(Kind k) {
-  _numDoOopCallsTotal[k]++;
+  Atomic::inc(&_numDoOopCallsTotal[k]);
 }
 inline void SpecializationStats::record_do_oop_call_nv(Kind k) {
-  _numDoOopCallsTotal[k]++;
-  _numDoOopCalls_nv[k]++;
+  Atomic::inc(&_numDoOopCallsTotal[k]);
+  Atomic::inc(&_numDoOopCalls_nv[k]);
 }
 
 #else   // !ENABLE_SPECIALIZATION_STATS
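The FURTHER_SPECIALIZED_* macros above default to "None" unless a client defined them first, so another collector can splice extra closure types into the X-macro lists without editing this header. A self-contained sketch of that hook idiom; every name below (FURTHER_CLOSURES, ExtraClosure, visit_with_*) is invented for illustration:

#include <cstdio>

// --- what a client (e.g. an extra GC) could do before including the header ---
#define FURTHER_CLOSURES(f) f(ExtraClosure)

// --- the header's side of the contract ---------------------------------------
#ifndef FURTHER_CLOSURES
#define FURTHER_CLOSURES(f) /* None */
#endif

#define ALL_CLOSURES(f) \
  f(ScanClosure)        \
  f(FastScanClosure)    \
  FURTHER_CLOSURES(f)

// Apply one macro per closure type, e.g. to generate stub functions.
#define DECLARE(name) void visit_with_##name();
ALL_CLOSURES(DECLARE)

#define DEFINE(name) void visit_with_##name() { std::puts(#name); }
ALL_CLOSURES(DEFINE)

int main() {
  visit_with_ScanClosure();
  visit_with_FastScanClosure();
  visit_with_ExtraClosure();   // exists only because FURTHER_CLOSURES added it
  return 0;
}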
--- a/hotspot/src/share/vm/memory/universe.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/memory/universe.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -739,6 +739,15 @@
     fatal("UseParallelGC not supported in java kernel vm.");
 #endif // SERIALGC
 
+  } else if (UseG1GC) {
+#ifndef SERIALGC
+    G1CollectorPolicy* g1p = new G1CollectorPolicy_BestRegionsFirst();
+    G1CollectedHeap* g1h = new G1CollectedHeap(g1p);
+    Universe::_collectedHeap = g1h;
+#else  // SERIALGC
+    fatal("UseG1GC not supported in java kernel vm.");
+#endif // SERIALGC
+
   } else {
     GenCollectorPolicy *gc_policy;
 
@@ -938,7 +947,10 @@
 
   // This needs to be done before the first scavenge/gc, since
   // it's an input to soft ref clearing policy.
-  Universe::update_heap_info_at_gc();
+  {
+    MutexLocker x(Heap_lock);
+    Universe::update_heap_info_at_gc();
+  }
 
   // ("weak") refs processing infrastructure initialization
   Universe::heap()->post_initialize();
@@ -1194,10 +1206,11 @@
     // ???: What if a CollectedHeap doesn't have a permanent generation?
     ShouldNotReachHere();
     break;
-  case CollectedHeap::GenCollectedHeap: {
-    GenCollectedHeap* gch = (GenCollectedHeap*) Universe::heap();
-    permanent_reserved = gch->perm_gen()->reserved();
-    break;
+  case CollectedHeap::GenCollectedHeap:
+  case CollectedHeap::G1CollectedHeap: {
+    SharedHeap* sh = (SharedHeap*) Universe::heap();
+    permanent_reserved = sh->perm_gen()->reserved();
+    break;
   }
 #ifndef SERIALGC
   case CollectedHeap::ParallelScavengeHeap: {
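The universe.cpp hunk above now takes Heap_lock around update_heap_info_at_gc() via a scoped MutexLocker. HotSpot has its own MutexLocker type; the sketch below shows the same scoped-locking shape with standard C++ primitives and made-up names:

#include <mutex>
#include <cstdio>

static std::mutex heap_lock;            // stand-in for Heap_lock
static size_t heap_used_at_last_gc = 0; // stand-in for the cached heap info

static size_t current_heap_used() { return 1024; }  // illustrative value

void update_heap_info_at_gc() {
  // Caller is expected to hold heap_lock, as in the patched code path.
  heap_used_at_last_gc = current_heap_used();
}

void post_heap_initialize() {
  {
    // Scoped lock: released automatically at the end of the block, mirroring
    // the MutexLocker x(Heap_lock) block added in universe.cpp.
    std::lock_guard<std::mutex> x(heap_lock);
    update_heap_info_at_gc();
  }
  std::printf("used at last gc: %zu\n", heap_used_at_last_gc);
}

int main() { post_heap_initialize(); return 0; }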
--- a/hotspot/src/share/vm/oops/generateOopMap.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/generateOopMap.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -370,21 +370,8 @@
 void GenerateOopMap ::initialize_bb() {
   _gc_points = 0;
   _bb_count  = 0;
-  int size = binsToHold(method()->code_size());
-  _bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t,size);
-  memset(_bb_hdr_bits, 0, size*sizeof(uintptr_t));
-}
-
-void GenerateOopMap ::set_bbmark_bit(int bci) {
-  int idx  = bci >> LogBitsPerWord;
-  uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1));
-  _bb_hdr_bits[idx] |= bit;
-}
-
-void GenerateOopMap ::clear_bbmark_bit(int bci) {
-  int idx   = bci >> LogBitsPerWord;
-  uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1));
-  _bb_hdr_bits[idx] &= (~bit);
+  _bb_hdr_bits.clear();
+  _bb_hdr_bits.resize(method()->code_size());
 }
 
 void GenerateOopMap::bb_mark_fct(GenerateOopMap *c, int bci, int *data) {
@@ -952,6 +939,17 @@
   _basic_blocks[bbNo-1]._end_bci = prev_bci;
 
 
+  // Check that the correct number of basicblocks was found
+  if (bbNo != _bb_count) {
+    if (bbNo < _bb_count) {
+      verify_error("jump into the middle of instruction?");
+      return;
+    } else {
+      verify_error("extra basic blocks - should not happen?");
+      return;
+    }
+  }
+
   _max_monitors = monitor_count;
 
   // Now that we have a bound on the depth of the monitor stack, we can
@@ -985,17 +983,6 @@
   }
 #endif
 
-  // Check that the correct number of basicblocks was found
-  if (bbNo !=_bb_count) {
-    if (bbNo < _bb_count) {
-      verify_error("jump into the middle of instruction?");
-      return;
-    } else {
-      verify_error("extra basic blocks - should not happen?");
-      return;
-    }
-  }
-
   // Mark all alive blocks
   mark_reachable_code();
 }
@@ -1022,21 +1009,22 @@
                                          int new_method_size) {
   assert(new_method_size >= method()->code_size() + delta,
          "new method size is too small");
-  int newWords = binsToHold(new_method_size);
 
-  uintptr_t * new_bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t, newWords);
+  BitMap::bm_word_t* new_bb_hdr_bits =
+    NEW_RESOURCE_ARRAY(BitMap::bm_word_t,
+                       BitMap::word_align_up(new_method_size));
+  _bb_hdr_bits.set_map(new_bb_hdr_bits);
+  _bb_hdr_bits.set_size(new_method_size);
+  _bb_hdr_bits.clear();
 
-  BitMap bb_bits(new_bb_hdr_bits, new_method_size);
-  bb_bits.clear();
 
   for(int k = 0; k < _bb_count; k++) {
     if (_basic_blocks[k]._bci > bci) {
       _basic_blocks[k]._bci     += delta;
       _basic_blocks[k]._end_bci += delta;
     }
-    bb_bits.at_put(_basic_blocks[k]._bci, true);
+    _bb_hdr_bits.at_put(_basic_blocks[k]._bci, true);
   }
-  _bb_hdr_bits = new_bb_hdr_bits ;
 }
 
 //
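The generateOopMap hunks retire the hand-rolled word/bit arithmetic (bci >> LogBitsPerWord, 1 << (bci & (BitsPerWord-1))) in favour of HotSpot's BitMap. A sketch of the same simplification with a std::vector<bool> stand-in; ToyBitMap merely mimics the method names of the real class:

#include <vector>
#include <cassert>

class ToyBitMap {
  std::vector<bool> _bits;
 public:
  void clear()                  { _bits.assign(_bits.size(), false); }
  void resize(size_t nbits)     { _bits.assign(nbits, false); }
  void at_put(size_t i, bool v) { _bits[i] = v; }
  bool at(size_t i) const       { return _bits[i]; }
};

int main() {
  const size_t code_size = 64;   // stand-in for method()->code_size()
  int bci = 17;

  // Old style: caller computes word index and bit mask by hand.
  unsigned long words[1] = {0};
  words[bci / (8 * sizeof(unsigned long))] |=
      1UL << (bci % (8 * sizeof(unsigned long)));

  // New style: the bitmap hides the indexing entirely.
  ToyBitMap bb_hdr_bits;
  bb_hdr_bits.resize(code_size);
  bb_hdr_bits.at_put(bci, true);

  assert(bb_hdr_bits.at(bci));
  assert(!bb_hdr_bits.at(bci + 1));
  return 0;
}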
--- a/hotspot/src/share/vm/oops/generateOopMap.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/generateOopMap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -341,16 +341,22 @@
   BasicBlock *    _basic_blocks;             // Array of basicblock info
   int             _gc_points;
   int             _bb_count;
-  uintptr_t *     _bb_hdr_bits;
+  BitMap          _bb_hdr_bits;
 
   // Basicblocks methods
   void          initialize_bb               ();
   void          mark_bbheaders_and_count_gc_points();
-  bool          is_bb_header                (int bci) const   { return (_bb_hdr_bits[bci >> LogBitsPerWord] & ((uintptr_t)1 << (bci & (BitsPerWord-1)))) != 0; }
+  bool          is_bb_header                (int bci) const   {
+    return _bb_hdr_bits.at(bci);
+  }
   int           gc_points                   () const                          { return _gc_points; }
   int           bb_count                    () const                          { return _bb_count; }
-  void          set_bbmark_bit              (int bci);
-  void          clear_bbmark_bit            (int bci);
+  void          set_bbmark_bit              (int bci) {
+    _bb_hdr_bits.at_put(bci, true);
+  }
+  void          clear_bbmark_bit            (int bci) {
+    _bb_hdr_bits.at_put(bci, false);
+  }
   BasicBlock *  get_basic_block_at          (int bci) const;
   BasicBlock *  get_basic_block_containing  (int bci) const;
   void          interp_bb                   (BasicBlock *bb);
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1515,10 +1515,9 @@
 // closure's do_header() method dictates whether the given closure should be
 // applied to the klass ptr in the object header.
 
-#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)   \
-                                                                        \
-int instanceKlass::oop_oop_iterate##nv_suffix(oop obj,                  \
-                                              OopClosureType* closure) {\
+#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)        \
+                                                                             \
+int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \
   SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik);\
   /* header */                                                          \
   if (closure->do_header()) {                                           \
@@ -1533,6 +1532,26 @@
   return size_helper();                                                 \
 }
 
+#ifndef SERIALGC
+#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \
+                                                                                \
+int instanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj,                \
+                                              OopClosureType* closure) {        \
+  SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik); \
+  /* header */                                                                  \
+  if (closure->do_header()) {                                                   \
+    obj->oop_iterate_header(closure);                                           \
+  }                                                                             \
+  /* instance variables */                                                      \
+  InstanceKlass_OOP_MAP_REVERSE_ITERATE(                                        \
+    obj,                                                                        \
+    SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::ik);\
+    (closure)->do_oop##nv_suffix(p),                                            \
+    assert_is_in_closed_subset)                                                 \
+   return size_helper();                                                        \
+}
+#endif // !SERIALGC
+
 #define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \
                                                                         \
 int instanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj,              \
@@ -1550,9 +1569,13 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN)
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN_m)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN_m)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN_m)
+#ifndef SERIALGC
+ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+#endif // !SERIALGC
 
 void instanceKlass::iterate_static_fields(OopClosure* closure) {
     InstanceKlass_OOP_ITERATE( \
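The new InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN macro generates closure-specialized iterators that walk an object's reference fields in reverse. A plain C++ sketch of forward versus backward traversal over the same fields; Closure, iterate_forward and iterate_backwards are invented names:

#include <cstdio>

struct Closure {
  void do_oop(int* p) { std::printf("visit field holding %d\n", *p); }
};

const int NUM_FIELDS = 3;

void iterate_forward(int (&fields)[NUM_FIELDS], Closure* cl) {
  for (int i = 0; i < NUM_FIELDS; i++) cl->do_oop(&fields[i]);
}

void iterate_backwards(int (&fields)[NUM_FIELDS], Closure* cl) {
  // Same fields, same closure, opposite order; useful when a collector
  // prefers to discover references from the end of the object first.
  for (int i = NUM_FIELDS - 1; i >= 0; i--) cl->do_oop(&fields[i]);
}

int main() {
  int fields[NUM_FIELDS] = {10, 20, 30};
  Closure cl;
  iterate_forward(fields, &cl);
  iterate_backwards(fields, &cl);
  return 0;
}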
--- a/hotspot/src/share/vm/oops/instanceKlass.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -655,13 +655,21 @@
     return oop_oop_iterate_v_m(obj, blk, mr);
   }
 
-#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)   \
-  int  oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);        \
-  int  oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk,     \
+#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)      \
+  int  oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);           \
+  int  oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk,        \
                                       MemRegion mr);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \
+  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
+
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+#endif // !SERIALGC
 
   void iterate_static_fields(OopClosure* closure);
   void iterate_static_fields(OopClosure* closure, MemRegion mr);
--- a/hotspot/src/share/vm/oops/instanceRefKlass.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -176,6 +176,11 @@
 }
 
 #define InstanceRefKlass_SPECIALIZED_OOP_ITERATE(T, nv_suffix, contains)        \
+  if (closure->apply_to_weak_ref_discovered_field()) {                          \
+    T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);           \
+    closure->do_oop##nv_suffix(disc_addr);                                      \
+  }                                                                             \
+                                                                                \
   T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);           \
   oop referent = oopDesc::load_decode_heap_oop(referent_addr);                  \
   if (referent != NULL && contains(referent_addr)) {                            \
@@ -219,6 +224,25 @@
   }                                                                             \
 }
 
+#ifndef SERIALGC
+#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \
+                                                                                \
+int instanceRefKlass::                                                          \
+oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {        \
+  /* Get size before changing pointers */                                       \
+  SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::irk);\
+                                                                                \
+  int size = instanceKlass::oop_oop_iterate_backwards##nv_suffix(obj, closure); \
+                                                                                \
+  if (UseCompressedOops) {                                                      \
+    InstanceRefKlass_SPECIALIZED_OOP_ITERATE(narrowOop, nv_suffix, contains);   \
+  } else {                                                                      \
+    InstanceRefKlass_SPECIALIZED_OOP_ITERATE(oop, nv_suffix, contains);         \
+  }                                                                             \
+}
+#endif // !SERIALGC
+
+
 #define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)      \
                                                                                 \
 int instanceRefKlass::                                                          \
@@ -236,9 +260,13 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN)
+#ifndef SERIALGC
+ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN)
+#endif // SERIALGC
 ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m)
 
 #ifndef SERIALGC
 template <class T>
@@ -423,7 +451,7 @@
   // Verify next field
   oop next = java_lang_ref_Reference::next(obj);
   if (next != NULL) {
-    guarantee(next->is_oop(), "next field verify fa iled");
+    guarantee(next->is_oop(), "next field verify failed");
     guarantee(next->is_instanceRef(), "next field verify failed");
     if (gch != NULL && !gch->is_in_youngest(obj)) {
       // We do a specific remembered set check here since the next field is
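The first instanceRefKlass hunk lets a closure opt in to having the Reference.discovered field visited, via apply_to_weak_ref_discovered_field(). A compact sketch of that opt-in shape; ToyRef, ToyClosure and EagerClosure are illustrative names only:

#include <cstdio>

struct ToyRef {
  int* referent;
  int* discovered;
};

struct ToyClosure {
  virtual ~ToyClosure() {}
  virtual bool apply_to_weak_ref_discovered_field() { return false; }
  virtual void do_oop(int** p) { std::printf("visit %p\n", (void*)*p); }
};

struct EagerClosure : ToyClosure {
  // A collector that tracks discovered references asks to see the field too.
  virtual bool apply_to_weak_ref_discovered_field() { return true; }
};

void iterate_ref(ToyRef* ref, ToyClosure* cl) {
  if (cl->apply_to_weak_ref_discovered_field()) {
    cl->do_oop(&ref->discovered);   // extra visit, only when requested
  }
  cl->do_oop(&ref->referent);       // referent handling as before
}

int main() {
  int a = 1, b = 2;
  ToyRef ref = { &a, &b };
  ToyClosure plain;   iterate_ref(&ref, &plain);   // one visit
  EagerClosure eager; iterate_ref(&ref, &eager);   // two visits
  return 0;
}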
--- a/hotspot/src/share/vm/oops/instanceRefKlass.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -72,7 +72,15 @@
   int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)      \
+  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
+
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+#endif // !SERIALGC
 
   static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock);
   static void acquire_pending_list_lock(BasicLock *pending_list_basic_lock);
--- a/hotspot/src/share/vm/oops/klass.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/klass.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -134,14 +134,14 @@
   // Every subclass on which vtbl_value is called must include this macro.
   // Delay the installation of the klassKlass pointer until after the
   // the vtable for a new klass has been installed (after the call to new()).
-#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \
+#define DEFINE_ALLOCATE_PERMANENT(thisKlass)                                  \
   void* allocate_permanent(KlassHandle& klass_klass, int size, TRAPS) const { \
-    void* result = new(klass_klass, size, THREAD) thisKlass(); \
-    if (HAS_PENDING_EXCEPTION) return NULL;                    \
-    klassOop new_klass = ((Klass*) result)->as_klassOop();      \
-    OrderAccess::storestore();  \
-    post_new_init_klass(klass_klass, new_klass, size);  \
-    return result;      \
+    void* result = new(klass_klass, size, THREAD) thisKlass();                \
+    if (HAS_PENDING_EXCEPTION) return NULL;                                   \
+    klassOop new_klass = ((Klass*) result)->as_klassOop();                    \
+    OrderAccess::storestore();                                                \
+    post_new_init_klass(klass_klass, new_klass, size);                        \
+    return result;                                                            \
   }
 
   bool null_vtbl() { return *(intptr_t*)this == 0; }
@@ -694,6 +694,14 @@
     return oop_oop_iterate(obj, blk);
   }
 
+#ifndef SERIALGC
+  // In case we don't have a specialized backward scanner, use forward
+  // iteration.
+  virtual int oop_oop_iterate_backwards_v(oop obj, OopClosure* blk) {
+    return oop_oop_iterate_v(obj, blk);
+  }
+#endif // !SERIALGC
+
   // Iterates "blk" over all the oops in "obj" (of type "this") within "mr".
   // (I don't see why the _m should be required, but without it the Solaris
   // C++ gives warning messages about overridings of the "oop_oop_iterate"
@@ -722,7 +730,19 @@
   }
 
   SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL)
-  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(Klass_OOP_OOP_ITERATE_DECL)
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)      \
+  virtual int oop_oop_iterate_backwards##nv_suffix(oop obj,                  \
+                                                   OopClosureType* blk) {    \
+    /* Default implementation reverts to general version. */                 \
+    return oop_oop_iterate_backwards_v(obj, blk);                            \
+  }
+
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+#endif // !SERIALGC
 
   virtual void array_klasses_do(void f(klassOop k)) {}
   virtual void with_array_klasses_do(void f(klassOop k));
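Klass gives every specialized backwards iterator a default body that simply falls back to the forward/general version, so only klasses with a genuine reverse scanner need to override anything. A sketch of that fallback layering with invented names:

#include <cstdio>

struct ToyClosure {};

class ToyKlass {
 public:
  virtual ~ToyKlass() {}
  virtual int oop_oop_iterate_v(void*, ToyClosure*) {
    std::puts("generic forward iteration");
    return 0;
  }
  // Default: no specialized backward scanner, so reuse forward iteration.
  virtual int oop_oop_iterate_backwards_v(void* obj, ToyClosure* blk) {
    return oop_oop_iterate_v(obj, blk);
  }
};

class ToyInstanceKlass : public ToyKlass {
 public:
  // This klass does have a cheaper reverse walk, so it overrides.
  virtual int oop_oop_iterate_backwards_v(void*, ToyClosure*) {
    std::puts("specialized backward iteration");
    return 0;
  }
};

int main() {
  ToyClosure blk;
  ToyKlass base;          base.oop_oop_iterate_backwards_v(0, &blk);  // falls back
  ToyInstanceKlass inst;  inst.oop_oop_iterate_backwards_v(0, &blk);  // specialized
  return 0;
}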
--- a/hotspot/src/share/vm/oops/markOop.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/markOop.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -222,11 +222,7 @@
   static markOop INFLATING() { return (markOop) 0; }    // inflate-in-progress
 
   // Should this header be preserved during GC?
-  bool must_be_preserved(oop obj_containing_mark) const {
-    if (!UseBiasedLocking)
-      return (!is_unlocked() || !has_no_hash());
-    return must_be_preserved_with_bias(obj_containing_mark);
-  }
+  inline bool must_be_preserved(oop obj_containing_mark) const;
   inline bool must_be_preserved_with_bias(oop obj_containing_mark) const;
 
   // Should this header (including its age bits) be preserved in the
@@ -246,22 +242,14 @@
   // observation is that promotion failures are quite rare and
   // reducing the number of mark words preserved during them isn't a
   // high priority.
-  bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const {
-    if (!UseBiasedLocking)
-      return (this != prototype());
-    return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark);
-  }
+  inline bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const;
   inline bool must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const;
 
   // Should this header be preserved during a scavenge where CMS is
   // the old generation?
   // (This is basically the same body as must_be_preserved_for_promotion_failure(),
   // but takes the klassOop as argument instead)
-  bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const {
-    if (!UseBiasedLocking)
-      return (this != prototype());
-    return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark);
-  }
+  inline bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const;
   inline bool must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const;
 
   // WARNING: The following routines are used EXCLUSIVELY by
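markOop.hpp now only declares must_be_preserved() and friends; their bodies move to markOop.inline.hpp as inline functions, keeping the biased-locking checks out of the widely included header. A single-file sketch of that split, with section comments standing in for the two headers; the ToyMark type and flag are invented for illustration:

#include <cassert>

static bool UseToyBiasedLocking = true;   // stand-in for UseBiasedLocking

// ----- "toyMark.hpp": declaration only ---------------------------------------
class ToyMark {
  unsigned _bits;
 public:
  explicit ToyMark(unsigned bits) : _bits(bits) {}
  bool is_unlocked() const { return (_bits & 1u) == 0; }
  bool has_bias() const    { return (_bits & 2u) != 0; }
  inline bool must_be_preserved() const;   // body lives in the .inline.hpp
};

// ----- "toyMark.inline.hpp": definition, included only where needed ----------
inline bool ToyMark::must_be_preserved() const {
  if (!UseToyBiasedLocking)
    return !is_unlocked();
  return has_bias();       // the extra check the header no longer spells out
}

int main() {
  assert(ToyMark(2u).must_be_preserved());    // biased -> preserve
  assert(!ToyMark(0u).must_be_preserved());   // unbiased, unlocked -> drop
  return 0;
}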
--- a/hotspot/src/share/vm/oops/markOop.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/markOop.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -39,6 +39,12 @@
   return (!is_unlocked() || !has_no_hash());
 }
 
+inline bool markOopDesc::must_be_preserved(oop obj_containing_mark) const {
+  if (!UseBiasedLocking)
+    return (!is_unlocked() || !has_no_hash());
+  return must_be_preserved_with_bias(obj_containing_mark);
+}
+
 // Should this header (including its age bits) be preserved in the
 // case of a promotion failure during scavenge?
 inline bool markOopDesc::must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const {
@@ -59,6 +65,13 @@
   return (this != prototype());
 }
 
+inline bool markOopDesc::must_be_preserved_for_promotion_failure(oop obj_containing_mark) const {
+  if (!UseBiasedLocking)
+    return (this != prototype());
+  return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark);
+}
+
+
 // Should this header (including its age bits) be preserved in the
 // case of a scavenge in which CMS is the old generation?
 inline bool markOopDesc::must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const {
@@ -70,6 +83,11 @@
   }
   return (this != prototype());
 }
+inline bool markOopDesc::must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const {
+  if (!UseBiasedLocking)
+    return (this != prototype());
+  return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark);
+}
 
 inline markOop markOopDesc::prototype_for_object(oop obj) {
 #ifdef ASSERT
--- a/hotspot/src/share/vm/oops/objArrayKlass.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -86,14 +86,18 @@
 
   const size_t word_len = objArrayOopDesc::array_size(length);
 
-  // For performance reasons, we assume we are using a card marking write
-  // barrier. The assert will fail if this is not the case.
   BarrierSet* bs = Universe::heap()->barrier_set();
+  // For performance reasons, we assume that the write barrier we are
+  // using has optimized modes for arrays of references.  At least one
+  // of the asserts below will fail if this is not the case.
   assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");
+  assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well.");
 
+  MemRegion dst_mr = MemRegion((HeapWord*)dst, word_len);
   if (s == d) {
     // since source and destination are equal we do not need conversion checks.
     assert(length > 0, "sanity check");
+    bs->write_ref_array_pre(dst_mr);
     Copy::conjoint_oops_atomic(src, dst, length);
   } else {
     // We have to make sure all elements conform to the destination array
@@ -101,6 +105,7 @@
     klassOop stype = objArrayKlass::cast(s->klass())->element_klass();
     if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) {
       // elements are guaranteed to be subtypes, so no check necessary
+      bs->write_ref_array_pre(dst_mr);
       Copy::conjoint_oops_atomic(src, dst, length);
     } else {
       // slow case: need individual subtype checks
@@ -110,8 +115,13 @@
       for (T* p = dst; from < end; from++, p++) {
         // XXX this is going to be slow.
         T element = *from;
-        if (oopDesc::is_null(element) ||
-            Klass::cast(oopDesc::decode_heap_oop_not_null(element)->klass())->is_subtype_of(bound)) {
+        // even slower now
+        bool element_is_null = oopDesc::is_null(element);
+        oop new_val = element_is_null ? oop(NULL)
+                                      : oopDesc::decode_heap_oop_not_null(element);
+        if (element_is_null ||
+            Klass::cast((new_val->klass()))->is_subtype_of(bound)) {
+          bs->write_ref_field_pre(p, new_val);
           *p = *from;
         } else {
           // We must do a barrier to cover the partial copy.
@@ -401,11 +411,11 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN)
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m)
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r)
 
 int objArrayKlass::oop_adjust_pointers(oop obj) {
   assert(obj->is_objArray(), "obj must be obj array");
@@ -465,8 +475,8 @@
     assert(Universe::is_bootstrapping(), "partial objArray only at startup");
     return JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC;
   }
-  // Recurse down the element list
-  jint element_flags = Klass::cast(element_klass())->compute_modifier_flags(CHECK_0);
+  // Return the flags of the bottom element type.
+  jint element_flags = Klass::cast(bottom_klass())->compute_modifier_flags(CHECK_0);
 
   return (element_flags & (JVM_ACC_PUBLIC | JVM_ACC_PRIVATE | JVM_ACC_PROTECTED))
                         | (JVM_ACC_ABSTRACT | JVM_ACC_FINAL);
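The arraycopy hunks above invoke the barrier set's pre-barrier over the destination region (write_ref_array_pre(dst_mr)) before any slot is overwritten, so a collector that needs the old values, as G1's snapshot-at-the-beginning scheme does, can record them first. A toy sketch of that ordering with invented names; the real BarrierSet interface differs:

#include <vector>
#include <cstdio>

typedef int* oop_t;                       // stand-in for an oop
static std::vector<oop_t> satb_buffer;    // where old references get logged

// Batch pre-barrier: log every non-null reference about to be clobbered.
void write_ref_array_pre(oop_t* start, int length) {
  for (int i = 0; i < length; i++)
    if (start[i] != 0) satb_buffer.push_back(start[i]);
}

void copy_oop_array(oop_t* src, oop_t* dst, int length) {
  write_ref_array_pre(dst, length);       // before the old values disappear
  for (int i = 0; i < length; i++)        // then the actual copy
    dst[i] = src[i];
  // a post-barrier over the destination region would follow here
}

int main() {
  int a = 1, b = 2, old_ref = 3;
  oop_t src[2] = { &a, &b };
  oop_t dst[2] = { &old_ref, 0 };
  copy_oop_array(src, dst, 2);
  std::printf("pre-barrier recorded %zu old reference(s)\n", satb_buffer.size());
  return 0;
}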
--- a/hotspot/src/share/vm/oops/objArrayKlass.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/objArrayKlass.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -111,7 +111,7 @@
                                      int start, int end);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
 
   // JVM support
   jint compute_modifier_flags(TRAPS) const;
--- a/hotspot/src/share/vm/oops/objArrayOop.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/objArrayOop.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -33,4 +33,4 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DEFN)
--- a/hotspot/src/share/vm/oops/objArrayOop.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/objArrayOop.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -29,6 +29,8 @@
   friend class objArrayKlass;
   friend class Runtime1;
   friend class psPromotionManager;
+  friend class CSMarkOopClosure;
+  friend class G1ParScanPartialArrayClosure;
 
   template <class T> T* obj_at_addr(int index) const {
     assert(is_within_bounds(index), "index out of bounds");
@@ -88,5 +90,5 @@
   int oop_iterate_range(OopClosureType* blk, int start, int end);
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DECL)
 };
--- a/hotspot/src/share/vm/oops/oop.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/oop.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -363,12 +363,21 @@
   static void set_bs(BarrierSet* bs) { _bs = bs; }
 
   // iterators, returns size of object
-#define OOP_ITERATE_DECL(OopClosureType, nv_suffix)                             \
+#define OOP_ITERATE_DECL(OopClosureType, nv_suffix)                      \
   int oop_iterate(OopClosureType* blk);                                  \
   int oop_iterate(OopClosureType* blk, MemRegion mr);  // Only in mr.
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DECL)
+
+#ifndef SERIALGC
+
+#define OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)            \
+  int oop_iterate_backwards(OopClosureType* blk);
+
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DECL)
+#endif
 
   void oop_iterate_header(OopClosure* blk);
   void oop_iterate_header(OopClosure* blk, MemRegion mr);
--- a/hotspot/src/share/vm/oops/oop.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -395,10 +395,11 @@
       s = (int)((size_t)round_to(size_in_bytes, MinObjAlignmentInBytes) /
         HeapWordSize);
 
-      // UseParNewGC can change the length field of an "old copy" of an object
-      // array in the young gen so it indicates the stealable portion of
-      // an already copied array. This will cause the first disjunct below
-      // to fail if the sizes are computed across such a concurrent change.
+      // UseParNewGC, UseParallelGC and UseG1GC can change the length field
+      // of an "old copy" of an object array in the young gen so it indicates
+      // the grey portion of an already copied array. This will cause the first
+      // disjunct below to fail if the two comparands are computed across such
+      // a concurrent change.
       // UseParNewGC also runs with promotion labs (which look like int
       // filler arrays) which are subject to changing their declared size
       // when finally retiring a PLAB; this also can cause the first disjunct
@@ -408,13 +409,11 @@
       //     is_objArray() && is_forwarded()   // covers first scenario above
       //  || is_typeArray()                    // covers second scenario above
       // If and when UseParallelGC uses the same obj array oop stealing/chunking
-      // technique, or when G1 is integrated (and currently uses this array chunking
-      // technique) we will need to suitably modify the assertion.
+      // technique, we will need to suitably modify the assertion.
       assert((s == klass->oop_size(this)) ||
-             (((UseParNewGC || UseParallelGC) &&
-              Universe::heap()->is_gc_active()) &&
-              (is_typeArray() ||
-               (is_objArray() && is_forwarded()))),
+             (Universe::heap()->is_gc_active() &&
+              ((is_typeArray() && UseParNewGC) ||
+               (is_objArray()  && is_forwarded() && (UseParNewGC || UseParallelGC || UseG1GC)))),
              "wrong array object size");
     } else {
       // Must be zero, so bite the bullet and take the virtual call.
@@ -441,16 +440,22 @@
   oopDesc::bs()->write_ref_field(p, v);
 }
 
+inline void update_barrier_set_pre(void* p, oop v) {
+  oopDesc::bs()->write_ref_field_pre(p, v);
+}
+
 template <class T> inline void oop_store(T* p, oop v) {
   if (always_do_update_barrier) {
     oop_store((volatile T*)p, v);
   } else {
+    update_barrier_set_pre(p, v);
     oopDesc::encode_store_heap_oop(p, v);
     update_barrier_set(p, v);
   }
 }
 
 template <class T> inline void oop_store(volatile T* p, oop v) {
+  update_barrier_set_pre((void*)p, v);
   // Used by release_obj_field_put, so use release_store_ptr.
   oopDesc::release_encode_store_heap_oop(p, v);
   update_barrier_set((void*)p, v);
@@ -698,8 +703,19 @@
 }
 
 ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DEFN)
-ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DEFN)
 
+#ifndef SERIALGC
+#define OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
+                                                                           \
+inline int oopDesc::oop_iterate_backwards(OopClosureType* blk) {           \
+  SpecializationStats::record_call();                                      \
+  return blueprint()->oop_oop_iterate_backwards##nv_suffix(this, blk);     \
+}
+
+ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DEFN)
+ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DEFN)
+#endif // !SERIALGC
 
 inline bool oopDesc::is_shared() const {
   return CompactingPermGenGen::is_shared(this);
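
The ordering the patched oop_store() imposes is exactly what G1's snapshot-at-the-beginning invariant needs: log the value about to be overwritten (pre-barrier), then encode and store the new reference, then dirty the card (post-barrier). A minimal standalone sketch of that ordering; ToyBarrierSet below is a hypothetical stand-in, not the real oopDesc/BarrierSet API.

#include <cstdio>

// Hypothetical stand-in for the real BarrierSet; names and behavior are illustrative only.
struct ToyBarrierSet {
  void write_ref_field_pre(void** field, void* /*new_val*/) {
    // SATB pre-barrier: record the *old* value still in the field before it is overwritten.
    std::printf("pre-barrier: old value %p logged\n", *field);
  }
  void write_ref_field(void** field, void* /*new_val*/) {
    // Post-barrier: dirty the card covering the updated field.
    std::printf("post-barrier: card for field %p dirtied\n", (void*)field);
  }
};

static ToyBarrierSet bs;

// Mirrors the order in the patched oop_store(): pre-barrier, store, post-barrier.
void toy_oop_store(void** field, void* new_val) {
  bs.write_ref_field_pre(field, new_val);   // must observe the old value
  *field = new_val;                         // the reference store itself
  bs.write_ref_field(field, new_val);       // card mark afterwards
}

int main() {
  void* old_obj = reinterpret_cast<void*>(0x1000);
  void* new_obj = reinterpret_cast<void*>(0x2000);
  void* field   = old_obj;
  toy_oop_store(&field, new_obj);
  return 0;
}
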
--- a/hotspot/src/share/vm/opto/addnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/addnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -156,7 +156,8 @@
   if( add1_op == this_op && !con_right ) {
     Node *a12 = add1->in(2);
     const Type *t12 = phase->type( a12 );
-    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) ) {
+    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) &&
+       !(add1->in(1)->is_Phi() && add1->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add1->in(1) != this, "dead loop in AddNode::Ideal");
       add2 = add1->clone();
       add2->set_req(2, in(2));
@@ -173,7 +174,8 @@
   if( add2_op == this_op && !con_left ) {
     Node *a22 = add2->in(2);
     const Type *t22 = phase->type( a22 );
-    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) ) {
+    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) &&
+       !(add2->in(1)->is_Phi() && add2->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add2->in(1) != this, "dead loop in AddNode::Ideal");
       Node *addx = add2->clone();
       addx->set_req(1, in(1));
@@ -225,34 +227,63 @@
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
   // Fold (con1-x)+con2 into (con1+con2)-x
+  if ( op1 == Op_AddI && op2 == Op_SubI ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   if( op1 == Op_SubI ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2    = phase->type( in(2)        );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2    = phase->type( in2        );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubINode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubI ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
               "dead loop in AddINode::Ideal" );
       Node *sub  = new (phase->C, 3) SubINode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubI && in1->in(1) == in2->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubI && phase->type(in(2)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubI && phase->type(in2->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode(in1, in2->in(2) );
 
   // Convert "(0-y)+x" into "(x-y)"
-  if( op1 == Op_SubI && phase->type(in(1)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode( in(2), in(1)->in(2) );
+  if( op1 == Op_SubI && phase->type(in1->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode( in2, in1->in(2) );
 
   // Convert (x>>>z)+y into (x+(y<<z))>>>z for small constant z and y.
   // Helps with array allocation math constant folding
@@ -266,15 +297,15 @@
   // Have not observed cases where type information exists to support
   // positive y and (x <= -(y << z))
   if( op1 == Op_URShiftI && op2 == Op_ConI &&
-      in(1)->in(2)->Opcode() == Op_ConI ) {
-    jint z = phase->type( in(1)->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
-    jint y = phase->type( in(2) )->is_int()->get_con();
+      in1->in(2)->Opcode() == Op_ConI ) {
+    jint z = phase->type( in1->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
+    jint y = phase->type( in2 )->is_int()->get_con();
 
     if( z < 5 && -5 < y && y < 0 ) {
-      const Type *t_in11 = phase->type(in(1)->in(1));
+      const Type *t_in11 = phase->type(in1->in(1));
       if( t_in11 != Type::TOP && (t_in11->is_int()->_lo >= -(y << z)) ) {
-        Node *a = phase->transform( new (phase->C, 3) AddINode( in(1)->in(1), phase->intcon(y<<z) ) );
-        return new (phase->C, 3) URShiftINode( a, in(1)->in(2) );
+        Node *a = phase->transform( new (phase->C, 3) AddINode( in1->in(1), phase->intcon(y<<z) ) );
+        return new (phase->C, 3) URShiftINode( a, in1->in(2) );
       }
     }
   }
@@ -328,39 +359,73 @@
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
+  // Fold (con1-x)+con2 into (con1+con2)-x
+  if ( op1 == Op_AddL && op2 == Op_SubL ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   // Fold (con1-x)+con2 into (con1+con2)-x
   if( op1 == Op_SubL ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2    = phase->type( in(2)        );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2    = phase->type( in2        );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubLNode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubL ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
               "dead loop in AddLNode::Ideal" );
       Node *sub  = new (phase->C, 3) SubLNode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubL && in1->in(1) == in1->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubL && phase->type(in(2)->in(1)) == TypeLong::ZERO )
-    return new (phase->C, 3) SubLNode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubL && phase->type(in2->in(1)) == TypeLong::ZERO )
+    return new (phase->C, 3) SubLNode( in1, in2->in(2) );
+
+  // Convert "(0-y)+x" into "(x-y)"
+  if( op1 == Op_SubL && phase->type(in1->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubLNode( in2, in1->in(2) );
 
   // Convert "X+X+X+X+X...+X+Y" into "k*X+Y" or really convert "X+(X+Y)"
   // into "(X<<1)+Y" and let shift-folding happen.
   if( op2 == Op_AddL &&
-      in(2)->in(1) == in(1) &&
+      in2->in(1) == in1 &&
       op1 != Op_ConL &&
       0 ) {
-    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in(1),phase->intcon(1)));
-    return new (phase->C, 3) AddLNode(shift,in(2)->in(2));
+    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in1,phase->intcon(1)));
+    return new (phase->C, 3) AddLNode(shift,in2->in(2));
   }
 
   return AddNode::Ideal(phase, can_reshape);
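
The new "(a-b)+(b+c)" style rewrites can be applied unconditionally because Java int and long arithmetic wrap modulo 2^32 and 2^64, so the identities hold for every input, overflow included. A quick standalone check in plain C++ (unsigned arithmetic models the wraparound; no HotSpot types involved):

#include <cassert>
#include <cstdint>
#include <random>

int main() {
  std::mt19937 rng(42);
  std::uniform_int_distribution<uint32_t> dist;
  for (int i = 0; i < 100000; ++i) {
    uint32_t a = dist(rng), b = dist(rng), c = dist(rng);
    // The identities folded by AddINode::Ideal, checked on wrapping arithmetic:
    assert(uint32_t((a - b) + (b + c)) == uint32_t(a + c));   // (a-b)+(b+c) -> a+c
    assert(uint32_t((a - b) + (c + b)) == uint32_t(a + c));   // (a-b)+(c+b) -> a+c
    assert(uint32_t((a - b) + (b - c)) == uint32_t(a - c));   // (a-b)+(b-c) -> a-c
    assert(uint32_t((a - b) + (c - a)) == uint32_t(c - b));   // (a-b)+(c-a) -> c-b
  }
  return 0;
}
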
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -25,19 +25,6 @@
 #include "incls/_precompiled.incl"
 #include "incls/_bytecodeInfo.cpp.incl"
 
-// These variables are declared in parse1.cpp
-extern int  explicit_null_checks_inserted;
-extern int  explicit_null_checks_elided;
-extern int  explicit_null_checks_inserted_old;
-extern int  explicit_null_checks_elided_old;
-extern int  nodes_created_old;
-extern int  nodes_created;
-extern int  methods_parsed_old;
-extern int  methods_parsed;
-extern int  methods_seen;
-extern int  methods_seen_old;
-
-
 //=============================================================================
 //------------------------------InlineTree-------------------------------------
 InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
@@ -517,27 +504,3 @@
   }
   return iltp;
 }
-
-// ----------------------------------------------------------------------------
-#ifndef PRODUCT
-
-static void per_method_stats() {
-  // Compute difference between this method's cumulative totals and old totals
-  int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
-  int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
-
-  // Print differences
-  if( explicit_null_checks_cur )
-    tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
-  if( elided_null_checks_cur )
-    tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
-
-  // Store the current cumulative totals
-  nodes_created_old = nodes_created;
-  methods_parsed_old = methods_parsed;
-  methods_seen_old = methods_seen;
-  explicit_null_checks_inserted_old = explicit_null_checks_inserted;
-  explicit_null_checks_elided_old = explicit_null_checks_elided;
-}
-
-#endif
--- a/hotspot/src/share/vm/opto/callnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1034,6 +1034,39 @@
 //=============================================================================
 uint AllocateArrayNode::size_of() const { return sizeof(*this); }
 
+// Retrieve the length from the AllocateArrayNode. Narrow the type with a
+// CastII, if appropriate.  If we are not allowed to create new nodes, and
+// a CastII is appropriate, return NULL.
+Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) {
+  Node *length = in(AllocateNode::ALength);
+  assert(length != NULL, "length is not null");
+
+  const TypeInt* length_type = phase->find_int_type(length);
+  const TypeAryPtr* ary_type = oop_type->isa_aryptr();
+
+  if (ary_type != NULL && length_type != NULL) {
+    const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type);
+    if (narrow_length_type != length_type) {
+      // Assert one of:
+      //   - the narrow_length is 0
+      //   - the narrow_length is not wider than length
+      assert(narrow_length_type == TypeInt::ZERO ||
+             (narrow_length_type->_hi <= length_type->_hi &&
+              narrow_length_type->_lo >= length_type->_lo),
+             "narrow type must be narrower than length type");
+
+      // Return NULL if new nodes are not allowed
+      if (!allow_new_nodes) return NULL;
+      // Create a cast which is control dependent on the initialization to
+      // propagate the fact that the array length must be positive.
+      length = new (phase->C, 2) CastIINode(length, narrow_length_type);
+      length->set_req(0, initialization()->proj_out(0));
+    }
+  }
+
+  return length;
+}
+
 //=============================================================================
 uint LockNode::size_of() const { return sizeof(*this); }
 
--- a/hotspot/src/share/vm/opto/callnode.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/callnode.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -755,6 +755,15 @@
   virtual int Opcode() const;
   virtual uint size_of() const; // Size is bigger
 
+  // Dig the length operand out of an array allocation site.
+  Node* Ideal_length() {
+    return in(AllocateNode::ALength);
+  }
+
+  // Dig the length operand out of an array allocation site and narrow the
+  // type with a CastII, if necessary
+  Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true);
+
   // Pattern-match a possible usage of AllocateArrayNode.
   // Return null if no allocation is recognized.
   static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) {
@@ -762,12 +771,6 @@
     return (allo == NULL || !allo->is_AllocateArray())
            ? NULL : allo->as_AllocateArray();
   }
-
-  // Dig the length operand out of a (possible) array allocation site.
-  static Node* Ideal_length(Node* ptr, PhaseTransform* phase) {
-    AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase);
-    return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength);
-  }
 };
 
 //------------------------------AbstractLockNode-----------------------------------
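
make_ideal_length tightens the length's integer type once the allocation is known to have succeeded: a valid array length is non-negative and bounded by the maximum allocatable array length, and the CastII pins that knowledge to the control path after initialization. A toy model of the narrowing step only; Interval and kMaxArrayLength are made-up stand-ins for TypeInt and the VM's real limit, not the narrow_size_type implementation.

#include <algorithm>
#include <cstdio>

// Toy stand-ins: Interval plays the role of TypeInt; kMaxArrayLength is invented.
struct Interval { int lo, hi; };

const int kMaxArrayLength = 1 << 28;

// Roughly what narrowing the size type achieves after a successful allocation:
// the length is non-negative and at most the maximum allocatable array length.
Interval narrow_size_type(Interval len) {
  Interval n;
  n.lo = std::max(len.lo, 0);
  n.hi = std::min(len.hi, kMaxArrayLength);
  if (n.lo > n.hi) n = Interval{0, 0};   // impossible range collapses to zero
  return n;
}

int main() {
  Interval before{-5, 1 << 30};          // a length that arrived as "almost any int"
  Interval after = narrow_size_type(before);
  std::printf("[%d, %d] -> [%d, %d]\n", before.lo, before.hi, after.lo, after.hi);
  return 0;
}
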
--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1665,7 +1665,11 @@
             // compress paths and change unreachable cycles to TOP
             // If not, we can update the input infinitely along a MergeMem cycle
             // Equivalent code is in MemNode::Ideal_common
-            Node         *m  = phase->transform(n);
+            Node *m  = phase->transform(n);
+            if (outcnt() == 0) {  // Above transform() may kill us!
+              progress = phase->C->top();
+              break;
+            }
             // If transformed to a MergeMem, get the desired slice
             // Otherwise the returned node represents memory for every slice
             Node *new_mem = (m->is_MergeMem()) ?
@@ -1765,9 +1769,60 @@
     }
   }
 
+#ifdef _LP64
+  // Push DecodeN down through phi.
+  // The rest of the phi graph is transformed by splitting EncodeP nodes up through the phis.
+  if (UseCompressedOops && can_reshape && progress == NULL) {
+    bool may_push = true;
+    bool has_decodeN = false;
+    Node* in_decodeN = NULL;
+    for (uint i=1; i<req(); ++i) {// For all paths in
+      Node *ii = in(i);
+      if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
+        has_decodeN = true;
+        in_decodeN = ii->in(1);
+      } else if (!ii->is_Phi()) {
+        may_push = false;
+      }
+    }
+
+    if (has_decodeN && may_push) {
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      // Note: in_decodeN is used only to define the type of new phi here.
+      PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
+      uint orig_cnt = req();
+      for (uint i=1; i<req(); ++i) {// For all paths in
+        Node *ii = in(i);
+        Node* new_ii = NULL;
+        if (ii->is_DecodeN()) {
+          assert(ii->bottom_type() == bottom_type(), "sanity");
+          new_ii = ii->in(1);
+        } else {
+          assert(ii->is_Phi(), "sanity");
+          if (ii->as_Phi() == this) {
+            new_ii = new_phi;
+          } else {
+            new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type());
+            igvn->register_new_node_with_optimizer(new_ii);
+          }
+        }
+        new_phi->set_req(i, new_ii);
+      }
+      igvn->register_new_node_with_optimizer(new_phi, this);
+      progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type());
+    }
+  }
+#endif
+
   return progress;              // Return any progress
 }
 
+//------------------------------is_tripcount-----------------------------------
+bool PhiNode::is_tripcount() const {
+  return (in(0) != NULL && in(0)->is_CountedLoop() &&
+          in(0)->as_CountedLoop()->phi() == this);
+}
+
 //------------------------------out_RegMask------------------------------------
 const RegMask &PhiNode::in_RegMask(uint i) const {
   return i ? out_RegMask() : RegMask::Empty;
@@ -1783,9 +1838,7 @@
 #ifndef PRODUCT
 void PhiNode::dump_spec(outputStream *st) const {
   TypeNode::dump_spec(st);
-  if (in(0) != NULL &&
-      in(0)->is_CountedLoop() &&
-      in(0)->as_CountedLoop()->phi() == this) {
+  if (is_tripcount()) {
     st->print(" #tripcount");
   }
 }
--- a/hotspot/src/share/vm/opto/cfgnode.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/cfgnode.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -162,6 +162,8 @@
     return NULL;  // not a copy!
   }
 
+  bool is_tripcount() const;
+
   // Determine a unique non-trivial input, if any.
   // Ignore casts if it helps.  Return NULL on failure.
   Node* unique_input(PhaseTransform *phase);
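
Pushing DecodeN below the Phi is only sound because decoding commutes with the control-flow merge: Phi(DecodeN(a), DecodeN(b)) and DecodeN(Phi(a, b)) denote the same value on every path. A standalone check of that commutation with a toy decode; the base-plus-shift constants are stand-ins, not the VM's actual compressed-oops parameters.

#include <cassert>
#include <cstdint>

// Toy model of narrow-oop decoding: heap base + (narrow << shift).
const uint64_t kHeapBase = 0x100000000ull;
const int      kShift    = 3;

uint64_t decode(uint32_t narrow) { return kHeapBase + (uint64_t(narrow) << kShift); }

int main() {
  // Decoding each input and then merging gives the same value as merging
  // first and decoding once:  Phi(DecodeN(a), DecodeN(b)) == DecodeN(Phi(a, b))
  for (uint32_t a = 0; a < 1000; ++a) {
    uint32_t b = a * 7 + 3;
    for (int sel = 0; sel <= 1; ++sel) {
      uint64_t decode_then_merge = sel ? decode(a) : decode(b);
      uint64_t merge_then_decode = decode(sel ? a : b);
      assert(decode_then_merge == merge_then_decode);
    }
  }
  return 0;
}
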
--- a/hotspot/src/share/vm/opto/compile.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/compile.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -467,6 +467,7 @@
     }
   }
   set_print_assembly(print_opto_assembly);
+  set_parsed_irreducible_loop(false);
 #endif
 
   if (ProfileTraps) {
@@ -550,6 +551,8 @@
       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
     }
 
+    print_method("Before RemoveUseless");
+
     // Remove clutter produced by parsing.
     if (!failing()) {
       ResourceMark rm;
@@ -615,8 +618,6 @@
   if (failing())  return;
   NOT_PRODUCT( verify_graph_edges(); )
 
-  print_method("Before Matching");
-
 #ifndef PRODUCT
   if (PrintIdeal) {
     ttyLocker ttyl;  // keep the following output all in one block
@@ -720,6 +721,7 @@
   TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
   TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
   set_print_assembly(PrintFrameConverterAssembly);
+  set_parsed_irreducible_loop(false);
 #endif
   CompileWrapper cw(this);
   Init(/*AliasLevel=*/ 0);
@@ -2073,6 +2075,44 @@
   }
 
 #ifdef _LP64
+  case Op_CastPP:
+    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
+      Compile* C = Compile::current();
+      Node* in1 = n->in(1);
+      const Type* t = n->bottom_type();
+      Node* new_in1 = in1->clone();
+      new_in1->as_DecodeN()->set_type(t);
+
+      if (!Matcher::clone_shift_expressions) {
+        //
+        // x86, ARM and friends can handle 2 adds in addressing mode
+        // and Matcher can fold a DecodeN node into address by using
+        // a narrow oop directly and do implicit NULL check in address:
+        //
+        // [R12 + narrow_oop_reg<<3 + offset]
+        // NullCheck narrow_oop_reg
+        //
+        // On other platforms (Sparc) we have to keep new DecodeN node and
+        // use it to do implicit NULL check in address:
+        //
+        // decode_not_null narrow_oop_reg, base_reg
+        // [base_reg + offset]
+        // NullCheck base_reg
+        //
+        // Pin the new DecodeN node to the non-null path on these platforms
+        // (Sparc) to keep track of which NULL check the new DecodeN node
+        // corresponds to, so it can be used as the value in implicit_null_check().
+        //
+        new_in1->set_req(0, n->in(0));
+      }
+
+      n->subsume_by(new_in1);
+      if (in1->outcnt() == 0) {
+        in1->disconnect_inputs(NULL);
+      }
+    }
+    break;
+
   case Op_CmpP:
     // Do this transformation here to preserve CmpPNode::sub() and
     // other TypePtr related Ideal optimizations (for example, ptr nullness).
@@ -2092,24 +2132,44 @@
       } else if (in2->Opcode() == Op_ConP) {
         const Type* t = in2->bottom_type();
         if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
-          if (Matcher::clone_shift_expressions) {
-            // x86, ARM and friends can handle 2 adds in addressing mode.
-            // Decode a narrow oop and do implicit NULL check in address
-            // [R12 + narrow_oop_reg<<3 + offset]
-            new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
-          } else {
-            // Don't replace CmpP(o ,null) if 'o' is used in AddP
-            // to generate implicit NULL check on Sparc where
-            // narrow oops can't be used in address.
-            uint i = 0;
-            for (; i < in1->outcnt(); i++) {
-              if (in1->raw_out(i)->is_AddP())
-                break;
-            }
-            if (i >= in1->outcnt()) {
-              new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
-            }
-          }
+          new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+          //
+          // This transformation, together with the CastPP transformation above,
+          // will generate code for implicit NULL checks for compressed oops.
+          //
+          // The original code after Optimize()
+          //
+          //    LoadN memory, narrow_oop_reg
+          //    decode narrow_oop_reg, base_reg
+          //    CmpP base_reg, NULL
+          //    CastPP base_reg // NotNull
+          //    Load [base_reg + offset], val_reg
+          //
+          // after these transformations will be
+          //
+          //    LoadN memory, narrow_oop_reg
+          //    CmpN narrow_oop_reg, NULL
+          //    decode_not_null narrow_oop_reg, base_reg
+          //    Load [base_reg + offset], val_reg
+          //
+          // and the uncommon path (== NULL) will use narrow_oop_reg directly
+          // since narrow oops can be used in debug info now (see the code in
+          // final_graph_reshaping_walk()).
+          //
+          // At the end the code will be matched to
+          // on x86:
+          //
+          //    Load_narrow_oop memory, narrow_oop_reg
+          //    Load [R12 + narrow_oop_reg<<3 + offset], val_reg
+          //    NullCheck narrow_oop_reg
+          //
+          // and on sparc:
+          //
+          //    Load_narrow_oop memory, narrow_oop_reg
+          //    decode_not_null narrow_oop_reg, base_reg
+          //    Load [base_reg + offset], val_reg
+          //    NullCheck base_reg
+          //
         } else if (t->isa_oopptr()) {
           new_in2 = ConNode::make(C, t->make_narrowoop());
         }
@@ -2126,6 +2186,49 @@
       }
     }
     break;
+
+  case Op_DecodeN:
+    assert(!n->in(1)->is_EncodeP(), "should be optimized out");
+    break;
+
+  case Op_EncodeP: {
+    Node* in1 = n->in(1);
+    if (in1->is_DecodeN()) {
+      n->subsume_by(in1->in(1));
+    } else if (in1->Opcode() == Op_ConP) {
+      Compile* C = Compile::current();
+      const Type* t = in1->bottom_type();
+      if (t == TypePtr::NULL_PTR) {
+        n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
+      } else if (t->isa_oopptr()) {
+        n->subsume_by(ConNode::make(C, t->make_narrowoop()));
+      }
+    }
+    if (in1->outcnt() == 0) {
+      in1->disconnect_inputs(NULL);
+    }
+    break;
+  }
+
+  case Op_Phi:
+    if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
+      // The EncodeP optimization may create a Phi with the same edges
+      // for all paths. It is not handled well by Register Allocator.
+      Node* unique_in = n->in(1);
+      assert(unique_in != NULL, "");
+      uint cnt = n->req();
+      for (uint i = 2; i < cnt; i++) {
+        Node* m = n->in(i);
+        assert(m != NULL, "");
+        if (unique_in != m)
+          unique_in = NULL;
+      }
+      if (unique_in != NULL) {
+        n->subsume_by(unique_in);
+      }
+    }
+    break;
+
 #endif
 
   case Op_ModI:
--- a/hotspot/src/share/vm/opto/compile.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/compile.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -160,6 +160,7 @@
   bool                  _print_assembly;        // True if we should dump assembly code for this compilation
 #ifndef PRODUCT
   bool                  _trace_opto_output;
+  bool                  _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
 #endif
 
   // Compilation environment.
@@ -319,6 +320,8 @@
   }
 #ifndef PRODUCT
   bool          trace_opto_output() const       { return _trace_opto_output; }
+  bool              parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
+  void          set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
 #endif
 
   void begin_method() {
--- a/hotspot/src/share/vm/opto/connode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/connode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -433,8 +433,8 @@
 // If not converting int->oop, throw away cast after constant propagation
 Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
   const Type *t = ccp->type(in(1));
-  if (!t->isa_oop_ptr()) {
-    return NULL;                // do not transform raw pointers
+  if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
+    return NULL; // do not transform raw pointers or narrow oops
   }
   return ConstraintCastNode::Ideal_DU_postCCP(ccp);
 }
--- a/hotspot/src/share/vm/opto/divnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/divnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -110,10 +110,13 @@
     } else if( dividend->Opcode() == Op_AndI ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int();
-      if( andconi && andconi->is_con(-d) ) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeInt *andconi_t = phase->type( dividend->in(2) )->isa_int();
+      if( andconi_t && andconi_t->is_con() ) {
+        jint andconi = andconi_t->get_con();
+        if( andconi < 0 && is_power_of_2(-andconi) && (-andconi) >= d ) {
+          dividend = dividend->in(1);
+          needs_rounding = false;
+        }
       }
     }
 
@@ -316,10 +319,13 @@
     } else if( dividend->Opcode() == Op_AndL ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long();
-      if( andconl && andconl->is_con(-d)) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeLong *andconl_t = phase->type( dividend->in(2) )->isa_long();
+      if( andconl_t && andconl_t->is_con() ) {
+        jlong andconl = andconl_t->get_con();
+        if( andconl < 0 && is_power_of_2_long(-andconl) && (-andconl) >= d ) {
+          dividend = dividend->in(1);
+          needs_rounding = false;
+        }
       }
     }
 
@@ -704,11 +710,18 @@
   if( t2 == TypeD::ONE )
     return t1;
 
-  // If divisor is a constant and not zero, divide them numbers
-  if( t1->base() == Type::DoubleCon &&
-      t2->base() == Type::DoubleCon &&
-      t2->getd() != 0.0 ) // could be negative zero
-    return TypeD::make( t1->getd()/t2->getd() );
+#if defined(IA32)
+  if (!phase->C->method()->is_strict())
+    // Can't trust native compilers to properly fold strict double
+    // division with round-to-zero on this platform.
+#endif
+    {
+      // If divisor is a constant and not zero, divide the numbers
+      if( t1->base() == Type::DoubleCon &&
+          t2->base() == Type::DoubleCon &&
+          t2->getd() != 0.0 ) // could be negative zero
+        return TypeD::make( t1->getd()/t2->getd() );
+    }
 
   // If the dividend is a constant zero
   // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
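
The relaxed AND-mask test accepts any mask of the form -(2^k) with 2^k >= d: such a mask clears the low k bits, so the masked dividend is a multiple of d and the arithmetic shift already divides it exactly, making the negative-dividend rounding fix-up unnecessary. A standalone check of that property (d restricted to positive powers of two, as on this code path; arithmetic right shift of negatives is assumed, as on all mainstream compilers):

#include <cassert>
#include <cstdint>

int main() {
  for (int k = 3; k <= 6; ++k) {
    int32_t mask = -(int32_t(1) << k);          // the AND mask: -(2^k)
    for (int d_shift = 0; d_shift <= k; ++d_shift) {
      int32_t d = int32_t(1) << d_shift;        // divisor d, a power of two <= 2^k
      for (int32_t x = -5000; x <= 5000; ++x) {
        int32_t masked = x & mask;              // low k bits cleared: a multiple of d
        // Truncating division and arithmetic shift agree on exact multiples,
        // so the rounding fix-up can be skipped.
        assert(masked / d == (masked >> d_shift));
      }
    }
  }
  return 0;
}
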
--- a/hotspot/src/share/vm/opto/doCall.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/doCall.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -795,7 +795,7 @@
 
     ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
     if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
-        (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) {
+        (ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) {
       // ikl is the same or a better type than the original actual_receiver,
       // e.g. static receiver from bytecodes.
       actual_receiver = ikl;
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -587,7 +587,7 @@
 #ifdef ASSERT
   _bci    = kit->bci();
   Parse* parser = kit->is_Parse();
-  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
   _block  = block;
 #endif
 }
@@ -596,7 +596,7 @@
 #ifdef ASSERT
   assert(kit->bci() == _bci, "bci must not shift");
   Parse* parser = kit->is_Parse();
-  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
   assert(block == _block,    "block must not shift");
 #endif
   kit->set_map(_map);
@@ -1049,10 +1049,19 @@
 //-------------------------load_array_length-----------------------------------
 Node* GraphKit::load_array_length(Node* array) {
   // Special-case a fresh allocation to avoid building nodes:
-  Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn);
-  if (alen != NULL)  return alen;
-  Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
-  return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
+  AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn);
+  Node *alen;
+  if (alloc == NULL) {
+    Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
+    alen = _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
+  } else {
+    alen = alloc->Ideal_length();
+    Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_aryptr(), &_gvn);
+    if (ccast != alen) {
+      alen = _gvn.transform(ccast);
+    }
+  }
+  return alen;
 }
 
 //------------------------------do_null_check----------------------------------
@@ -1180,6 +1189,12 @@
   else
     reason = Deoptimization::Reason_div0_check;
 
+  // %%% Since Reason_unhandled is not recorded on a per-bytecode basis,
+  // ciMethodData::has_trap_at will return a conservative -1 if any
+  // must-be-null assertion has failed.  This could cause performance
+  // problems for a method after its first do_null_assert failure.
+  // Consider using 'Reason_class_check' instead?
+
   // To cause an implicit null check, we set the not-null probability
   // to the maximum (PROB_MAX).  For an explicit check the probability
   // is set to a smaller value.
@@ -1367,6 +1382,10 @@
   BarrierSet* bs = Universe::heap()->barrier_set();
   set_control(ctl);
   switch (bs->kind()) {
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+        g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
+      break;
 
     case BarrierSet::CardTableModRef:
     case BarrierSet::CardTableExtension:
@@ -1391,6 +1410,10 @@
   BarrierSet* bs = Universe::heap()->barrier_set();
   set_control(ctl);
   switch (bs->kind()) {
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+        g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
+      break;
 
     case BarrierSet::CardTableModRef:
     case BarrierSet::CardTableExtension:
@@ -2833,20 +2856,18 @@
   assert(just_allocated_object(control()) == javaoop, "just allocated");
 
 #ifdef ASSERT
-  { // Verify that the AllocateNode::Ideal_foo recognizers work:
-    Node* kn = alloc->in(AllocateNode::KlassNode);
-    Node* ln = alloc->in(AllocateNode::ALength);
-    assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn,
-           "Ideal_klass works");
-    assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn,
-           "Ideal_klass works");
+  { // Verify that the AllocateNode::Ideal_allocation recognizers work:
+    assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc,
+           "Ideal_allocation works");
+    assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc,
+           "Ideal_allocation works");
     if (alloc->is_AllocateArray()) {
-      assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln,
-             "Ideal_length works");
-      assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln,
-             "Ideal_length works");
+      assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(),
+             "Ideal_allocation works");
+      assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(),
+             "Ideal_allocation works");
     } else {
-      assert(ln->is_top(), "no length, please");
+      assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please");
     }
   }
 #endif //ASSERT
@@ -3095,25 +3116,20 @@
   // (This happens via a non-constant argument to inline_native_newArray.)
   // In any case, the value of klass_node provides the desired array type.
   const TypeInt* length_type = _gvn.find_int_type(length);
-  const TypeInt* narrow_length_type = NULL;
   const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
   if (ary_type->isa_aryptr() && length_type != NULL) {
     // Try to get a better type than POS for the size
     ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
-    narrow_length_type = ary_type->is_aryptr()->size();
-    if (narrow_length_type == length_type)
-      narrow_length_type = NULL;
   }
 
   Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
 
-  // Cast length on remaining path to be positive:
-  if (narrow_length_type != NULL) {
-    Node* ccast = new (C, 2) CastIINode(length, narrow_length_type);
-    ccast->set_req(0, control());
-    _gvn.set_type_bottom(ccast);
-    record_for_igvn(ccast);
-    if (map()->find_edge(length) >= 0) {
+  // Cast length on remaining path to be as narrow as possible
+  if (map()->find_edge(length) >= 0) {
+    Node* ccast = alloc->make_ideal_length(ary_type, &_gvn);
+    if (ccast != length) {
+      _gvn.set_type_bottom(ccast);
+      record_for_igvn(ccast);
       replace_in_map(length, ccast);
     }
   }
@@ -3177,3 +3193,251 @@
   }
   return NULL;
 }
+
+void GraphKit::g1_write_barrier_pre(Node* obj,
+                                    Node* adr,
+                                    uint alias_idx,
+                                    Node* val,
+                                    const Type* val_type,
+                                    BasicType bt) {
+  IdealKit ideal(gvn(), control(), merged_memory(), true);
+#define __ ideal.
+  __ declares_done();
+
+  Node* thread = __ thread();
+
+  Node* no_ctrl = NULL;
+  Node* no_base = __ top();
+  Node* zero = __ ConI(0);
+
+  float likely  = PROB_LIKELY(0.999);
+  float unlikely  = PROB_UNLIKELY(0.999);
+
+  BasicType active_type = in_bytes(PtrQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
+  assert(in_bytes(PtrQueue::byte_width_of_active()) == 4 || in_bytes(PtrQueue::byte_width_of_active()) == 1, "flag width");
+
+  // Offsets into the thread
+  const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() +  // 648
+                                          PtrQueue::byte_offset_of_active());
+  const int index_offset   = in_bytes(JavaThread::satb_mark_queue_offset() +  // 656
+                                          PtrQueue::byte_offset_of_index());
+  const int buffer_offset  = in_bytes(JavaThread::satb_mark_queue_offset() +  // 652
+                                          PtrQueue::byte_offset_of_buf());
+  // Now the actual pointers into the thread
+
+  // set_control( ctl);
+
+  Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset));
+  Node* buffer_adr  = __ AddP(no_base, thread, __ ConX(buffer_offset));
+  Node* index_adr   = __ AddP(no_base, thread, __ ConX(index_offset));
+
+  // Now some of the values
+
+  Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
+  Node* index   = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
+  Node* buffer  = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+
+  // if (!marking)
+  __ if_then(marking, BoolTest::ne, zero); {
+
+    const Type* t1 = adr->bottom_type();
+    const Type* t2 = val->bottom_type();
+
+    Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);
+    // if (orig != NULL)
+    __ if_then(orig, BoolTest::ne, null()); {
+
+      // load original value
+      // alias_idx correct??
+
+      // is the queue for this thread full?
+      __ if_then(index, BoolTest::ne, zero, likely); {
+
+        // decrement the index
+        Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
+        Node* next_indexX = next_index;
+#ifdef _LP64
+          // We could refine the type for what it's worth
+          // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
+          next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
+#endif // _LP64
+
+        // Now get the buffer location we will log the original value into and store it
+
+        Node *log_addr = __ AddP(no_base, buffer, next_indexX);
+        // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM));
+        __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw);
+
+
+        // update the index
+        // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
+        // This is a hack to force this store to occur before the oop store that is coming up
+        __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM));
+
+      } __ else_(); {
+
+        // logging buffer is full, call the runtime
+        const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
+        // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread);
+        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread);
+      } __ end_if();
+    } __ end_if();
+  } __ end_if();
+
+  __ drain_delay_transform();
+  set_control( __ ctrl());
+  set_all_memory( __ merged_memory());
+
+#undef __
+}
+
+//
+// Update the card table and add card address to the queue
+//
+void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) {
+#define __ ideal->
+  Node* zero = __ ConI(0);
+  Node* no_base = __ top();
+  BasicType card_bt = T_BYTE;
+  // Smash zero into card. MUST BE ORDERED WRT TO STORE
+  __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw);
+
+  //  Now do the queue work
+  __ if_then(index, BoolTest::ne, zero); {
+
+    Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
+    Node* next_indexX = next_index;
+#ifdef _LP64
+    // We could refine the type for what it's worth
+    // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
+    next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
+#endif // _LP64
+    Node* log_addr = __ AddP(no_base, buffer, next_indexX);
+
+    __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw);
+    __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
+
+  } __ else_(); {
+    __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
+  } __ end_if();
+#undef __
+}
+
+void GraphKit::g1_write_barrier_post(Node* store,
+                                     Node* obj,
+                                     Node* adr,
+                                     uint alias_idx,
+                                     Node* val,
+                                     BasicType bt,
+                                     bool use_precise) {
+  // If we are writing a NULL then we need no post barrier
+
+  if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
+    // Must be NULL
+    const Type* t = val->bottom_type();
+    assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");
+    // No post barrier if writing NULL
+    return;
+  }
+
+  if (!use_precise) {
+    // All card marks for a (non-array) instance are in one place:
+    adr = obj;
+  }
+  // (Else it's an array (or unknown), and we want more precise card marks.)
+  assert(adr != NULL, "");
+
+  IdealKit ideal(gvn(), control(), merged_memory(), true);
+#define __ ideal.
+  __ declares_done();
+
+  Node* thread = __ thread();
+
+  Node* no_ctrl = NULL;
+  Node* no_base = __ top();
+  float likely  = PROB_LIKELY(0.999);
+  float unlikely  = PROB_UNLIKELY(0.999);
+  Node* zero = __ ConI(0);
+  Node* zeroX = __ ConX(0);
+
+  // Get the alias_index for raw card-mark memory
+  const TypePtr* card_type = TypeRawPtr::BOTTOM;
+
+  const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
+
+  // Get the address of the card table
+  CardTableModRefBS* ct =
+    (CardTableModRefBS*)(Universe::heap()->barrier_set());
+  Node *card_table = __ makecon(TypeRawPtr::make((address)ct->byte_map_base));
+  // Get base of card map
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+
+  // Offsets into the thread
+  const int index_offset  = in_bytes(JavaThread::dirty_card_queue_offset() +
+                                     PtrQueue::byte_offset_of_index());
+  const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() +
+                                     PtrQueue::byte_offset_of_buf());
+
+  // Pointers into the thread
+
+  Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset));
+  Node* index_adr =  __ AddP(no_base, thread, __ ConX(index_offset));
+
+  // Now some values
+
+  Node* index  = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
+  Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+
+
+  // Convert the store obj pointer to an int prior to doing math on it
+  // Using addr rather than obj gives accurate card marks
+
+  // Node* cast = __ CastPX(no_ctrl, adr /* obj */);
+
+  // Must use ctrl to prevent "integerized oop" existing across safepoint
+  Node* cast =  __ CastPX(__ ctrl(), ( use_precise ? adr : obj ));
+
+  // Divide pointer by card size
+  Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
+
+  // Combine card table base and card offset
+  Node *card_adr = __ AddP(no_base, card_table, card_offset );
+
+  // If we know the value being stored, does the store cross regions?
+
+  if (val != NULL) {
+    // Does the store cause us to cross regions?
+
+    // Should be able to do an unsigned compare of region_size instead of
+    // an extra shift. Do we have an unsigned compare??
+    // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
+    Node* xor_res =  __ URShiftX ( __ XorX( cast,  __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
+
+    // if (xor_res == 0) same region so skip
+    __ if_then(xor_res, BoolTest::ne, zeroX); {
+
+      // No barrier if we are storing a NULL
+      __ if_then(val, BoolTest::ne, null(), unlikely); {
+
+        // Ok must mark the card if not already dirty
+
+        // load the original value of the card
+        Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
+
+        __ if_then(card_val, BoolTest::ne, zero); {
+          g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+        } __ end_if();
+      } __ end_if();
+    } __ end_if();
+  } else {
+    g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+  }
+
+
+  __ drain_delay_transform();
+  set_control( __ ctrl());
+  set_all_memory( __ merged_memory());
+#undef __
+
+}
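
At runtime the IdealKit graphs built above amount to: while marking is active, the pre-barrier enqueues the field's previous value into the thread's SATB buffer (calling into the runtime when the buffer is full), and the post-barrier dirties and enqueues the card for the updated field unless the store is region-local, stores NULL, or the card is already dirty. A C++ sketch of that control flow only; the sizes, constants and queue layout below are illustrative stand-ins, not the real PtrQueue/CardTableModRefBS, and the post-barrier checks are reordered for clarity.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative model only: fixed sizes, no real PtrQueue/HeapRegion layout.
const std::size_t kSatbBufCap     = 4;
const int         kLogRegionBytes = 20;   // stands in for HeapRegion::LogOfHRGrainBytes
const int         kCardShift      = 9;    // stands in for CardTableModRefBS::card_shift
const std::size_t kCards          = 1 << 16;

struct ToyThread {
  bool        marking_active;
  void*       satb_buf[kSatbBufCap];
  std::size_t satb_index;                 // counts down; 0 means the buffer is full
};

static uint8_t card_table[kCards];        // 0 == dirty, 1 == clean in this model

void runtime_log_satb(ToyThread*, void*) { /* slow-path stand-in for g1_wb_pre */ }
void runtime_enqueue_card(uint8_t*)      { /* slow-path stand-in for g1_wb_post */ }

// Pre-barrier: while marking is active, remember the value being overwritten.
void g1_pre_barrier(ToyThread* t, void** field) {
  if (!t->marking_active) return;
  void* orig = *field;
  if (orig == nullptr) return;
  if (t->satb_index != 0) {
    t->satb_buf[--t->satb_index] = orig;  // fast path: log into the thread buffer
  } else {
    runtime_log_satb(t, orig);            // buffer full: hand off to the runtime
  }
}

// Post-barrier: dirty and enqueue the card unless the store stays inside one
// region, stores NULL, or the card is already dirty.
void g1_post_barrier(void** field, void* new_val) {
  if (new_val == nullptr) return;
  uintptr_t from = reinterpret_cast<uintptr_t>(field);
  uintptr_t to   = reinterpret_cast<uintptr_t>(new_val);
  if (((from ^ to) >> kLogRegionBytes) == 0) return;   // same region: skip
  uint8_t* card = &card_table[(from >> kCardShift) % kCards];
  if (*card == 0) return;                              // already dirty: skip
  *card = 0;                                           // mark the card dirty
  runtime_enqueue_card(card);                          // queue it for refinement
}

int main() {
  std::memset(card_table, 1, sizeof(card_table));      // all cards start clean
  ToyThread t = { true, {}, kSatbBufCap };
  void* field = reinterpret_cast<void*>(0x100000);     // pretend old referent
  g1_pre_barrier(&t, &field);
  g1_post_barrier(&field, reinterpret_cast<void*>(0x900000));
  std::printf("satb entries logged: %zu\n", kSatbBufCap - t.satb_index);
  return 0;
}
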
--- a/hotspot/src/share/vm/opto/graphKit.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -24,6 +24,7 @@
 
 class FastLockNode;
 class FastUnlockNode;
+class IdealKit;
 class Parse;
 class RootNode;
 
@@ -581,6 +582,27 @@
             && Universe::heap()->can_elide_tlab_store_barriers());
   }
 
+  // G1 pre/post barriers
+  void g1_write_barrier_pre(Node* obj,
+                            Node* adr,
+                            uint alias_idx,
+                            Node* val,
+                            const Type* val_type,
+                            BasicType bt);
+
+  void g1_write_barrier_post(Node* store,
+                             Node* obj,
+                             Node* adr,
+                             uint alias_idx,
+                             Node* val,
+                             BasicType bt,
+                             bool use_precise);
+  // Helper function for g1
+  private:
+  void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
+                    Node* buffer, const TypeFunc* tf);
+
+  public:
   // Helper function to round double arguments before a call
   void round_double_arguments(ciMethod* dest_method);
   void round_double_result(ciMethod* dest_method);
--- a/hotspot/src/share/vm/opto/ifg.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/ifg.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -485,8 +485,9 @@
     // Liveout things are presumed live for the whole block.  We accumulate
     // 'area' accordingly.  If they get killed in the block, we'll subtract
     // the unused part of the block from the area.
-    double cost = b->_freq * double(last_inst-last_phi);
-    assert( cost >= 0, "negative spill cost" );
+    int inst_count = last_inst - last_phi;
+    double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+    assert(!(cost < 0.0), "negative spill cost" );
     IndexSetIterator elements(&liveout);
     uint lidx;
     while ((lidx = elements.next()) != 0) {
@@ -590,7 +591,7 @@
         } else {                // Else it is live
           // A DEF also ends 'area' partway through the block.
           lrgs(r)._area -= cost;
-          assert( lrgs(r)._area >= 0, "negative spill area" );
+          assert(!(lrgs(r)._area < 0.0), "negative spill area" );
 
           // Insure high score for immediate-use spill copies so they get a color
           if( n->is_SpillCopy()
@@ -703,8 +704,9 @@
 
       } // End of if normal register-allocated value
 
-      cost -= b->_freq;         // Area remaining in the block
-      if( cost < 0.0 ) cost = 0.0;  // Cost goes negative in the Phi area
+      // Area remaining in the block
+      inst_count--;
+      cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
 
       // Make all inputs live
       if( !n->is_Phi() ) {      // Phi function uses come from prior block
@@ -751,7 +753,7 @@
             assert( pressure[0] == count_int_pressure  (&liveout), "" );
             assert( pressure[1] == count_float_pressure(&liveout), "" );
           }
-          assert( lrg._area >= 0, "negative spill area" );
+          assert(!(lrg._area < 0.0), "negative spill area" );
         }
       }
     } // End of reverse pass over all instructions in block
--- a/hotspot/src/share/vm/opto/lcm.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/lcm.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -595,7 +595,7 @@
 
       // A few node types require changing a required edge to a precedence edge
       // before allocation.
-      if( UseConcMarkSweepGC ) {
+      if( UseConcMarkSweepGC || UseG1GC ) {
         if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
           // Note: Required edges with an index greater than oper_input_base
           // are not supported by the allocator.
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -679,6 +679,10 @@
   CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
   post_head->set_post_loop(main_head);
 
+  // Reduce the post-loop trip count.
+  CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
+  post_end->_prob = PROB_FAIR;
+
   // Build the main-loop normal exit.
   IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end);
   _igvn.register_new_node_with_optimizer( new_main_exit );
@@ -748,6 +752,9 @@
   pre_head->set_pre_loop(main_head);
   Node *pre_incr = old_new[incr->_idx];
 
+  // Reduce the pre-loop trip count.
+  pre_end->_prob = PROB_FAIR;
+
   // Find the pre-loop normal exit.
   Node* pre_exit = pre_end->proj_out(false);
   assert( pre_exit->Opcode() == Op_IfFalse, "" );
@@ -767,8 +774,8 @@
   register_new_node( min_cmp , new_pre_exit );
   register_new_node( min_bol , new_pre_exit );
 
-  // Build the IfNode
-  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_FAIR, COUNT_UNKNOWN );
+  // Build the IfNode (assume the main-loop is executed always).
+  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
   _igvn.register_new_node_with_optimizer( min_iff );
   set_idom(min_iff, new_pre_exit, dd_main_head);
   set_loop(min_iff, loop->_parent);
@@ -1012,6 +1019,8 @@
     if (!has_ctrl(old))
       set_loop(nnn, loop);
   }
+
+  loop->record_for_igvn();
 }
 
 //------------------------------do_maximally_unroll----------------------------
@@ -1581,10 +1590,10 @@
 
 //=============================================================================
 //------------------------------iteration_split_impl---------------------------
-void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Check and remove empty loops (spam micro-benchmarks)
   if( policy_do_remove_empty_loop(phase) )
-    return;                     // Here we removed an empty loop
+    return true;                     // Here we removed an empty loop
 
   bool should_peel = policy_peeling(phase); // Should we peel?
 
@@ -1594,7 +1603,8 @@
   // This removes loop-invariant tests (usually null checks).
   if( !_head->is_CountedLoop() ) { // Non-counted loop
     if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
-      return;
+      // Partial peel succeeded so terminate this round of loop opts
+      return false;
     }
     if( should_peel ) {            // Should we peel?
 #ifndef PRODUCT
@@ -1604,14 +1614,14 @@
     } else if( should_unswitch ) {
       phase->do_unswitching(this, old_new);
     }
-    return;
+    return true;
   }
   CountedLoopNode *cl = _head->as_CountedLoop();
 
-  if( !cl->loopexit() ) return; // Ignore various kinds of broken loops
+  if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops
 
   // Do nothing special to pre- and post- loops
-  if( cl->is_pre_loop() || cl->is_post_loop() ) return;
+  if( cl->is_pre_loop() || cl->is_post_loop() ) return true;
 
   // Compute loop trip count from profile data
   compute_profile_trip_cnt(phase);
@@ -1624,11 +1634,11 @@
       // Here we did some unrolling and peeling.  Eventually we will
       // completely unroll this loop and it will no longer be a loop.
       phase->do_maximally_unroll(this,old_new);
-      return;
+      return true;
     }
     if (should_unswitch) {
       phase->do_unswitching(this, old_new);
-      return;
+      return true;
     }
   }
 
@@ -1689,14 +1699,16 @@
     if( should_peel )           // Might want to peel but do nothing else
       phase->do_peeling(this,old_new);
   }
+  return true;
 }
 
 
 //=============================================================================
 //------------------------------iteration_split--------------------------------
-void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Recursively iteration split nested loops
-  if( _child ) _child->iteration_split( phase, old_new );
+  if( _child && !_child->iteration_split( phase, old_new ))
+    return false;
 
   // Clean out prior deadwood
   DCE_loop_body();
@@ -1718,7 +1730,9 @@
       _allow_optimizations &&
       !tail()->is_top() ) {     // Also ignore the occasional dead backedge
     if (!_has_call) {
-      iteration_split_impl( phase, old_new );
+      if (!iteration_split_impl( phase, old_new )) {
+        return false;
+      }
     } else if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
     }
@@ -1727,5 +1741,7 @@
   // Minor offset re-organization to remove loop-fallout uses of
   // trip counter.
   if( _head->is_CountedLoop() ) phase->reorg_offsets( this );
-  if( _next ) _next->iteration_split( phase, old_new );
+  if( _next && !_next->iteration_split( phase, old_new ))
+    return false;
+  return true;
 }
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1279,7 +1279,7 @@
     // Visit all children, looking for Phis
     for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
       Node *out = cl->out(i);
-      if (!out->is_Phi())  continue; // Looking for phis
+      if (!out->is_Phi() || out == phi)  continue; // Looking for other phis
       PhiNode* phi2 = out->as_Phi();
       Node *incr2 = phi2->in( LoopNode::LoopBackControl );
       // Look for induction variables of the form:  X += constant
@@ -1388,6 +1388,37 @@
 
 #endif
 
+static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) {
+  if (loop == root) {
+    if (loop->_child != NULL) {
+      log->begin_head("loop_tree");
+      log->end_head();
+      if( loop->_child ) log_loop_tree(root, loop->_child, log);
+      log->tail("loop_tree");
+      assert(loop->_next == NULL, "what?");
+    }
+  } else {
+    Node* head = loop->_head;
+    log->begin_head("loop");
+    log->print(" idx='%d' ", head->_idx);
+    if (loop->_irreducible) log->print("irreducible='1' ");
+    if (head->is_Loop()) {
+      if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' ");
+      if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' ");
+    }
+    if (head->is_CountedLoop()) {
+      CountedLoopNode* cl = head->as_CountedLoop();
+      if (cl->is_pre_loop())  log->print("pre_loop='%d' ",  cl->main_idx());
+      if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx);
+      if (cl->is_post_loop()) log->print("post_loop='%d' ",  cl->main_idx());
+    }
+    log->end_head();
+    if( loop->_child ) log_loop_tree(root, loop->_child, log);
+    log->tail("loop");
+    if( loop->_next  ) log_loop_tree(root, loop->_next, log);
+  }
+}
+
 //=============================================================================
 //------------------------------PhaseIdealLoop---------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
@@ -1624,10 +1655,13 @@
   // Cleanup any modified bits
   _igvn.optimize();
 
-  // Do not repeat loop optimizations if irreducible loops are present
-  // by claiming no-progress.
-  if( _has_irreducible_loops )
-    C->clear_major_progress();
+  // disable assert until issue with split_flow_path is resolved (6742111)
+  // assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(),
+  //        "shouldn't introduce irreducible loops");
+
+  if (C->log() != NULL) {
+    log_loop_tree(_ltree_root, _ltree_root, C->log());
+  }
 }
 
 #ifndef PRODUCT
@@ -2732,11 +2766,7 @@
 }
 
 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
-
-  // Indent by loop nesting depth
-  for( uint x = 0; x < loop->_nest; x++ )
-    tty->print("  ");
-  tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx);
+  loop->dump_head();
 
   // Now scan for CFG nodes in the same loop
   for( uint j=idx; j > 0;  j-- ) {
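
log_loop_tree() above walks the child/sibling-linked IdealLoopTree and emits nested <loop> elements into the compile log. The following self-contained sketch (not HotSpot code) shows the same recursion shape with a toy node type and plain printf output in place of CompileLog; all names are illustrative.

// Standalone sketch: recursive emission of a nested <loop_tree>/<loop>
// structure for a child/sibling-linked tree.
#include <cstdio>

struct LoopSketch {
  int _idx;
  LoopSketch* _child;
  LoopSketch* _next;
};

static void log_loop(const LoopSketch* loop) {
  std::printf("<loop idx='%d'>\n", loop->_idx);
  if (loop->_child) log_loop(loop->_child);   // children nest inside
  std::printf("</loop>\n");
  if (loop->_next) log_loop(loop->_next);     // siblings are peers, not nested
}

int main() {
  LoopSketch inner2 = { 7, nullptr, nullptr };
  LoopSketch inner1 = { 5, nullptr, &inner2 };
  LoopSketch root   = { 1, &inner1, nullptr };  // artificial root is not printed
  std::printf("<loop_tree>\n");
  if (root._child) log_loop(root._child);
  std::printf("</loop_tree>\n");
  return 0;
}
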
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -192,6 +192,8 @@
   int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
   void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
 
+  int main_idx() const { return _main_idx; }
+
 
   void set_pre_loop  (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
   void set_main_loop (                     ) { assert(is_normal_loop(),""); _loop_flags |= Main;                         }
@@ -323,12 +325,14 @@
   // Returns TRUE if loop tree is structurally changed.
   bool beautify_loops( PhaseIdealLoop *phase );
 
-  // Perform iteration-splitting on inner loops.  Split iterations to avoid
-  // range checks or one-shot null checks.
-  void iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
+  // Perform iteration-splitting on inner loops.  Split iterations to
+  // avoid range checks or one-shot null checks.  Returns false if the
+  // current round of loop opts should stop.
+  bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
 
-  // Driver for various flavors of iteration splitting
-  void iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
+  // Driver for various flavors of iteration splitting.  Returns false
+  // if the current round of loop opts should stop.
+  bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
 
   // Given dominators, try to find loops with calls that must always be
   // executed (call dominates loop tail).  These loops do not need non-call
--- a/hotspot/src/share/vm/opto/loopopts.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1903,9 +1903,6 @@
       // Use in a phi is considered a use in the associated predecessor block
       use_c = use->in(0)->in(j);
     }
-    if (use_c->is_CountedLoop()) {
-      use_c = use_c->in(LoopNode::EntryControl);
-    }
     set_ctrl(n_clone, use_c);
     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
     get_loop(use_c)->_body.push(n_clone);
@@ -2667,6 +2664,10 @@
   // Fix this by adjusting to use the post-increment trip counter.
   Node *phi = cl->phi();
   if( !phi ) return;            // Dead infinite loop
+
+  // Shape messed up, probably by iteration_split_impl
+  if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return;
+
   bool progress = true;
   while (progress) {
     progress = false;
--- a/hotspot/src/share/vm/opto/macro.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/macro.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -944,25 +944,7 @@
     mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
   }
 
-  Node* eden_top_adr;
-  Node* eden_end_adr;
-  set_eden_pointers(eden_top_adr, eden_end_adr);
-
-  uint raw_idx = C->get_alias_index(TypeRawPtr::BOTTOM);
   assert(ctrl != NULL, "must have control");
-
-  // Load Eden::end.  Loop invariant and hoisted.
-  //
-  // Note: We set the control input on "eden_end" and "old_eden_top" when using
-  //       a TLAB to work around a bug where these values were being moved across
-  //       a safepoint.  These are not oops, so they cannot be include in the oop
-  //       map, but the can be changed by a GC.   The proper way to fix this would
-  //       be to set the raw memory state when generating a  SafepointNode.  However
-  //       this will require extensive changes to the loop optimization in order to
-  //       prevent a degradation of the optimization.
-  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
-  Node* eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
-
   // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
   // they will not be used if "always_slow" is set
   enum { slow_result_path = 1, fast_result_path = 2 };
@@ -982,12 +964,15 @@
     initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn);
   }
 
-  if (DTraceAllocProbes) {
+  if (DTraceAllocProbes ||
+      !UseTLAB && (!Universe::heap()->supports_inline_contig_alloc() ||
+                   (UseConcMarkSweepGC && CMSIncrementalMode))) {
     // Force slow-path allocation
     always_slow = true;
     initial_slow_test = NULL;
   }
 
+
   enum { too_big_or_final_path = 1, need_gc_path = 2 };
   Node *slow_region = NULL;
   Node *toobig_false = ctrl;
@@ -1016,6 +1001,23 @@
   Node *slow_mem = mem;  // save the current memory state for slow path
   // generate the fast allocation code unless we know that the initial test will always go slow
   if (!always_slow) {
+    Node* eden_top_adr;
+    Node* eden_end_adr;
+
+    set_eden_pointers(eden_top_adr, eden_end_adr);
+
+    // Load Eden::end.  Loop invariant and hoisted.
+    //
+    // Note: We set the control input on "eden_end" and "old_eden_top" when using
+    //       a TLAB to work around a bug where these values were being moved across
+    //       a safepoint.  These are not oops, so they cannot be included in the oop
+    //       map, but they can be changed by a GC.  The proper way to fix this would
+    //       be to set the raw memory state when generating a SafepointNode.  However
+    //       this will require extensive changes to the loop optimization in order to
+    //       prevent a degradation of the optimization.
+    //       See comment in memnode.hpp, around line 227 in class LoadPNode.
+    Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
+
     // allocate the Region and Phi nodes for the result
     result_region = new (C, 3) RegionNode(3);
     result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM );
--- a/hotspot/src/share/vm/opto/matcher.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -273,7 +273,7 @@
   find_shared( C->root() );
   find_shared( C->top() );
 
-  C->print_method("Before Matching", 2);
+  C->print_method("Before Matching");
 
   // Swap out to old-space; emptying new-space
   Arena *old = C->node_arena()->move_contents(C->old_arena());
@@ -840,7 +840,7 @@
               _new2old_map.map(m->_idx, n);
 #endif
               if (m->in(0) != NULL) // m might be top
-                collect_null_checks(m);
+                collect_null_checks(m, n);
             } else {                // Else just a regular 'ol guy
               m = n->clone();       // So just clone into new-space
 #ifdef ASSERT
@@ -1478,12 +1478,19 @@
         m = _mem_node;
         assert(m != NULL && m->is_Mem(), "expecting memory node");
       }
-      if (m->adr_type() != mach->adr_type()) {
+      const Type* mach_at = mach->adr_type();
+      // A DecodeN node consumed by an address may have a different type
+      // than its input.  Don't compare types in that case.
+      if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
+          m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
+        mach_at = m->adr_type();
+      }
+      if (m->adr_type() != mach_at) {
         m->dump();
         tty->print_cr("mach:");
         mach->dump(1);
       }
-      assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type");
+      assert(m->adr_type() == mach_at, "matcher should not change adr type");
     }
 #endif
   }
@@ -1995,7 +2002,7 @@
 // it.  Used by later implicit-null-check handling.  Actually collects
 // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
 // value being tested.
-void Matcher::collect_null_checks( Node *proj ) {
+void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
   Node *iff = proj->in(0);
   if( iff->Opcode() == Op_If ) {
     // During matching If's have Bool & Cmp side-by-side
@@ -2008,20 +2015,47 @@
     if (ct == TypePtr::NULL_PTR ||
         (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
 
+      bool push_it = false;
       if( proj->Opcode() == Op_IfTrue ) {
         extern int all_null_checks_found;
         all_null_checks_found++;
         if( b->_test._test == BoolTest::ne ) {
-          _null_check_tests.push(proj);
-          _null_check_tests.push(cmp->in(1));
+          push_it = true;
         }
       } else {
         assert( proj->Opcode() == Op_IfFalse, "" );
         if( b->_test._test == BoolTest::eq ) {
-          _null_check_tests.push(proj);
-          _null_check_tests.push(cmp->in(1));
+          push_it = true;
         }
       }
+      if( push_it ) {
+        _null_check_tests.push(proj);
+        Node* val = cmp->in(1);
+#ifdef _LP64
+        if (UseCompressedOops && !Matcher::clone_shift_expressions &&
+            val->bottom_type()->isa_narrowoop()) {
+          //
+          // Look for a DecodeN node which should be pinned to orig_proj.
+          // On platforms (Sparc) which cannot handle 2 adds
+          // in an addressing mode, we have to keep a DecodeN node and
+          // use it to do the implicit NULL check in the address.
+          //
+          // DecodeN node was pinned to non-null path (orig_proj) during
+          // CastPP transformation in final_graph_reshaping_impl().
+          //
+          uint cnt = orig_proj->outcnt();
+          for (uint i = 0; i < orig_proj->outcnt(); i++) {
+            Node* d = orig_proj->raw_out(i);
+            if (d->is_DecodeN() && d->in(1) == val) {
+              val = d;
+              val->set_req(0, NULL); // Unpin now.
+              break;
+            }
+          }
+        }
+#endif
+        _null_check_tests.push(val);
+      }
     }
   }
 }
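
The new code above records the DecodeN pinned to the not-null projection as the value of the implicit null check when compressed oops are in use. Here is a standalone sketch (not HotSpot code) of that lookup, scanning the projection's users for a decode of the tested value; NodeSketch and its fields are illustrative, not C2 APIs.

// Standalone sketch: prefer a "decode" user of the not-null projection that
// wraps the tested compressed value, else fall back to the raw value.
#include <cstdio>
#include <vector>

struct NodeSketch {
  const char* name;
  NodeSketch* input;                 // value a decode node consumes (else null)
  bool is_decode;
  std::vector<NodeSketch*> outs;     // users, e.g. of the IfTrue/IfFalse proj
};

static NodeSketch* null_check_value(NodeSketch* proj, NodeSketch* tested) {
  for (NodeSketch* use : proj->outs) {
    if (use->is_decode && use->input == tested)
      return use;                    // use the decode for the implicit check
  }
  return tested;                     // no pinned decode found
}

int main() {
  NodeSketch narrow = { "narrow_oop", nullptr, false, {} };
  NodeSketch decode = { "decode",     &narrow, true,  {} };
  NodeSketch proj   = { "if_true",    nullptr, false, { &decode } };
  std::printf("record: %s\n", null_check_value(&proj, &narrow)->name);
  return 0;
}
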
--- a/hotspot/src/share/vm/opto/matcher.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/matcher.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -166,7 +166,7 @@
   // List of IfFalse or IfTrue Nodes that indicate a taken null test.
   // List is valid in the post-matching space.
   Node_List _null_check_tests;
-  void collect_null_checks( Node *proj );
+  void collect_null_checks( Node *proj, Node *orig_proj );
   void validate_null_checks( );
 
   Matcher( Node_List &proj_list );
--- a/hotspot/src/share/vm/opto/memnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1887,6 +1887,38 @@
   return tap->size();
 }
 
+//-------------------------------Ideal---------------------------------------
+// Feed through the length in AllocateArray(...length...)._length.
+Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  Node* p = MemNode::Ideal_common(phase, can_reshape);
+  if (p)  return (p == NodeSentinel) ? NULL : p;
+
+  // Take apart the address into an oop and an offset.
+  // Return NULL if we cannot.
+  Node*    adr    = in(MemNode::Address);
+  intptr_t offset = 0;
+  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase,  offset);
+  if (base == NULL)     return NULL;
+  const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
+  if (tary == NULL)     return NULL;
+
+  // We can fetch the length directly through an AllocateArrayNode.
+  // This works even if the length is not constant (clone or newArray).
+  if (offset == arrayOopDesc::length_offset_in_bytes()) {
+    AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
+    if (alloc != NULL) {
+      Node* allocated_length = alloc->Ideal_length();
+      Node* len = alloc->make_ideal_length(tary, phase);
+      if (allocated_length != len) {
+        // New CastII improves on this.
+        return len;
+      }
+    }
+  }
+
+  return NULL;
+}
+
 //------------------------------Identity---------------------------------------
 // Feed through the length in AllocateArray(...length...)._length.
 Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
@@ -1905,15 +1937,22 @@
   // We can fetch the length directly through an AllocateArrayNode.
   // This works even if the length is not constant (clone or newArray).
   if (offset == arrayOopDesc::length_offset_in_bytes()) {
-    Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase);
-    if (allocated_length != NULL) {
-      return allocated_length;
+    AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
+    if (alloc != NULL) {
+      Node* allocated_length = alloc->Ideal_length();
+      // Do not allow make_ideal_length to allocate a CastII node.
+      Node* len = alloc->make_ideal_length(tary, phase, false);
+      if (allocated_length == len) {
+        // Return allocated_length only if it would not be improved by a CastII.
+        return allocated_length;
+      }
     }
   }
 
   return this;
 
 }
+
 //=============================================================================
 //---------------------------StoreNode::make-----------------------------------
 // Polymorphic factory method:
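
LoadRangeNode::Ideal/Identity above replace an array-length load with the length expression the AllocateArrayNode was given. The toy sketch below (not HotSpot code) shows the same short-circuit, using a plain map in place of the allocation lookup; every name here is illustrative.

// Standalone sketch: feed an array-length "load" directly from the
// allocation that produced the array, bypassing memory.
#include <cstdio>
#include <unordered_map>

struct AllocSketch { int length_node_id; };

// base-pointer id -> allocation that produced it (toy stand-in for
// AllocateArrayNode::Ideal_array_allocation()).
static std::unordered_map<int, AllocSketch> g_allocations;

static int load_array_length(int base_id, int load_node_id) {
  auto it = g_allocations.find(base_id);
  if (it != g_allocations.end())
    return it->second.length_node_id;   // use the allocation's length
  return load_node_id;                  // no allocation found: keep the load
}

int main() {
  g_allocations[42] = { 7 };            // array 42 was allocated with length node 7
  std::printf("known array length: node %d\n", load_array_length(42, 99));
  std::printf("unknown array length: node %d\n", load_array_length(13, 99));
  return 0;
}
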
--- a/hotspot/src/share/vm/opto/memnode.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -241,6 +241,7 @@
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Identity( PhaseTransform *phase );
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
 };
 
 //------------------------------LoadLNode--------------------------------------
--- a/hotspot/src/share/vm/opto/mulnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/mulnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -152,6 +152,14 @@
   if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
     return bottom_type();
 
+#if defined(IA32)
+  // Can't trust native compilers to properly fold strict double
+  // multiplication with round-to-zero on this platform.
+  if (op == Op_MulD && phase->C->method()->is_strict()) {
+    return TypeD::DOUBLE;
+  }
+#endif
+
   return mul_ring(t1,t2);            // Local flavor of type multiplication
 }
 
@@ -360,7 +368,7 @@
 // Compute the product type of two double ranges into this node.
 const Type *MulDNode::mul_ring(const Type *t0, const Type *t1) const {
   if( t0 == Type::DOUBLE || t1 == Type::DOUBLE ) return Type::DOUBLE;
-  // We must be adding 2 double constants.
+  // We must be multiplying 2 double constants.
   return TypeD::make( t0->getd() * t1->getd() );
 }
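
mul_ring() above folds the product of two double constants at compile time, and the new IA32 guard refuses to fold under strict FP because the host compiler's folding cannot be trusted there. A standalone sketch (not HotSpot code) of that decision; fold_strict_mul and its flag are illustrative.

// Standalone sketch: fold a constant double multiply only when the host
// environment can reproduce the required strict-FP result.
#include <cstdio>
#include <optional>

static std::optional<double> fold_strict_mul(double a, double b,
                                             bool host_fold_is_trustworthy) {
  if (!host_fold_is_trustworthy)
    return std::nullopt;       // keep the multiply in the compiled code
  return a * b;                // both inputs are constants: fold now
}

int main() {
  if (auto v = fold_strict_mul(1.5, 2.0, true))
    std::printf("folded to %g\n", *v);
  if (!fold_strict_mul(1.5, 2.0, false))
    std::printf("left as a runtime multiply\n");
  return 0;
}
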
 
--- a/hotspot/src/share/vm/opto/node.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/node.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1320,7 +1320,8 @@
   Node *pop() {
     if( _clock_index >= size() ) _clock_index = 0;
     Node *b = at(_clock_index);
-    map( _clock_index++, Node_List::pop());
+    map( _clock_index, Node_List::pop());
+    if (size() != 0) _clock_index++; // Always start from 0
     _in_worklist >>= b->_idx;
     return b;
   }
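
The Node_List::pop() change above makes the clock index advance only while the list stays non-empty, giving a round-robin removal order from an unordered worklist. Below is a self-contained sketch (not HotSpot code) of the same rotation over a std::vector; WorklistSketch is illustrative.

// Standalone sketch: take the element at the clock index, plug the hole
// with the tail element, and advance the clock while items remain.
#include <cstdio>
#include <vector>

struct WorklistSketch {
  std::vector<int> items;
  size_t clock = 0;

  int pop_rotating() {
    if (clock >= items.size()) clock = 0;
    int b = items[clock];
    items[clock] = items.back();   // fill the hole with the last element
    items.pop_back();
    if (!items.empty()) clock++;   // otherwise restart from 0 next time
    return b;
  }
};

int main() {
  WorklistSketch w;
  w.items = {10, 20, 30, 40};
  while (!w.items.empty())
    std::printf("popped %d\n", w.pop_rotating());
  return 0;
}
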
--- a/hotspot/src/share/vm/opto/parse.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/parse.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -167,9 +167,19 @@
 
     int start() const                      { return flow()->start(); }
     int limit() const                      { return flow()->limit(); }
-    int pre_order() const                  { return flow()->pre_order(); }
+    int rpo() const                        { return flow()->rpo(); }
     int start_sp() const                   { return flow()->stack_size(); }
 
+    bool is_loop_head() const              { return flow()->is_loop_head(); }
+    bool is_SEL_head() const               { return flow()->is_single_entry_loop_head(); }
+    bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() >= rpo(); }
+    bool is_invariant_local(uint i) const  {
+      const JVMState* jvms = start_map()->jvms();
+      if (!jvms->is_loc(i)) return false;
+      return flow()->is_invariant_local(i - jvms->locoff());
+    }
+    bool can_elide_SEL_phi(uint i) const  { assert(is_SEL_head(),""); return is_invariant_local(i); }
+
     const Type* peek(int off=0) const      { return stack_type_at(start_sp() - (off+1)); }
 
     const Type* stack_type_at(int i) const;
@@ -305,7 +315,7 @@
   //            entry_bci()     -- see osr_bci, etc.
 
   ciTypeFlow*   flow()          const { return _flow; }
-  //            blocks()        -- see pre_order_at, start_block, etc.
+  //            blocks()        -- see rpo_at, start_block, etc.
   int           block_count()   const { return _block_count; }
 
   GraphKit&     exits()               { return _exits; }
@@ -330,12 +340,12 @@
   // Must this parse be aborted?
   bool failing()                { return C->failing(); }
 
-  Block* pre_order_at(int po) {
-    assert(0 <= po && po < _block_count, "oob");
-    return &_blocks[po];
+  Block* rpo_at(int rpo) {
+    assert(0 <= rpo && rpo < _block_count, "oob");
+    return &_blocks[rpo];
   }
   Block* start_block() {
-    return pre_order_at(flow()->start_block()->pre_order());
+    return rpo_at(flow()->start_block()->rpo());
   }
   // Can return NULL if the flow pass did not complete a block.
   Block* successor_for_bci(int bci) {
@@ -359,9 +369,6 @@
   // Parse all the basic blocks.
   void do_all_blocks();
 
-  // Helper for do_all_blocks; makes one pass in pre-order.
-  void visit_blocks();
-
   // Parse the current basic block
   void do_one_block();
 
--- a/hotspot/src/share/vm/opto/parse1.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -29,17 +29,17 @@
 // the most. Some of the non-static variables are needed in bytecodeInfo.cpp
 // and eventually should be encapsulated in a proper class (gri 8/18/98).
 
-int nodes_created              = 0; int nodes_created_old              = 0;
-int methods_parsed             = 0; int methods_parsed_old             = 0;
-int methods_seen               = 0; int methods_seen_old               = 0;
+int nodes_created              = 0;
+int methods_parsed             = 0;
+int methods_seen               = 0;
+int blocks_parsed              = 0;
+int blocks_seen                = 0;
 
-int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0;
-int explicit_null_checks_elided   = 0, explicit_null_checks_elided_old   = 0;
+int explicit_null_checks_inserted = 0;
+int explicit_null_checks_elided   = 0;
 int all_null_checks_found         = 0, implicit_null_checks              = 0;
 int implicit_null_throws          = 0;
 
-int parse_idx = 0;
-size_t parse_arena = 0;
 int reclaim_idx  = 0;
 int reclaim_in   = 0;
 int reclaim_node = 0;
@@ -61,6 +61,7 @@
   tty->cr();
   if (methods_seen != methods_parsed)
     tty->print_cr("Reasons for parse failures (NOT cumulative):");
+  tty->print_cr("Blocks parsed: %d  Blocks seen: %d", blocks_parsed, blocks_seen);
 
   if( explicit_null_checks_inserted )
     tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
@@ -373,6 +374,12 @@
     C->record_method_not_compilable_all_tiers(_flow->failure_reason());
   }
 
+#ifndef PRODUCT
+  if (_flow->has_irreducible_entry()) {
+    C->set_parsed_irreducible_loop(true);
+  }
+#endif
+
   if (_expected_uses <= 0) {
     _prof_factor = 1;
   } else {
@@ -556,118 +563,93 @@
   set_map(entry_map);
   do_exits();
 
-  // Collect a few more statistics.
-  parse_idx += C->unique();
-  parse_arena += C->node_arena()->used();
-
   if (log)  log->done("parse nodes='%d' memory='%d'",
                       C->unique(), C->node_arena()->used());
 }
 
 //---------------------------do_all_blocks-------------------------------------
 void Parse::do_all_blocks() {
-  _blocks_merged = 0;
-  _blocks_parsed = 0;
+  bool has_irreducible = flow()->has_irreducible_entry();
+
+  // Walk over all blocks in Reverse Post-Order.
+  while (true) {
+    bool progress = false;
+    for (int rpo = 0; rpo < block_count(); rpo++) {
+      Block* block = rpo_at(rpo);
+
+      if (block->is_parsed()) continue;
 
-  int old_blocks_merged = -1;
-  int old_blocks_parsed = -1;
+      if (!block->is_merged()) {
+        // Dead block, no state reaches this block
+        continue;
+      }
 
-  for (int tries = 0; ; tries++) {
-    visit_blocks();
-    if (failing())  return; // Check for bailout
+      // Prepare to parse this block.
+      load_state_from(block);
+
+      if (stopped()) {
+        // Block is dead.
+        continue;
+      }
+
+      blocks_parsed++;
 
-    // No need for a work list.  The outer loop is hardly ever repeated.
-    // The following loop traverses the blocks in a reasonable pre-order,
-    // as produced by the ciTypeFlow pass.
+      progress = true;
+      if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) {
+        // Not all preds have been parsed.  We must build phis everywhere.
+        // (Note that dead locals do not get phis built, ever.)
+        ensure_phis_everywhere();
+
+        // Leave behind an undisturbed copy of the map, for future merges.
+        set_map(clone_map());
+      }
 
-    // This loop can be taken more than once if there are two entries to
-    // a loop (irreduceable CFG), and the edge which ciTypeFlow chose
-    // as the first predecessor to the loop goes dead in the parser,
-    // due to parse-time optimization.  (Could happen with obfuscated code.)
+      if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) {
+        // In the absence of irreducible loops, the Region and Phis
+        // associated with a merge that doesn't involve a backedge can
+        // be simplified now since the RPO parsing order guarantees
+        // that any path which was supposed to reach here has already
+        // been parsed or must be dead.
+        Node* c = control();
+        Node* result = _gvn.transform_no_reclaim(control());
+        if (c != result && TraceOptoParse) {
+          tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx);
+        }
+        if (result != top()) {
+          record_for_igvn(result);
+        }
+      }
 
-    // Look for progress, or the lack of it:
-    if (_blocks_parsed == block_count()) {
-      // That's all, folks.
-      if (TraceOptoParse) {
-        tty->print_cr("All blocks parsed.");
-      }
+      // Parse the block.
+      do_one_block();
+
+      // Check for bailouts.
+      if (failing())  return;
+    }
+
+    // With irreducible loops, multiple passes might be necessary to parse everything.
+    if (!has_irreducible || !progress) {
       break;
     }
+  }
 
-    // How much work was done this time around?
-    int new_blocks_merged = _blocks_merged - old_blocks_merged;
-    int new_blocks_parsed = _blocks_parsed - old_blocks_parsed;
-    if (new_blocks_merged == 0) {
-      if (TraceOptoParse) {
-        tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed);
-      }
-      // No new blocks have become parseable.  Some blocks are just dead.
-      break;
-    }
-    assert(new_blocks_parsed > 0, "must make progress");
-    assert(tries < block_count(), "the pre-order cannot be this bad!");
-
-    old_blocks_merged = _blocks_merged;
-    old_blocks_parsed = _blocks_parsed;
-  }
+  blocks_seen += block_count();
 
 #ifndef PRODUCT
   // Make sure there are no half-processed blocks remaining.
   // Every remaining unprocessed block is dead and may be ignored now.
-  for (int po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
+  for (int rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
     if (!block->is_parsed()) {
       if (TraceOptoParse) {
-        tty->print("Skipped dead block %d at bci:%d", po, block->start());
-        assert(!block->is_merged(), "no half-processed blocks");
+        tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start());
       }
+      assert(!block->is_merged(), "no half-processed blocks");
     }
   }
 #endif
 }
 
-//---------------------------visit_blocks--------------------------------------
-void Parse::visit_blocks() {
-  // Walk over all blocks, parsing every one that has been reached (merged).
-  for (int po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
-
-    if (block->is_parsed()) {
-      // Do not parse twice.
-      continue;
-    }
-
-    if (!block->is_merged()) {
-      // No state on this block.  It had not yet been reached.
-      // Delay reaching it until later.
-      continue;
-    }
-
-    // Prepare to parse this block.
-    load_state_from(block);
-
-    if (stopped()) {
-      // Block is dead.
-      continue;
-    }
-
-    if (!block->is_ready() || block->is_handler()) {
-      // Not all preds have been parsed.  We must build phis everywhere.
-      // (Note that dead locals do not get phis built, ever.)
-      ensure_phis_everywhere();
-
-      // Leave behind an undisturbed copy of the map, for future merges.
-      set_map(clone_map());
-    }
-
-    // Ready or not, parse the block.
-    do_one_block();
-
-    // Check for bailouts.
-    if (failing())  return;
-  }
-}
-
 //-------------------------------build_exits----------------------------------
 // Build normal and exceptional exit merge points.
 void Parse::build_exits() {
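
do_all_blocks() now sweeps the blocks in reverse post-order and repeats the sweep only while progress is made, which is what handles irreducible loop entries. The heavily simplified standalone sketch below (not HotSpot code) shows only that control structure; in the real parser, parsing a block merges state into its successors, which is what makes later sweeps productive.

// Standalone sketch: parse in RPO, repeating the sweep only for CFGs with
// irreducible entries and only while a block was newly parsed.
#include <cstdio>
#include <vector>

struct BlockSketch {
  bool merged;   // some predecessor has delivered a state to this block
  bool parsed;
};

static void parse_all(std::vector<BlockSketch>& blocks, bool has_irreducible) {
  while (true) {
    bool progress = false;
    for (BlockSketch& b : blocks) {            // one sweep in RPO
      if (b.parsed || !b.merged) continue;     // already done, or dead so far
      b.parsed = true;                         // stand-in for do_one_block()
      progress = true;
    }
    if (!has_irreducible || !progress) break;  // one sweep suffices otherwise
  }
}

int main() {
  std::vector<BlockSketch> blocks = { {true, false}, {false, false}, {true, false} };
  parse_all(blocks, /*has_irreducible=*/true);
  for (size_t i = 0; i < blocks.size(); i++)
    std::printf("block %zu parsed: %d\n", i, (int)blocks[i].parsed);
  return 0;
}
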
@@ -1134,24 +1116,24 @@
   _blocks = NEW_RESOURCE_ARRAY(Block, _block_count);
   Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count);
 
-  int po;
+  int rpo;
 
   // Initialize the structs.
-  for (po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
-    block->init_node(this, po);
+  for (rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
+    block->init_node(this, rpo);
   }
 
   // Collect predecessor and successor information.
-  for (po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
+  for (rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
     block->init_graph(this);
   }
 }
 
 //-------------------------------init_node-------------------------------------
-void Parse::Block::init_node(Parse* outer, int po) {
-  _flow = outer->flow()->pre_order_at(po);
+void Parse::Block::init_node(Parse* outer, int rpo) {
+  _flow = outer->flow()->rpo_at(rpo);
   _pred_count = 0;
   _preds_parsed = 0;
   _count = 0;
@@ -1177,7 +1159,7 @@
   int p = 0;
   for (int i = 0; i < ns+ne; i++) {
     ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns);
-    Block* block2 = outer->pre_order_at(tf2->pre_order());
+    Block* block2 = outer->rpo_at(tf2->rpo());
     _successors[i] = block2;
 
     // Accumulate pred info for the other block, too.
@@ -1368,10 +1350,11 @@
     int nt = b->all_successors();
 
     tty->print("Parsing block #%d at bci [%d,%d), successors: ",
-                  block()->pre_order(), block()->start(), block()->limit());
+                  block()->rpo(), block()->start(), block()->limit());
     for (int i = 0; i < nt; i++) {
-      tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order());
+      tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo());
     }
+    if (b->is_loop_head()) tty->print("  lphd");
     tty->print_cr("");
   }
 
@@ -1501,7 +1484,7 @@
 #ifndef PRODUCT
   Block* b = block();
   int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1;
-  tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci);
+  tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci);
 #endif
   ShouldNotReachHere();
 }
@@ -1509,7 +1492,7 @@
 //--------------------------merge_common---------------------------------------
 void Parse::merge_common(Parse::Block* target, int pnum) {
   if (TraceOptoParse) {
-    tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start());
+    tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start());
   }
 
   // Zap extra stack slots to top
@@ -1534,6 +1517,7 @@
     // which must not be allowed into this block's map.)
     if (pnum > PhiNode::Input         // Known multiple inputs.
         || target->is_handler()       // These have unpredictable inputs.
+        || target->is_loop_head()     // Known multiple inputs
         || control()->is_Region()) {  // We must hide this guy.
       // Add a Region to start the new basic block.  Phis will be added
       // later lazily.
@@ -1575,15 +1559,21 @@
 
     // Compute where to merge into
     // Merge incoming control path
-    r->set_req(pnum, newin->control());
+    r->init_req(pnum, newin->control());
 
     if (pnum == 1) {            // Last merge for this Region?
-      _gvn.transform_no_reclaim(r);
+      if (!block()->flow()->is_irreducible_entry()) {
+        Node* result = _gvn.transform_no_reclaim(r);
+        if (r != result && TraceOptoParse) {
+          tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx);
+        }
+      }
       record_for_igvn(r);
     }
 
     // Update all the non-control inputs to map:
     assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms");
+    bool check_elide_phi = target->is_SEL_backedge(save_block);
     for (uint j = 1; j < newin->req(); j++) {
       Node* m = map()->in(j);   // Current state of target.
       Node* n = newin->in(j);   // Incoming change to target state.
@@ -1603,7 +1593,11 @@
           merge_memory_edges(n->as_MergeMem(), pnum, nophi);
           continue;
         default:                // All normal stuff
-          if (phi == NULL)  phi = ensure_phi(j, nophi);
+          if (phi == NULL) {
+            if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
+              phi = ensure_phi(j, nophi);
+            }
+          }
           break;
         }
       }
@@ -1736,9 +1730,13 @@
   uint nof_monitors = map()->jvms()->nof_monitors();
 
   assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms");
+  bool check_elide_phi = block()->is_SEL_head();
   for (uint i = TypeFunc::Parms; i < monoff; i++) {
-    ensure_phi(i);
+    if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) {
+      ensure_phi(i);
+    }
   }
+
   // Even monitors need Phis, though they are well-structured.
   // This is true for OSR methods, and also for the rare cases where
   // a monitor object is the subject of a replace_in_map operation.
--- a/hotspot/src/share/vm/opto/parse2.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/parse2.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -100,16 +100,17 @@
 
   // Do the range check
   if (GenerateRangeChecks && need_range_check) {
-    // Range is constant in array-oop, so we can use the original state of mem
-    Node* len = load_array_length(ary);
     Node* tst;
     if (sizetype->_hi <= 0) {
-      // If the greatest array bound is negative, we can conclude that we're
+      // The greatest array bound is negative, so we can conclude that we're
       // compiling unreachable code, but the unsigned compare trick used below
       // only works with non-negative lengths.  Instead, hack "tst" to be zero so
       // the uncommon_trap path will always be taken.
       tst = _gvn.intcon(0);
     } else {
+      // Range is constant in array-oop, so we can use the original state of mem
+      Node* len = load_array_length(ary);
+
       // Test length vs index (standard trick using unsigned compare)
       Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) );
       BoolTest::mask btest = BoolTest::lt;
@@ -137,9 +138,12 @@
   // Check for always knowing you are throwing a range-check exception
   if (stopped())  return top();
 
-  Node* ptr = array_element_address( ary, idx, type, sizetype);
+  Node* ptr = array_element_address(ary, idx, type, sizetype);
 
   if (result2 != NULL)  *result2 = elemtype;
+
+  assert(ptr != top(), "top should go hand-in-hand with stopped");
+
   return ptr;
 }
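
The range-check code above relies on the standard unsigned-compare trick: for a non-negative length, a single unsigned comparison rejects both negative and too-large indices. A minimal standalone sketch (not HotSpot code):

// Standalone sketch: one unsigned compare covers "idx < 0" and "idx >= len".
#include <cstdint>
#include <cstdio>

static bool in_bounds(int32_t idx, int32_t len) {
  // Requires len >= 0 (array lengths are): a negative idx wraps to a huge
  // unsigned value and fails the same test as an index past the end.
  return static_cast<uint32_t>(idx) < static_cast<uint32_t>(len);
}

int main() {
  std::printf("%d %d %d\n", in_bounds(3, 10), in_bounds(-1, 10), in_bounds(10, 10));
  return 0;
}
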
 
--- a/hotspot/src/share/vm/opto/postaloc.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/postaloc.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -34,7 +34,7 @@
 #endif
 }
 
-//------------------------------may_be_copy_of_callee-----------------------------
+//---------------------------may_be_copy_of_callee-----------------------------
 // Check to see if we can possibly be a copy of a callee-save value.
 bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
   // Short circuit if there are no callee save registers
@@ -225,6 +225,20 @@
 
   // Scan all registers to see if this value is around already
   for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
+    if (reg == (uint)nk_reg) {
+      // Found ourselves so check if there is only one user of this
+      // copy and keep on searching for a better copy if so.
+      bool ignore_self = true;
+      x = n->in(k);
+      DUIterator_Fast imax, i = x->fast_outs(imax);
+      Node* first = x->fast_out(i); i++;
+      while (i < imax && ignore_self) {
+        Node* use = x->fast_out(i); i++;
+        if (use != first) ignore_self = false;
+      }
+      if (ignore_self) continue;
+    }
+
     Node *vv = value[reg];
     if( !single ) {             // Doubles check for aligned-adjacent pair
       if( (reg&1)==0 ) continue;  // Wrong half of a pair
--- a/hotspot/src/share/vm/opto/runtime.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -44,6 +44,8 @@
 address OptoRuntime::_multianewarray3_Java                        = NULL;
 address OptoRuntime::_multianewarray4_Java                        = NULL;
 address OptoRuntime::_multianewarray5_Java                        = NULL;
+address OptoRuntime::_g1_wb_pre_Java                              = NULL;
+address OptoRuntime::_g1_wb_post_Java                             = NULL;
 address OptoRuntime::_vtable_must_compile_Java                    = NULL;
 address OptoRuntime::_complete_monitor_locking_Java               = NULL;
 address OptoRuntime::_rethrow_Java                                = NULL;
@@ -89,6 +91,8 @@
   gen(env, _multianewarray3_Java           , multianewarray3_Type         , multianewarray3_C               ,    0 , true , false, false);
   gen(env, _multianewarray4_Java           , multianewarray4_Type         , multianewarray4_C               ,    0 , true , false, false);
   gen(env, _multianewarray5_Java           , multianewarray5_Type         , multianewarray5_C               ,    0 , true , false, false);
+  gen(env, _g1_wb_pre_Java                 , g1_wb_pre_Type               , SharedRuntime::g1_wb_pre        ,    0 , false, false, false);
+  gen(env, _g1_wb_post_Java                , g1_wb_post_Type              , SharedRuntime::g1_wb_post       ,    0 , false, false, false);
   gen(env, _complete_monitor_locking_Java  , complete_monitor_enter_Type  , SharedRuntime::complete_monitor_locking_C      ,    0 , false, false, false);
   gen(env, _rethrow_Java                   , rethrow_Type                 , rethrow_C                       ,    2 , true , false, true );
 
@@ -385,6 +389,33 @@
   return multianewarray_Type(5);
 }
 
+const TypeFunc *OptoRuntime::g1_wb_pre_Type() {
+  const Type **fields = TypeTuple::fields(2);
+  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
+  fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(0);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+  return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::g1_wb_post_Type() {
+
+  const Type **fields = TypeTuple::fields(2);
+  fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL;  // Card addr
+  fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL;  // thread
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(0);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+
+  return TypeFunc::make(domain, range);
+}
+
 const TypeFunc *OptoRuntime::uncommon_trap_Type() {
   // create input type (domain)
   const Type **fields = TypeTuple::fields(1);
--- a/hotspot/src/share/vm/opto/runtime.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -108,6 +108,8 @@
   static address _multianewarray3_Java;
   static address _multianewarray4_Java;
   static address _multianewarray5_Java;
+  static address _g1_wb_pre_Java;
+  static address _g1_wb_post_Java;
   static address _vtable_must_compile_Java;
   static address _complete_monitor_locking_Java;
   static address _rethrow_Java;
@@ -140,6 +142,8 @@
   static void multianewarray3_C(klassOopDesc* klass, int len1, int len2, int len3, JavaThread *thread);
   static void multianewarray4_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
   static void multianewarray5_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
+  static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread);
+  static void g1_wb_post_C(void* card_addr, JavaThread* thread);
 
 public:
   // Slow-path Locking and Unlocking
@@ -195,6 +199,8 @@
   static address multianewarray3_Java()                  { return _multianewarray3_Java; }
   static address multianewarray4_Java()                  { return _multianewarray4_Java; }
   static address multianewarray5_Java()                  { return _multianewarray5_Java; }
+  static address g1_wb_pre_Java()                        { return _g1_wb_pre_Java; }
+  static address g1_wb_post_Java()                       { return _g1_wb_post_Java; }
   static address vtable_must_compile_stub()              { return _vtable_must_compile_Java; }
   static address complete_monitor_locking_Java()         { return _complete_monitor_locking_Java;   }
 
@@ -232,6 +238,8 @@
   static const TypeFunc* multianewarray3_Type(); // multianewarray
   static const TypeFunc* multianewarray4_Type(); // multianewarray
   static const TypeFunc* multianewarray5_Type(); // multianewarray
+  static const TypeFunc* g1_wb_pre_Type();
+  static const TypeFunc* g1_wb_post_Type();
   static const TypeFunc* complete_monitor_enter_Type();
   static const TypeFunc* complete_monitor_exit_Type();
   static const TypeFunc* uncommon_trap_Type();
--- a/hotspot/src/share/vm/opto/subnode.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/subnode.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -206,6 +206,14 @@
   if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) )
     return new (phase->C, 3) SubINode( in1->in(1), in2->in(1) );
 
+  // Convert "(A+X) - (X+B)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(1) )
+    return new (phase->C, 3) SubINode( in1->in(1), in2->in(2) );
+
+  // Convert "(X+A) - (B+X)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(2) )
+    return new (phase->C, 3) SubINode( in1->in(2), in2->in(1) );
+
   // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally
   // nicer to optimize than subtract.
   if( op2 == Op_SubI && in2->outcnt() == 1) {
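
The two new SubINode rewrites rely on identities that hold under wrap-around 32-bit arithmetic. A standalone check (not HotSpot code), done in uint32_t to model Java int overflow:

// Standalone sketch: verify "(A+X) - (X+B) == A - B" and
// "(X+A) - (B+X) == A - B" with deliberate 32-bit wraparound.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t a = 123456789u, b = 3307312975u, x = 2000000000u;  // wraps on purpose
  uint32_t lhs1 = (a + x) - (x + b);   // (A+X) - (X+B)
  uint32_t lhs2 = (x + a) - (b + x);   // (X+A) - (B+X)
  uint32_t rhs  = a - b;               // A - B
  std::printf("%u %u %u -> %s\n", lhs1, lhs2, rhs,
              (lhs1 == rhs && lhs2 == rhs) ? "equal" : "DIFFERENT");
  return 0;
}
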
--- a/hotspot/src/share/vm/opto/type.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/type.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -3157,17 +3157,18 @@
 
 // Narrow the given size type to the index range for the given array base type.
 // Return NULL if the resulting int type becomes empty.
-const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) {
+const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size) const {
   jint hi = size->_hi;
   jint lo = size->_lo;
   jint min_lo = 0;
-  jint max_hi = max_array_length(elem);
+  jint max_hi = max_array_length(elem()->basic_type());
   //if (index_not_size)  --max_hi;     // type of a valid array index, FTR
   bool chg = false;
   if (lo < min_lo) { lo = min_lo; chg = true; }
   if (hi > max_hi) { hi = max_hi; chg = true; }
+  // Negative-length arrays will produce weird intermediate dead fast-path code
   if (lo > hi)
-    return NULL;
+    return TypeInt::ZERO;
   if (!chg)
     return size;
   return TypeInt::make(lo, hi, Type::WidenMin);
@@ -3176,9 +3177,7 @@
 //-------------------------------cast_to_size----------------------------------
 const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const {
   assert(new_size != NULL, "");
-  new_size = narrow_size_type(new_size, elem()->basic_type());
-  if (new_size == NULL)       // Negative length arrays will produce weird
-    new_size = TypeInt::ZERO; // intermediate dead fast-path goo
+  new_size = narrow_size_type(new_size);
   if (new_size == size())  return this;
   const TypeAry* new_ary = TypeAry::make(elem(), new_size);
   return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id);
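
narrow_size_type() above clamps an array-size range to [0, max_array_length] and now returns the constant 0 instead of NULL when the range is empty, so dead negative-length paths still get a usable type. A standalone sketch (not HotSpot code); max_len and the pair-based "type" are illustrative.

// Standalone sketch: clamp a size range and map an empty result to [0,0].
#include <algorithm>
#include <cstdio>
#include <utility>

static std::pair<int, int> narrow_size(int lo, int hi, int max_len) {
  lo = std::max(lo, 0);
  hi = std::min(hi, max_len);
  if (lo > hi)
    return {0, 0};   // dead (negative-length) path: keep a harmless [0,0] type
  return {lo, hi};
}

int main() {
  auto r = narrow_size(-5, 3, 1000);
  std::printf("[%d,%d]\n", r.first, r.second);       // [0,3]
  auto dead = narrow_size(-7, -2, 1000);
  std::printf("[%d,%d]\n", dead.first, dead.second); // [0,0]
  return 0;
}
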
--- a/hotspot/src/share/vm/opto/type.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/opto/type.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -840,6 +840,7 @@
   virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const;
 
   virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const;
+  virtual const TypeInt* narrow_size_type(const TypeInt* size) const;
 
   virtual bool empty(void) const;        // TRUE if type is vacuous
   virtual const TypePtr *add_offset( intptr_t offset ) const;
@@ -865,7 +866,6 @@
   }
   static const TypeAryPtr *_array_body_type[T_CONFLICT+1];
   // sharpen the type of an int which is used as an array size
-  static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem);
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
 #endif
--- a/hotspot/src/share/vm/prims/jvm.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvm.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -377,7 +377,11 @@
 JVM_ENTRY_NO_ENV(jlong, JVM_FreeMemory(void))
   JVMWrapper("JVM_FreeMemory");
   CollectedHeap* ch = Universe::heap();
-  size_t n = ch->capacity() - ch->used();
+  size_t n;
+  {
+     MutexLocker x(Heap_lock);
+     n = ch->capacity() - ch->used();
+  }
   return convert_size_t_to_jlong(n);
 JVM_END
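
The MutexLocker added to JVM_FreeMemory makes capacity() and used() read from one consistent snapshot under Heap_lock. Below is a standalone sketch (not HotSpot code) of the same pattern with std::mutex; HeapSketch is illustrative.

// Standalone sketch: take both reads under one lock so the subtraction sees
// a consistent snapshot.
#include <cstdio>
#include <mutex>

struct HeapSketch {
  std::mutex lock;
  size_t capacity = 0;
  size_t used = 0;

  size_t free_memory() {
    std::lock_guard<std::mutex> guard(lock);  // both reads under one lock
    return capacity - used;
  }
};

int main() {
  HeapSketch heap;
  heap.capacity = 1 << 20;
  heap.used = 1 << 10;
  std::printf("free: %zu\n", heap.free_memory());
  return 0;
}
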
 
@@ -624,6 +628,32 @@
   if (PrintJVMWarnings) warning("JVM_ResolveClass not implemented");
 JVM_END
 
+// Common implementation for JVM_FindClassFromBootLoader and
+// JVM_FindClassFromClassLoader
+static jclass jvm_find_class_from_class_loader(JNIEnv* env, const char* name,
+                                  jboolean init, jobject loader,
+                                  jboolean throwError, TRAPS) {
+  // Java libraries should ensure that name is never null...
+  if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) {
+    // It's impossible to create this class;  the name cannot fit
+    // into the constant pool.
+    if (throwError) {
+      THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name);
+    } else {
+      THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name);
+    }
+  }
+  symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL);
+  Handle h_loader(THREAD, JNIHandles::resolve(loader));
+  jclass result = find_class_from_class_loader(env, h_name, init, h_loader,
+                                               Handle(), throwError, THREAD);
+
+  if (TraceClassResolution && result != NULL) {
+    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
+  }
+  return result;
+}
+
 // Rationale behind JVM_FindClassFromBootLoader
 // a> JVM_FindClassFromClassLoader was never exported in the export tables.
 // b> because of (a) java.dll has a direct dependecy on the  unexported
@@ -645,8 +675,8 @@
                                               jboolean throwError))
   JVMWrapper3("JVM_FindClassFromBootLoader %s throw %s", name,
               throwError ? "error" : "exception");
-  return JVM_FindClassFromClassLoader(env, name, JNI_FALSE,
-                                      (jobject)NULL, throwError);
+  return jvm_find_class_from_class_loader(env, name, JNI_FALSE,
+                                          (jobject)NULL, throwError, THREAD);
 JVM_END
 
 JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name,
@@ -654,26 +684,8 @@
                                                jboolean throwError))
   JVMWrapper3("JVM_FindClassFromClassLoader %s throw %s", name,
                throwError ? "error" : "exception");
-  // Java libraries should ensure that name is never null...
-  if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) {
-    // It's impossible to create this class;  the name cannot fit
-    // into the constant pool.
-    if (throwError) {
-      THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name);
-    } else {
-      THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name);
-    }
-  }
-  symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL);
-  Handle h_loader(THREAD, JNIHandles::resolve(loader));
-  jclass result = find_class_from_class_loader(env, h_name, init, h_loader,
-                                               Handle(), throwError, thread);
-
-  if (TraceClassResolution && result != NULL) {
-    trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result)));
-  }
-
-  return result;
+  return jvm_find_class_from_class_loader(env, name, init, loader,
+                                          throwError, THREAD);
 JVM_END
 
 
--- a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -121,7 +121,7 @@
   JvmtiEventController::env_initialize((JvmtiEnv*)this);
 
 #ifdef JVMTI_TRACE
-  _jvmti_external.functions = strlen(TraceJVMTI)? &jvmtiTrace_Interface : &jvmti_Interface;
+  _jvmti_external.functions = TraceJVMTI != NULL ? &jvmtiTrace_Interface : &jvmti_Interface;
 #else
   _jvmti_external.functions = &jvmti_Interface;
 #endif
--- a/hotspot/src/share/vm/prims/jvmtiExport.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiExport.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -2433,18 +2433,7 @@
   // so we record the number of collections so that it can be checked in
   // the destructor.
   if (!_full) {
-    if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-      GenCollectedHeap* gch = GenCollectedHeap::heap();
-      assert(gch->n_gens() == 2, "configuration not recognized");
-      _invocation_count = (unsigned int)gch->get_gen(1)->stat_record()->invocations;
-    } else {
-#ifndef SERIALGC
-      assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap, "checking");
-      _invocation_count = PSMarkSweep::total_invocations();
-#else  // SERIALGC
-      fatal("SerialGC only supported in this configuration.");
-#endif // SERIALGC
-    }
+    _invocation_count = Universe::heap()->total_full_collections();
   }
 
   // Do clean up tasks that need to be done at a safepoint
@@ -2466,20 +2455,7 @@
   // generation but could have ended up doing a "full" GC - check the
   // GC count to see.
   if (!_full) {
-    if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-      GenCollectedHeap* gch = GenCollectedHeap::heap();
-      if (_invocation_count != (unsigned int)gch->get_gen(1)->stat_record()->invocations) {
-        _full = true;
-      }
-    } else {
-#ifndef SERIALGC
-      if (_invocation_count != PSMarkSweep::total_invocations()) {
-        _full = true;
-      }
-#else  // SERIALGC
-      fatal("SerialGC only supported in this configuration.");
-#endif // SERIALGC
-    }
+    _full = (_invocation_count != Universe::heap()->total_full_collections());
   }
 
   // Full collection probably means the perm generation has been GC'ed
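
The simplification above replaces collector-specific invocation counters with Universe::heap()->total_full_collections(), read once at the start and compared at the end. A standalone sketch (not HotSpot code) of that before/after comparison; the types here are illustrative.

// Standalone sketch: detect whether a full collection ran during a scoped
// operation by comparing a collector-agnostic counter.
#include <cstdio>

struct HeapCounters { unsigned total_full_collections = 0; };

struct FullGcDetector {
  const HeapCounters& heap;
  unsigned start_count;

  explicit FullGcDetector(const HeapCounters& h)
    : heap(h), start_count(h.total_full_collections) {}

  bool saw_full_gc() const {
    return heap.total_full_collections != start_count;
  }
};

int main() {
  HeapCounters heap;
  FullGcDetector d(heap);
  heap.total_full_collections++;      // pretend a full GC ran in between
  std::printf("full gc observed: %d\n", (int)d.saw_full_gc());
  return 0;
}
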
--- a/hotspot/src/share/vm/prims/jvmtiTagMap.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiTagMap.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -400,16 +400,28 @@
 
 // get the memory region used for the young generation
 void JvmtiTagMap::get_young_generation() {
-  if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-    GenCollectedHeap* gch = GenCollectedHeap::heap();
-    _young_gen = gch->get_gen(0)->reserved();
-  } else {
+  CollectedHeap* ch = Universe::heap();
+  switch (ch->kind()) {
+    case (CollectedHeap::GenCollectedHeap): {
+      _young_gen = ((GenCollectedHeap*)ch)->get_gen(0)->reserved();
+      break;
+    }
 #ifndef SERIALGC
-    ParallelScavengeHeap* psh = ParallelScavengeHeap::heap();
-    _young_gen= psh->young_gen()->reserved();
-#else  // SERIALGC
-    fatal("SerialGC only supported in this configuration.");
-#endif // SERIALGC
+    case (CollectedHeap::ParallelScavengeHeap): {
+      _young_gen = ((ParallelScavengeHeap*)ch)->young_gen()->reserved();
+      break;
+    }
+    case (CollectedHeap::G1CollectedHeap): {
+      // Until a more satisfactory solution is implemented, all
+      // oops in the tag map will require rehash at each gc.
+      // This is a correct, if extremely inefficient solution.
+      // See RFE 6621729 for related commentary.
+      _young_gen = ch->reserved_region();
+      break;
+    }
+#endif  // !SERIALGC
+    default:
+      ShouldNotReachHere();
   }
 }
 
--- a/hotspot/src/share/vm/prims/jvmtiTrace.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiTrace.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -73,7 +73,7 @@
 
   const char *very_end;
   const char *curr;
-  if (strlen(TraceJVMTI)) {
+  if (TraceJVMTI != NULL) {
     curr = TraceJVMTI;
   } else {
     curr = "";  // hack in fixed tracing here
--- a/hotspot/src/share/vm/prims/unsafe.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/prims/unsafe.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -891,6 +891,7 @@
   oop e = JNIHandles::resolve(e_h);
   oop p = JNIHandles::resolve(obj);
   HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
+  update_barrier_set_pre((void*)addr, e);
   oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e);
   jboolean success  = (res == e);
   if (success)
--- a/hotspot/src/share/vm/runtime/aprofiler.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/aprofiler.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -33,6 +33,7 @@
 
 class AllocationProfiler: AllStatic {
   friend class GenCollectedHeap;
+  friend class G1CollectedHeap;
   friend class MarkSweep;
  private:
   static bool _active;                          // tells whether profiler is active
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -947,18 +947,17 @@
 // UseParNewGC and not explicitly set ParallelGCThreads we
 // set it, unless this is a single cpu machine.
 void Arguments::set_parnew_gc_flags() {
-  assert(!UseSerialGC && !UseParallelGC, "control point invariant");
+  assert(!UseSerialGC && !UseParallelGC && !UseG1GC,
+         "control point invariant");
+  assert(UseParNewGC, "Error");
 
   // Turn off AdaptiveSizePolicy by default for parnew until it is
   // complete.
-  if (UseParNewGC &&
-      FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
+  if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
     FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
   }
 
-  if (FLAG_IS_DEFAULT(UseParNewGC) && ParallelGCThreads > 1) {
-    FLAG_SET_DEFAULT(UseParNewGC, true);
-  } else if (UseParNewGC && ParallelGCThreads == 0) {
+  if (ParallelGCThreads == 0) {
     FLAG_SET_DEFAULT(ParallelGCThreads,
                      Abstract_VM_Version::parallel_worker_threads());
     if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) {
@@ -994,15 +993,12 @@
 // further optimization and tuning efforts, and would almost
 // certainly gain from analysis of platform and environment.
 void Arguments::set_cms_and_parnew_gc_flags() {
-  if (UseSerialGC || UseParallelGC) {
-    return;
-  }
-
+  assert(!UseSerialGC && !UseParallelGC, "Error");
   assert(UseConcMarkSweepGC, "CMS is expected to be on here");
 
   // If we are using CMS, we prefer to UseParNewGC,
   // unless explicitly forbidden.
-  if (!UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) {
+  if (FLAG_IS_DEFAULT(UseParNewGC)) {
     FLAG_SET_ERGO(bool, UseParNewGC, true);
   }
 
@@ -1182,6 +1178,7 @@
     // machine class and automatic selection policy.
     if (!UseSerialGC &&
         !UseConcMarkSweepGC &&
+        !UseG1GC &&
         !UseParNewGC &&
         !DumpSharedSpaces &&
         FLAG_IS_DEFAULT(UseParallelGC)) {
@@ -1200,9 +1197,13 @@
   // Check that UseCompressedOops can be set with the max heap size allocated
   // by ergonomics.
   if (MaxHeapSize <= max_heap_for_compressed_oops()) {
-    if (FLAG_IS_DEFAULT(UseCompressedOops)) {
+    if (FLAG_IS_DEFAULT(UseCompressedOops) && !UseG1GC) {
       // Turn off until bug is fixed.
+      // Uncomment the following line to return it to default status.
       // FLAG_SET_ERGO(bool, UseCompressedOops, true);
+    } else if (UseCompressedOops && UseG1GC) {
+      warning(" UseCompressedOops does not currently work with UseG1GC; switching off UseCompressedOops. ");
+      FLAG_SET_DEFAULT(UseCompressedOops, false);
     }
 #ifdef _WIN64
     if (UseLargePages && UseCompressedOops) {
@@ -1213,8 +1214,7 @@
 #endif //  _WIN64
   } else {
     if (UseCompressedOops && !FLAG_IS_DEFAULT(UseCompressedOops)) {
-      // If specified, give a warning
-      warning( "Max heap size too large for Compressed Oops");
+      warning("Max heap size too large for Compressed Oops");
       FLAG_SET_DEFAULT(UseCompressedOops, false);
     }
   }
@@ -1224,6 +1224,7 @@
 }
 
 void Arguments::set_parallel_gc_flags() {
+  assert(UseParallelGC || UseParallelOldGC, "Error");
   // If parallel old was requested, automatically enable parallel scavenge.
   if (UseParallelOldGC && !UseParallelGC && FLAG_IS_DEFAULT(UseParallelGC)) {
     FLAG_SET_DEFAULT(UseParallelGC, true);
@@ -1235,51 +1236,8 @@
     FLAG_SET_ERGO(uintx, ParallelGCThreads,
                   Abstract_VM_Version::parallel_worker_threads());
 
-    if (FLAG_IS_DEFAULT(MaxHeapSize)) {
-      const uint64_t reasonable_fraction =
-        os::physical_memory() / DefaultMaxRAMFraction;
-      const uint64_t maximum_size = (uint64_t)
-                 (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ?
-                     MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) :
-                     DefaultMaxRAM);
-      size_t reasonable_max =
-        (size_t) os::allocatable_physical_memory(reasonable_fraction);
-      if (reasonable_max > maximum_size) {
-        reasonable_max = maximum_size;
-      }
-      if (PrintGCDetails && Verbose) {
-        // Cannot use gclog_or_tty yet.
-        tty->print_cr("  Max heap size for server class platform "
-                      SIZE_FORMAT, reasonable_max);
-      }
-      // If the initial_heap_size has not been set with -Xms,
-      // then set it as fraction of size of physical memory
-      // respecting the maximum and minimum sizes of the heap.
-      if (initial_heap_size() == 0) {
-        const uint64_t reasonable_initial_fraction =
-          os::physical_memory() / DefaultInitialRAMFraction;
-        const size_t reasonable_initial =
-          (size_t) os::allocatable_physical_memory(reasonable_initial_fraction);
-        const size_t minimum_size = NewSize + OldSize;
-        set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max),
-                                  minimum_size));
-        // Currently the minimum size and the initial heap sizes are the same.
-        set_min_heap_size(initial_heap_size());
-        if (PrintGCDetails && Verbose) {
-          // Cannot use gclog_or_tty yet.
-          tty->print_cr("  Initial heap size for server class platform "
-                        SIZE_FORMAT, initial_heap_size());
-        }
-      } else {
-        // An minimum size was specified on the command line.  Be sure
-        // that the maximum size is consistent.
-        if (initial_heap_size() > reasonable_max) {
-          reasonable_max = initial_heap_size();
-        }
-      }
-      FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max);
-    }
-
+    // PS is a server collector; set up the heap sizes accordingly.
+    set_server_heap_size();
     // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the
     // SurvivorRatio has been set, reset their default values to SurvivorRatio +
     // 2.  By doing this we make SurvivorRatio also work for Parallel Scavenger.
@@ -1307,6 +1265,70 @@
   }
 }
 
+void Arguments::set_g1_gc_flags() {
+  assert(UseG1GC, "Error");
+  // G1 is a server collector; set up the heap sizes accordingly.
+  set_server_heap_size();
+#ifdef COMPILER1
+  FastTLABRefill = false;
+#endif
+  FLAG_SET_DEFAULT(ParallelGCThreads,
+                     Abstract_VM_Version::parallel_worker_threads());
+  if (ParallelGCThreads == 0) {
+    FLAG_SET_DEFAULT(ParallelGCThreads,
+                     Abstract_VM_Version::parallel_worker_threads());
+  }
+  no_shared_spaces();
+}
+
+void Arguments::set_server_heap_size() {
+  if (FLAG_IS_DEFAULT(MaxHeapSize)) {
+    const uint64_t reasonable_fraction =
+      os::physical_memory() / DefaultMaxRAMFraction;
+    const uint64_t maximum_size = (uint64_t)
+                 (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ?
+                     MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) :
+                     DefaultMaxRAM);
+    size_t reasonable_max =
+      (size_t) os::allocatable_physical_memory(reasonable_fraction);
+    if (reasonable_max > maximum_size) {
+      reasonable_max = maximum_size;
+    }
+    if (PrintGCDetails && Verbose) {
+      // Cannot use gclog_or_tty yet.
+      tty->print_cr("  Max heap size for server class platform "
+                    SIZE_FORMAT, reasonable_max);
+    }
+    // If the initial_heap_size has not been set with -Xms,
+    // then set it as a fraction of the size of physical memory,
+    // respecting the maximum and minimum sizes of the heap.
+    if (initial_heap_size() == 0) {
+      const uint64_t reasonable_initial_fraction =
+        os::physical_memory() / DefaultInitialRAMFraction;
+      const size_t reasonable_initial =
+        (size_t) os::allocatable_physical_memory(reasonable_initial_fraction);
+      const size_t minimum_size = NewSize + OldSize;
+      set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max),
+                                minimum_size));
+      // Currently the minimum size and the initial heap sizes are the same.
+      set_min_heap_size(initial_heap_size());
+      if (PrintGCDetails && Verbose) {
+        // Cannot use gclog_or_tty yet.
+        tty->print_cr("  Initial heap size for server class platform "
+                      SIZE_FORMAT, initial_heap_size());
+      }
+    } else {
+      // A minimum size was specified on the command line.  Be sure
+      // that the maximum size is consistent.
+      if (initial_heap_size() > reasonable_max) {
+        reasonable_max = initial_heap_size();
+      }
+    }
+    FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max);
+  }
+}
+
 // This must be called after ergonomics because we want bytecode rewriting
 // if the server compiler is used, or if UseSharedSpaces is disabled.
 void Arguments::set_bytecode_flags() {
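
The factored-out set_server_heap_size() above picks MaxHeapSize and the initial/minimum heap from fractions of physical memory. The following standalone sketch works that arithmetic through once; the 8 GB machine size and the fraction/RAM values are illustrative assumptions (not necessarily this build's defaults), and the compressed-oops cap is ignored for brevity.

// Worked example of the server heap-size ergonomics, as a standalone sketch.
// All constants below are assumed values for illustration only.
#include <cstdint>
#include <cstdio>
#include <algorithm>

int main() {
  const uint64_t physical_memory           = 8ULL << 30;   // assume 8 GB of RAM
  const uint64_t DefaultMaxRAMFraction     = 4;            // assumed default
  const uint64_t DefaultInitialRAMFraction = 64;           // assumed default
  const uint64_t DefaultMaxRAM             = 128ULL << 30; // assumed cap
  const uint64_t NewSize_plus_OldSize      = 6ULL << 20;   // assumed minimum

  // reasonable_max = min(phys / DefaultMaxRAMFraction, DefaultMaxRAM)  -> 2 GB here
  uint64_t reasonable_max = std::min(physical_memory / DefaultMaxRAMFraction,
                                     DefaultMaxRAM);
  // initial = clamp(phys / DefaultInitialRAMFraction, minimum, reasonable_max) -> 128 MB here
  uint64_t reasonable_initial =
      std::max(std::min(physical_memory / DefaultInitialRAMFraction,
                        reasonable_max),
               NewSize_plus_OldSize);

  printf("MaxHeapSize ergonomically chosen as %llu MB\n",
         (unsigned long long)(reasonable_max >> 20));
  printf("initial/min heap size chosen as %llu MB\n",
         (unsigned long long)(reasonable_initial >> 20));
  return 0;
}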
@@ -1393,12 +1415,13 @@
   FLAG_SET_DEFAULT(UseConcMarkSweepGC, false);
   FLAG_SET_DEFAULT(UseParallelGC, false);
   FLAG_SET_DEFAULT(UseParallelOldGC, false);
+  FLAG_SET_DEFAULT(UseG1GC, false);
 }
 
 static bool verify_serial_gc_flags() {
   return (UseSerialGC &&
-        !(UseParNewGC || UseConcMarkSweepGC || UseParallelGC ||
-          UseParallelOldGC));
+        !(UseParNewGC || UseConcMarkSweepGC || UseG1GC ||
+          UseParallelGC || UseParallelOldGC));
 }
 
 // Check consistency of GC selection
@@ -1501,8 +1524,8 @@
   status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit");
 
   // Check user specified sharing option conflict with Parallel GC
-  bool cannot_share = (UseConcMarkSweepGC || UseParallelGC ||
-                       UseParallelOldGC || UseParNewGC ||
+  bool cannot_share = (UseConcMarkSweepGC || UseG1GC || UseParNewGC ||
+                       UseParallelGC || UseParallelOldGC ||
                        SOLARIS_ONLY(UseISM) NOT_SOLARIS(UseLargePages));
 
   if (cannot_share) {
@@ -1542,11 +1565,6 @@
                   "The CMS collector (-XX:+UseConcMarkSweepGC) must be "
                   "selected in order\nto use CMSIncrementalMode.\n");
       status = false;
-    } else if (!UseTLAB) {
-      jio_fprintf(defaultStream::error_stream(),
-                  "error:  CMSIncrementalMode requires thread-local "
-                  "allocation buffers\n(-XX:+UseTLAB).\n");
-      status = false;
     } else {
       status = status && verify_percentage(CMSIncrementalDutyCycle,
                                   "CMSIncrementalDutyCycle");
@@ -1566,13 +1584,6 @@
     }
   }
 
-  if (UseNUMA && !UseTLAB) {
-    jio_fprintf(defaultStream::error_stream(),
-                "error:  NUMA allocator (-XX:+UseNUMA) requires thread-local "
-                "allocation\nbuffers (-XX:+UseTLAB).\n");
-    status = false;
-  }
-
   // CMS space iteration, which FLSVerifyAllHeapreferences entails,
   // insists that we hold the requisite locks so that the iteration is
   // MT-safe. For the verification at start-up and shut-down, we don't
@@ -2361,10 +2372,15 @@
     SOLARIS_ONLY(FLAG_SET_DEFAULT(UseMPSS, false));
     SOLARIS_ONLY(FLAG_SET_DEFAULT(UseISM, false));
   }
+
 #else
   if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) {
     FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1);
   }
+  // Temporarily disable bulk zeroing reduction with G1. See CR 6627983.
+  if (UseG1GC) {
+    FLAG_SET_DEFAULT(ReduceBulkZeroing, false);
+  }
 #endif
 
   if (!check_vm_args_consistency()) {
@@ -2519,12 +2535,29 @@
     }
   }
 
+
   // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS
   jint result = parse_vm_init_args(args);
   if (result != JNI_OK) {
     return result;
   }
 
+  // These are hacks until G1 is fully supported and tested,
+  // but they let you force -XX:+UseG1GC in PRT and get it to where it (mostly) works
+  if (UseG1GC) {
+    if (UseConcMarkSweepGC || UseParNewGC || UseParallelGC || UseParallelOldGC || UseSerialGC) {
+#ifndef PRODUCT
+      tty->print_cr("-XX:+UseG1GC is incompatible with other collectors, using UseG1GC");
+#endif // PRODUCT
+      UseConcMarkSweepGC = false;
+      UseParNewGC        = false;
+      UseParallelGC      = false;
+      UseParallelOldGC   = false;
+      UseSerialGC        = false;
+    }
+    no_shared_spaces();
+  }
+
 #ifndef PRODUCT
   if (TraceBytecodesAt != 0) {
     TraceBytecodes = true;
@@ -2570,6 +2603,12 @@
     // Set some flags for ParNew
     set_parnew_gc_flags();
   }
+  // Temporary; make the "if" an "else-if" before
+  // we integrate G1. XXX
+  if (UseG1GC) {
+    // Set some flags for garbage-first, if needed.
+    set_g1_gc_flags();
+  }
 
 #ifdef SERIALGC
   assert(verify_serial_gc_flags(), "SerialGC unset");
--- a/hotspot/src/share/vm/runtime/arguments.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/arguments.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -294,10 +294,14 @@
   // CMS/ParNew garbage collectors
   static void set_parnew_gc_flags();
   static void set_cms_and_parnew_gc_flags();
-  // UseParallelGC
+  // UseParallel[Old]GC
   static void set_parallel_gc_flags();
+  // Garbage-First (UseG1GC)
+  static void set_g1_gc_flags();
   // GC ergonomics
   static void set_ergonomics_flags();
+  // Set up the heap size for a server platform
+  static void set_server_heap_size();
   // Based on automatic selection criteria, should the
   // low pause collector be used.
   static bool should_auto_select_low_pause_collector();
--- a/hotspot/src/share/vm/runtime/globals.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/globals.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,8 @@
 
 RUNTIME_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \
               MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \
-              MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG, \
+              MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
+              MATERIALIZE_NOTPRODUCT_FLAG, \
               MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG, \
               MATERIALIZE_LP64_PRODUCT_FLAG)
 
@@ -37,12 +38,16 @@
                  MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
 
 bool Flag::is_unlocker() const {
-  return strcmp(name, "UnlockDiagnosticVMOptions") == 0;
+  return strcmp(name, "UnlockDiagnosticVMOptions") == 0 ||
+         strcmp(name, "UnlockExperimentalVMOptions") == 0;
 }
 
 bool Flag::is_unlocked() const {
   if (strcmp(kind, "{diagnostic}") == 0) {
     return UnlockDiagnosticVMOptions;
+  } else if (strcmp(kind, "{experimental}") == 0) {
+    return UnlockExperimentalVMOptions;
   } else {
     return true;
   }
@@ -125,6 +130,7 @@
 #define RUNTIME_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{product}", DEFAULT },
 #define RUNTIME_PD_PRODUCT_FLAG_STRUCT(type, name, doc)     { #type, XSTR(name), &name, "{pd product}", DEFAULT },
 #define RUNTIME_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{diagnostic}", DEFAULT },
+#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{experimental}", DEFAULT },
 #define RUNTIME_MANAGEABLE_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{manageable}", DEFAULT },
 #define RUNTIME_PRODUCT_RW_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{product rw}", DEFAULT },
 
@@ -172,8 +178,11 @@
 
 
 static Flag flagTable[] = {
- RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT)
+ RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_EXPERIMENTAL_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT)
  RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT)
+#ifndef SERIALGC
+ G1_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_EXPERIMENTAL_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT)
+#endif // SERIALGC
 #ifdef COMPILER1
  C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, C1_PD_DEVELOP_FLAG_STRUCT, C1_PRODUCT_FLAG_STRUCT, C1_PD_PRODUCT_FLAG_STRUCT, C1_NOTPRODUCT_FLAG_STRUCT)
 #endif
@@ -196,7 +205,8 @@
   for (Flag* current = &flagTable[0]; current->name; current++) {
     if (str_equal(current->name, name, length)) {
       if (!(current->is_unlocked() || current->is_unlocker())) {
-        // disable use of diagnostic flags until they are unlocked
+        // disable use of diagnostic or experimental flags until they
+        // are explicitly unlocked
         return NULL;
       }
       return current;
@@ -355,8 +365,11 @@
   if (result == NULL) return false;
   if (!result->is_ccstr()) return false;
   ccstr old_value = result->get_ccstr();
-  char* new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
-  strcpy(new_value, *value);
+  char* new_value = NULL;
+  if (*value != NULL) {
+    new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
+    strcpy(new_value, *value);
+  }
   result->set_ccstr(new_value);
   if (result->origin == DEFAULT && old_value != NULL) {
     // Prior value is NOT heap allocated, but was a literal constant.
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -255,7 +255,19 @@
 // diagnostic information about VM problems.  To use a VM diagnostic
 // option, you must first specify +UnlockDiagnosticVMOptions.
 // (This master switch also affects the behavior of -Xprintflags.)
-
+//
+// experimental flags are in support of features that are not
+//    part of the officially supported product, but are available
+//    for experimenting with. They could, for example, be performance
+//    features that may not have undergone full or rigorous QA, but which
+//    may help performance in some cases and are released for
+//    experimentation by the community of users and developers. This
+//    category also makes it possible to ship a fully supported product
+//    that nonetheless includes some unsupported, lightly tested,
+//    experimental features.
+//    Like the UnlockDiagnosticVMOptions flag above, there is a
+//    corresponding UnlockExperimentalVMOptions flag, which controls
+//    access to and modification of the experimental flags.
+//
 // manageable flags are writeable external product flags.
 //    They are dynamically writeable through the JDK management interface
 //    (com.sun.management.HotSpotDiagnosticMXBean API) and also through JConsole.
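
The changes in globals.cpp above show how the new experimental category is gated: a flag tagged "{experimental}" is invisible to flag lookup unless UnlockExperimentalVMOptions is set (so, per this patch, enabling G1 requires something like -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC). The following standalone sketch mirrors that gating with simplified stand-ins; the Flag struct, table, and find_flag() below are illustrative, not HotSpot's real macro machinery.

// Simplified, self-contained model of the experimental-flag gating.
#include <cstring>
#include <cstdio>

static bool UnlockDiagnosticVMOptions   = false;
static bool UnlockExperimentalVMOptions = false;  // off by default
static bool UseG1GC                     = false;  // "{experimental}" in this patch

struct Flag {
  const char* name;
  const char* kind;   // "{product}", "{diagnostic}", "{experimental}", ...
  bool*       value;

  // Unlocker flags must always be recognizable, even while "locked".
  bool is_unlocker() const {
    return strcmp(name, "UnlockDiagnosticVMOptions") == 0 ||
           strcmp(name, "UnlockExperimentalVMOptions") == 0;
  }
  bool is_unlocked() const {
    if (strcmp(kind, "{diagnostic}") == 0)   return UnlockDiagnosticVMOptions;
    if (strcmp(kind, "{experimental}") == 0) return UnlockExperimentalVMOptions;
    return true;
  }
};

static Flag flag_table[] = {
  { "UnlockExperimentalVMOptions", "{experimental}", &UnlockExperimentalVMOptions },
  { "UseG1GC",                     "{experimental}", &UseG1GC },
};

// Locked flags behave as if they did not exist (lookup returns NULL).
static Flag* find_flag(const char* name) {
  for (Flag& f : flag_table) {
    if (strcmp(f.name, name) == 0) {
      if (!(f.is_unlocked() || f.is_unlocker())) return nullptr;
      return &f;
    }
  }
  return nullptr;
}

int main() {
  printf("UseG1GC visible? %s\n", find_flag("UseG1GC") ? "yes" : "no");  // no
  UnlockExperimentalVMOptions = true;   // i.e. -XX:+UnlockExperimentalVMOptions
  printf("UseG1GC visible? %s\n", find_flag("UseG1GC") ? "yes" : "no");  // yes
  return 0;
}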
@@ -285,7 +297,7 @@
 // Note that when there is a need to support develop flags to be writeable,
 // it can be done in the same way as product_rw.
 
-#define RUNTIME_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, manageable, product_rw, lp64_product) \
+#define RUNTIME_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw, lp64_product) \
                                                                             \
   lp64_product(bool, UseCompressedOops, false,                              \
             "Use 32-bit object references in 64-bit VM. "                   \
@@ -307,7 +319,10 @@
           "Prints flags that appeared on the command line")                 \
                                                                             \
   diagnostic(bool, UnlockDiagnosticVMOptions, trueInDebug,                  \
-          "Enable processing of flags relating to field diagnostics")       \
+          "Enable normal processing of flags relating to field diagnostics")\
+                                                                            \
+  experimental(bool, UnlockExperimentalVMOptions, false,                    \
+          "Enable normal processing of flags relating to experimental features")\
                                                                             \
   product(bool, JavaMonitorsInStackTrace, true,                             \
           "Print info. about Java monitor locks when the stacks are dumped")\
@@ -315,6 +330,12 @@
   product_pd(bool, UseLargePages,                                           \
           "Use large page memory")                                          \
                                                                             \
+  product_pd(bool, UseLargePagesIndividualAllocation,                       \
+          "Allocate large pages individually for better affinity")          \
+                                                                            \
+  develop(bool, LargePagesIndividualAllocationInjectError, false,           \
+          "Fail large pages individual allocation")                         \
+                                                                            \
   develop(bool, TracePageSizes, false,                                      \
           "Trace page size selection and usage.")                           \
                                                                             \
@@ -692,7 +713,7 @@
   diagnostic(bool, PrintAssembly, false,                                    \
           "Print assembly code (using external disassembler.so)")           \
                                                                             \
-  diagnostic(ccstr, PrintAssemblyOptions, false,                            \
+  diagnostic(ccstr, PrintAssemblyOptions, NULL,                             \
           "Options string passed to disassembler.so")                       \
                                                                             \
   diagnostic(bool, PrintNMethods, false,                                    \
@@ -833,7 +854,7 @@
           "Use LWP-based instead of libthread-based synchronization "       \
           "(SPARC only)")                                                   \
                                                                             \
-  product(ccstr, SyncKnobs, "",                                             \
+  product(ccstr, SyncKnobs, NULL,                                           \
           "(Unstable) Various monitor synchronization tunables")            \
                                                                             \
   product(intx, EmitSync, 0,                                                \
@@ -976,6 +997,12 @@
   product(bool, UseXmmI2F, false,                                           \
           "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")      \
                                                                             \
+  product(bool, UseXMMForArrayCopy, false,                                  \
+          "Use SSE2 MOVQ instruction for Arraycopy")                        \
+                                                                            \
+  product(bool, UseUnalignedLoadStores, false,                              \
+          "Use SSE2 MOVDQU instruction for Arraycopy")                      \
+                                                                            \
   product(intx, FieldsAllocationStyle, 1,                                   \
           "0 - type based with oops first, 1 - with oops last")             \
                                                                             \
@@ -1017,7 +1044,7 @@
   notproduct(bool, TraceJVMCalls, false,                                    \
           "Trace JVM calls")                                                \
                                                                             \
-  product(ccstr, TraceJVMTI, "",                                            \
+  product(ccstr, TraceJVMTI, NULL,                                          \
           "Trace flags for JVMTI functions and events")                     \
                                                                             \
   /* This option can change an EMCP method into an obsolete method. */      \
@@ -1124,7 +1151,10 @@
   /* gc */                                                                  \
                                                                             \
   product(bool, UseSerialGC, false,                                         \
-          "Tells whether the VM should use serial garbage collector")       \
+          "Use the serial garbage collector")                               \
+                                                                            \
+  experimental(bool, UseG1GC, false,                                        \
+          "Use the Garbage-First garbage collector")                        \
                                                                             \
   product(bool, UseParallelGC, false,                                       \
           "Use the Parallel Scavenge garbage collector")                    \
@@ -1139,10 +1169,6 @@
           "In the Parallel Old garbage collector use parallel dense"        \
           " prefix update")                                                 \
                                                                             \
-  develop(bool, UseParallelOldGCChunkPointerCalc, true,                     \
-          "In the Parallel Old garbage collector use chucks to calculate"   \
-          " new object locations")                                          \
-                                                                            \
   product(uintx, HeapMaximumCompactionInterval, 20,                         \
           "How often should we maximally compact the heap (not allowing "   \
           "any dead space)")                                                \
@@ -1171,21 +1197,17 @@
   product(uintx, ParallelCMSThreads, 0,                                     \
           "Max number of threads CMS will use for concurrent work")         \
                                                                             \
-  develop(bool, VerifyParallelOldWithMarkSweep, false,                      \
-          "Use the MarkSweep code to verify phases of Parallel Old")        \
-                                                                            \
-  develop(uintx, VerifyParallelOldWithMarkSweepInterval, 1,                 \
-          "Interval at which the MarkSweep code is used to verify "         \
-          "phases of Parallel Old")                                         \
-                                                                            \
   develop(bool, ParallelOldMTUnsafeMarkBitMap, false,                       \
           "Use the Parallel Old MT unsafe in marking the bitmap")           \
                                                                             \
   develop(bool, ParallelOldMTUnsafeUpdateLiveData, false,                   \
           "Use the Parallel Old MT unsafe in update of live size")          \
                                                                             \
-  develop(bool, TraceChunkTasksQueuing, false,                              \
-          "Trace the queuing of the chunk tasks")                           \
+  develop(bool, TraceRegionTasksQueuing, false,                             \
+          "Trace the queuing of the region tasks")                          \
+                                                                            \
+  product(uintx, ParallelMarkingThreads, 0,                                 \
+          "Number of marking threads concurrent gc will use")               \
                                                                             \
   product(uintx, YoungPLABSize, 4096,                                       \
           "Size of young gen promotion labs (in HeapWords)")                \
@@ -1283,6 +1305,12 @@
           "The amount of young gen chosen by default per GC worker "        \
           "thread available ")                                              \
                                                                             \
+  product(bool, GCOverheadReporting, false,                                 \
+         "Enables the GC overhead reporting facility")                      \
+                                                                            \
+  product(intx, GCOverheadReportingPeriodMS, 100,                           \
+          "Reporting period for conc GC overhead reporting, in ms ")        \
+                                                                            \
   product(bool, CMSIncrementalMode, false,                                  \
           "Whether CMS GC should operate in \"incremental\" mode")          \
                                                                             \
@@ -1611,6 +1639,9 @@
   product(bool, ZeroTLAB, false,                                            \
           "Zero out the newly created TLAB")                                \
                                                                             \
+  product(bool, FastTLABRefill, true,                                       \
+          "Use fast TLAB refill code")                                      \
+                                                                            \
   product(bool, PrintTLAB, false,                                           \
           "Print various TLAB related information")                         \
                                                                             \
@@ -1800,6 +1831,9 @@
   diagnostic(bool, VerifyDuringGC, false,                                   \
           "Verify memory system during GC (between phases)")                \
                                                                             \
+  diagnostic(bool, GCParallelVerificationEnabled, true,                     \
+          "Enable parallel memory system verification")                     \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                      \
                                                                             \
@@ -2527,7 +2561,7 @@
   develop(intx, MaxRecursiveInlineLevel, 1,                                 \
           "maximum number of nested recursive calls that are inlined")      \
                                                                             \
-  develop(intx, InlineSmallCode, 1000,                                      \
+  product(intx, InlineSmallCode, 1000,                                      \
           "Only inline already compiled methods if their code size is "     \
           "less than this")                                                 \
                                                                             \
@@ -2811,6 +2845,12 @@
           "how many entries we'll try to leave on the stack during "        \
           "parallel GC")                                                    \
                                                                             \
+  product(intx, DCQBarrierQueueBufferSize, 256,                             \
+          "Number of elements in a dirty card queue buffer")                \
+                                                                            \
+  product(intx, DCQBarrierProcessCompletedThreshold, 5,                     \
+          "Number of completed dirty card buffers to trigger processing.")  \
+                                                                            \
   /* stack parameters */                                                    \
   product_pd(intx, StackYellowPages,                                        \
           "Number of yellow zone (recoverable overflows) pages")            \
@@ -3232,6 +3272,7 @@
 #define DECLARE_PRODUCT_FLAG(type, name, value, doc)    extern "C" type name;
 #define DECLARE_PD_PRODUCT_FLAG(type, name, doc)        extern "C" type name;
 #define DECLARE_DIAGNOSTIC_FLAG(type, name, value, doc) extern "C" type name;
+#define DECLARE_EXPERIMENTAL_FLAG(type, name, value, doc) extern "C" type name;
 #define DECLARE_MANAGEABLE_FLAG(type, name, value, doc) extern "C" type name;
 #define DECLARE_PRODUCT_RW_FLAG(type, name, value, doc) extern "C" type name;
 #ifdef PRODUCT
@@ -3254,6 +3295,7 @@
 #define MATERIALIZE_PRODUCT_FLAG(type, name, value, doc)   type name = value;
 #define MATERIALIZE_PD_PRODUCT_FLAG(type, name, doc)       type name = pd_##name;
 #define MATERIALIZE_DIAGNOSTIC_FLAG(type, name, value, doc) type name = value;
+#define MATERIALIZE_EXPERIMENTAL_FLAG(type, name, value, doc) type name = value;
 #define MATERIALIZE_MANAGEABLE_FLAG(type, name, value, doc) type name = value;
 #define MATERIALIZE_PRODUCT_RW_FLAG(type, name, value, doc) type name = value;
 #ifdef PRODUCT
@@ -3271,6 +3313,6 @@
 #define MATERIALIZE_LP64_PRODUCT_FLAG(type, name, value, doc) /* flag is constant */
 #endif // _LP64
 
-RUNTIME_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG, DECLARE_LP64_PRODUCT_FLAG)
+RUNTIME_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG, DECLARE_LP64_PRODUCT_FLAG)
 
 RUNTIME_OS_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
--- a/hotspot/src/share/vm/runtime/globals_extension.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/globals_extension.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -30,6 +30,7 @@
 #define RUNTIME_PRODUCT_FLAG_MEMBER(type, name, value, doc)    FLAG_MEMBER(name),
 #define RUNTIME_PD_PRODUCT_FLAG_MEMBER(type, name, doc)        FLAG_MEMBER(name),
 #define RUNTIME_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
+#define RUNTIME_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
 #define RUNTIME_MANAGEABLE_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
 #define RUNTIME_PRODUCT_RW_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name),
 #ifdef PRODUCT
@@ -74,21 +75,16 @@
 #endif
 
 typedef enum {
- RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER,
-               RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER,
-               RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER,
-               RUNTIME_PRODUCT_RW_FLAG_MEMBER,
-               RUNTIME_LP64_PRODUCT_FLAG_MEMBER)
- RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER,
-               RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER,
-               RUNTIME_NOTPRODUCT_FLAG_MEMBER)
+ RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_EXPERIMENTAL_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER, RUNTIME_LP64_PRODUCT_FLAG_MEMBER)
+ RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER)
+#ifndef KERNEL
+ G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_EXPERIMENTAL_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER)
+#endif
 #ifdef COMPILER1
- C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER,
-          C1_NOTPRODUCT_FLAG_MEMBER)
+ C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, C1_NOTPRODUCT_FLAG_MEMBER)
 #endif
 #ifdef COMPILER2
- C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER,
-          C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
+ C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
 #endif
  NUM_CommandLineFlag
 } CommandLineFlag;
@@ -100,6 +96,7 @@
 #define RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)    FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc)        FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
+#define RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
 #define RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type),
 #ifdef PRODUCT
@@ -144,24 +141,47 @@
 #endif
 
 typedef enum {
- RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+ RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+               RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_LP64_PRODUCT_FLAG_MEMBER_WITH_TYPE)
-RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
-               RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+ RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+                  RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+#ifndef KERNEL
+ G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE,
+          RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE)
+#endif // KERNEL
 #ifdef COMPILER1
- C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PRODUCT_FLAG_MEMBER_WITH_TYPE,
-          C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+ C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C1_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
 #endif
 #ifdef COMPILER2
- C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PRODUCT_FLAG_MEMBER_WITH_TYPE,
-          C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
+ C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          C2_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+          C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
 #endif
  NUM_CommandLineFlagWithType
 } CommandLineFlagWithType;
--- a/hotspot/src/share/vm/runtime/mutexLocker.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/mutexLocker.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -47,7 +47,7 @@
 Monitor* JNICritical_lock             = NULL;
 Mutex*   JvmtiThreadState_lock        = NULL;
 Monitor* JvmtiPendingEvent_lock       = NULL;
-Mutex*   Heap_lock                    = NULL;
+Monitor* Heap_lock                    = NULL;
 Mutex*   ExpandHeap_lock              = NULL;
 Mutex*   AdapterHandlerLibrary_lock   = NULL;
 Mutex*   SignatureHandlerLibrary_lock = NULL;
@@ -67,7 +67,18 @@
 Monitor* SLT_lock                     = NULL;
 Monitor* iCMS_lock                    = NULL;
 Monitor* FullGCCount_lock             = NULL;
+Monitor* CMark_lock                   = NULL;
+Monitor* ZF_mon                       = NULL;
+Monitor* Cleanup_mon                  = NULL;
+Monitor* G1ConcRefine_mon             = NULL;
+Mutex*   SATB_Q_FL_lock               = NULL;
+Monitor* SATB_Q_CBL_mon               = NULL;
+Mutex*   Shared_SATB_Q_lock           = NULL;
+Mutex*   DirtyCardQ_FL_lock           = NULL;
+Monitor* DirtyCardQ_CBL_mon           = NULL;
+Mutex*   Shared_DirtyCardQ_lock       = NULL;
 Mutex*   ParGCRareEvent_lock          = NULL;
+Mutex*   EvacFailureStack_lock        = NULL;
 Mutex*   DerivedPointerTableGC_lock   = NULL;
 Mutex*   Compile_lock                 = NULL;
 Monitor* MethodCompileQueue_lock      = NULL;
@@ -102,6 +113,9 @@
 Mutex*   PerfDataManager_lock         = NULL;
 Mutex*   OopMapCacheAlloc_lock        = NULL;
 
+Mutex*   MMUTracker_lock              = NULL;
+Mutex*   HotCardCache_lock            = NULL;
+
 Monitor* GCTaskManager_lock           = NULL;
 
 Mutex*   Management_lock              = NULL;
@@ -150,6 +164,23 @@
     def(iCMS_lock                  , Monitor, special,     true ); // CMS incremental mode start/stop notification
     def(FullGCCount_lock           , Monitor, leaf,        true ); // in support of ExplicitGCInvokesConcurrent
   }
+  if (UseG1GC) {
+    def(CMark_lock                 , Monitor, nonleaf,     true ); // coordinate concurrent mark thread
+    def(ZF_mon                     , Monitor, leaf,        true );
+    def(Cleanup_mon                , Monitor, nonleaf,     true );
+    def(G1ConcRefine_mon           , Monitor, nonleaf,     true );
+    def(SATB_Q_FL_lock             , Mutex  , special,     true );
+    def(SATB_Q_CBL_mon             , Monitor, nonleaf,     true );
+    def(Shared_SATB_Q_lock         , Mutex,   nonleaf,     true );
+
+    def(DirtyCardQ_FL_lock         , Mutex  , special,     true );
+    def(DirtyCardQ_CBL_mon         , Monitor, nonleaf,     true );
+    def(Shared_DirtyCardQ_lock     , Mutex,   nonleaf,     true );
+
+    def(MMUTracker_lock            , Mutex  , leaf     ,   true );
+    def(HotCardCache_lock          , Mutex  , special  ,   true );
+    def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );
+  }
   def(ParGCRareEvent_lock          , Mutex  , leaf     ,   true );
   def(DerivedPointerTableGC_lock   , Mutex,   leaf,        true );
   def(CodeCache_lock               , Mutex  , special,     true );
@@ -203,7 +234,7 @@
     def(SLT_lock                   , Monitor, nonleaf,     false );
                     // used in CMS GC for locking PLL lock
   }
-  def(Heap_lock                    , Mutex  , nonleaf+1,   false);
+  def(Heap_lock                    , Monitor, nonleaf+1,   false);
   def(JfieldIdCreation_lock        , Mutex  , nonleaf+1,   true ); // jfieldID, Used in VM_Operation
   def(JNICachedItableIndex_lock    , Mutex  , nonleaf+1,   false); // Used to cache an itable index during JNI invoke
 
--- a/hotspot/src/share/vm/runtime/mutexLocker.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/mutexLocker.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -38,7 +38,7 @@
 extern Monitor* JNICritical_lock;                // a lock used while entering and exiting JNI critical regions, allows GC to sometimes get in
 extern Mutex*   JvmtiThreadState_lock;           // a lock on modification of JVMTI thread data
 extern Monitor* JvmtiPendingEvent_lock;          // a lock on the JVMTI pending events list
-extern Mutex*   Heap_lock;                       // a lock on the heap
+extern Monitor* Heap_lock;                       // a lock on the heap
 extern Mutex*   ExpandHeap_lock;                 // a lock on expanding the heap
 extern Mutex*   AdapterHandlerLibrary_lock;      // a lock on the AdapterHandlerLibrary
 extern Mutex*   SignatureHandlerLibrary_lock;    // a lock on the SignatureHandlerLibrary
@@ -60,8 +60,30 @@
 extern Monitor* SLT_lock;                        // used in CMS GC for acquiring PLL
 extern Monitor* iCMS_lock;                       // CMS incremental mode start/stop notification
 extern Monitor* FullGCCount_lock;                // in support of "concurrent" full gc
+extern Monitor* CMark_lock;                      // used for concurrent mark thread coordination
+extern Monitor* ZF_mon;                          // used for G1 conc zero-fill.
+extern Monitor* Cleanup_mon;                     // used for G1 conc cleanup.
+extern Monitor* G1ConcRefine_mon;                // used for G1 conc-refine
+                                                 // coordination.
+
+extern Mutex*   SATB_Q_FL_lock;                  // Protects SATB Q
+                                                 // buffer free list.
+extern Monitor* SATB_Q_CBL_mon;                  // Protects SATB Q
+                                                 // completed buffer queue.
+extern Mutex*   Shared_SATB_Q_lock;              // Lock protecting SATB
+                                                 // queue shared by
+                                                 // non-Java threads.
+
+extern Mutex*   DirtyCardQ_FL_lock;              // Protects dirty card Q
+                                                 // buffer free list.
+extern Monitor* DirtyCardQ_CBL_mon;              // Protects dirty card Q
+                                                 // completed buffer queue.
+extern Mutex*   Shared_DirtyCardQ_lock;          // Lock protecting dirty card
+                                                 // queue shared by
+                                                 // non-Java threads.
                                                  // (see option ExplicitGCInvokesConcurrent)
 extern Mutex*   ParGCRareEvent_lock;             // Synchronizes various (rare) parallel GC ops.
+extern Mutex*   EvacFailureStack_lock;           // guards the evac failure scan stack
 extern Mutex*   Compile_lock;                    // a lock held when Compilation is updating code (used to block CodeCache traversal, CHA updates, etc)
 extern Monitor* MethodCompileQueue_lock;         // a lock held when method compilations are enqueued, dequeued
 #ifdef TIERED
@@ -93,6 +115,10 @@
 extern Mutex*   ParkerFreeList_lock;
 extern Mutex*   OopMapCacheAlloc_lock;           // protects allocation of oop_map caches
 
+extern Mutex*   MMUTracker_lock;                 // protects the MMU
+                                                 // tracker data structures
+extern Mutex*   HotCardCache_lock;               // protects the hot card cache
+
 extern Mutex*   Management_lock;                 // a lock used to serialize JVM management
 extern Monitor* LowMemory_lock;                  // a lock used for low memory detection
 
--- a/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -105,6 +105,18 @@
   static jlong elapsed_counter();
   static jlong elapsed_frequency();
 
+  // The "virtual time" of a thread is the amount of time a thread has
+  // actually run.  The first function indicates whether the OS supports
+  // this functionality for the current thread, and if so:
+  //   * the second enables vtime tracking (if that is required).
+  //   * the third tells whether vtime is enabled.
+  //   * the fourth returns the elapsed virtual time for the current
+  //     thread.
+  static bool supports_vtime();
+  static bool enable_vtime();
+  static bool vtime_enabled();
+  static double elapsedVTime();
+
   // Return current local time in a string (YYYY-MM-DD HH:MM:SS).
   // It is MT safe, but not async-safe, as reading time zone
   // information may require a lock on some platforms.
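
The new os::*vtime* hooks above expose per-thread virtual (CPU) time, which is what a concurrent GC thread wants for self-timing. A hedged usage sketch follows; the stub os namespace below fakes virtual time with std::clock purely so the sketch compiles on its own, and timed_phase() is a hypothetical caller, not HotSpot code.

// Hypothetical usage pattern for the vtime interface declared above.
#include <ctime>
#include <cstdio>

namespace os {
  inline bool supports_vtime() { return true; }    // stand-in
  inline bool enable_vtime()   { return true; }    // stand-in
  inline bool vtime_enabled()  { return true; }    // stand-in
  inline double elapsedVTime() { return (double)std::clock() / CLOCKS_PER_SEC; }
}

// Measure how much CPU time this thread actually consumed in a phase
// (e.g., a concurrent marking step), rather than wall-clock time.
static void timed_phase() {
  const bool use_vtime = os::supports_vtime() &&
                         (os::vtime_enabled() || os::enable_vtime());
  double start = use_vtime ? os::elapsedVTime() : 0.0;

  volatile long sink = 0;
  for (long i = 0; i < 10 * 1000 * 1000; i++) sink += i;   // pretend work

  if (use_vtime) {
    printf("phase consumed %.3f s of virtual (thread CPU) time\n",
           os::elapsedVTime() - start);
  }
}

int main() { timed_phase(); return 0; }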
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -111,6 +111,25 @@
 }
 #endif // PRODUCT
 
+#ifndef SERIALGC
+
+// G1 write-barrier pre: executed before a pointer store.
+JRT_LEAF(void, SharedRuntime::g1_wb_pre(oopDesc* orig, JavaThread *thread))
+  if (orig == NULL) {
+    assert(false, "should be optimized out");
+    return;
+  }
+  // store the original value that was in the field reference
+  thread->satb_mark_queue().enqueue(orig);
+JRT_END
+
+// G1 write-barrier post: executed after a pointer store.
+JRT_LEAF(void, SharedRuntime::g1_wb_post(void* card_addr, JavaThread* thread))
+  thread->dirty_card_queue().enqueue(card_addr);
+JRT_END
+
+#endif // !SERIALGC
+
 
 JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x))
   return x * y;
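
The two runtime entries added above are the slow paths for G1's write barriers: g1_wb_pre logs the old referent before a pointer store (SATB), g1_wb_post logs the card after it. The standalone sketch below shows the shape of such a barriered store conceptually; the queue types, card size, and the store helper are simplified stand-ins, and real G1 adds further filtering (marking active, cross-region, NULL cards) that is omitted here.

// Toy illustration of the pre/post barrier pair around a reference store.
#include <vector>
#include <cstdint>
#include <cstddef>

struct Obj;                                      // opaque "oop" stand-in

static std::vector<Obj*>  satb_queue;            // stand-in for the SATB queue
static std::vector<void*> dirty_card_queue;      // stand-in for the dirty card queue

static const size_t card_shift = 9;              // 512-byte cards, for example

static void* card_for(void* field_addr) {
  return (void*)((uintptr_t)field_addr >> card_shift);
}

// A G1-style reference store with the barriers inlined around it.
static void barriered_store(Obj** field, Obj* new_val) {
  Obj* orig = *field;
  if (orig != NULL) {
    satb_queue.push_back(orig);                  // cf. g1_wb_pre: enqueue old value
  }
  *field = new_val;                              // the actual pointer store
  dirty_card_queue.push_back(card_for(field));   // cf. g1_wb_post: enqueue the card
}

int main() {
  Obj* a = (Obj*)0x1000;                         // dummy "objects" for the sketch
  Obj* b = (Obj*)0x2000;
  Obj* field = a;
  barriered_store(&field, b);
  return (int)(satb_queue.size() + dirty_card_queue.size());   // 2
}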
--- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -99,6 +99,12 @@
   static address raw_exception_handler_for_return_address(address return_address);
   static address exception_handler_for_return_address(address return_address);
 
+#ifndef SERIALGC
+  // G1 write barriers
+  static void g1_wb_pre(oopDesc* orig, JavaThread *thread);
+  static void g1_wb_post(void* card_addr, JavaThread* thread);
+#endif // !SERIALGC
+
   // exception handling and implicit exceptions
   static address compute_compiled_exc_handler(nmethod* nm, address ret_pc, Handle& exception,
                                               bool force_unwind, bool top_frame_only);
--- a/hotspot/src/share/vm/runtime/task.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/task.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -67,7 +67,6 @@
 
 PeriodicTask::PeriodicTask(size_t interval_time) :
   _counter(0), _interval(interval_time) {
-  assert(is_init_completed(), "Periodic tasks should not start during VM initialization");
   // Sanity check the interval time
   assert(_interval >= PeriodicTask::min_interval &&
          _interval <= PeriodicTask::max_interval &&
--- a/hotspot/src/share/vm/runtime/thread.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -1138,6 +1138,10 @@
 
 void JavaThread::initialize() {
   // Initialize fields
+
+  // Set the claimed par_id to -1 (ie not claiming any par_ids)
+  set_claimed_par_id(-1);
+
   set_saved_exception_pc(NULL);
   set_threadObj(NULL);
   _anchor.clear();
@@ -1209,7 +1213,18 @@
   pd_initialize();
 }
 
-JavaThread::JavaThread(bool is_attaching) : Thread() {
+#ifndef SERIALGC
+SATBMarkQueueSet JavaThread::_satb_mark_queue_set;
+DirtyCardQueueSet JavaThread::_dirty_card_queue_set;
+#endif // !SERIALGC
+
+JavaThread::JavaThread(bool is_attaching) :
+  Thread()
+#ifndef SERIALGC
+  , _satb_mark_queue(&_satb_mark_queue_set),
+  _dirty_card_queue(&_dirty_card_queue_set)
+#endif // !SERIALGC
+{
   initialize();
   _is_attaching = is_attaching;
 }
@@ -1255,7 +1270,13 @@
 // Remove this ifdef when C1 is ported to the compiler interface.
 static void compiler_thread_entry(JavaThread* thread, TRAPS);
 
-JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : Thread() {
+JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) :
+  Thread()
+#ifndef SERIALGC
+  , _satb_mark_queue(&_satb_mark_queue_set),
+  _dirty_card_queue(&_dirty_card_queue_set)
+#endif // !SERIALGC
+{
   if (TraceThreadEvents) {
     tty->print_cr("creating thread %p", this);
   }
@@ -2964,10 +2985,6 @@
       if (UseStringCache) {
         // Forcibly initialize java/lang/String and mutate the private
         // static final "stringCacheEnabled" field before we start creating instances
-#ifdef ASSERT
-        klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0);
-        assert(tmp_k == NULL, "java/lang/String should not be loaded yet");
-#endif
         klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0);
         KlassHandle k = KlassHandle(THREAD, k_o);
         guarantee(k.not_null(), "Must find java/lang/String");
@@ -3071,9 +3088,14 @@
 
 #ifndef SERIALGC
   // Support for ConcurrentMarkSweep. This should be cleaned up
-  // and better encapsulated. XXX YSR
-  if (UseConcMarkSweepGC) {
-    ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD);
+  // and better encapsulated. The ugly nested if test would go away
+  // once things are properly refactored. XXX YSR
+  if (UseConcMarkSweepGC || UseG1GC) {
+    if (UseConcMarkSweepGC) {
+      ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD);
+    } else {
+      ConcurrentMarkThread::makeSurrogateLockerThread(THREAD);
+    }
     if (HAS_PENDING_EXCEPTION) {
       vm_exit_during_initialization(Handle(THREAD, PENDING_EXCEPTION));
     }
--- a/hotspot/src/share/vm/runtime/thread.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/thread.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -783,6 +783,18 @@
   }   _jmp_ring[ jump_ring_buffer_size ];
 #endif /* PRODUCT */
 
+#ifndef SERIALGC
+  // Support for G1 barriers
+
+  ObjPtrQueue _satb_mark_queue;          // Thread-local log for SATB barrier.
+  // Set of all such queues.
+  static SATBMarkQueueSet _satb_mark_queue_set;
+
+  DirtyCardQueue _dirty_card_queue;      // Thread-local log for dirty cards.
+  // Set of all such queues.
+  static DirtyCardQueueSet _dirty_card_queue_set;
+#endif // !SERIALGC
+
   friend class VMThread;
   friend class ThreadWaitTransition;
   friend class VM_Exit;
@@ -1168,6 +1180,11 @@
 
   static ByteSize do_not_unlock_if_synchronized_offset() { return byte_offset_of(JavaThread, _do_not_unlock_if_synchronized); }
 
+#ifndef SERIALGC
+  static ByteSize satb_mark_queue_offset()       { return byte_offset_of(JavaThread, _satb_mark_queue); }
+  static ByteSize dirty_card_queue_offset()      { return byte_offset_of(JavaThread, _dirty_card_queue); }
+#endif // !SERIALGC
+
   // Returns the jni environment for this thread
   JNIEnv* jni_environment()                      { return &_jni_environment; }
 
@@ -1414,6 +1431,20 @@
     _stack_size_at_create = value;
   }
 
+#ifndef SERIALGC
+  // SATB marking queue support
+  ObjPtrQueue& satb_mark_queue() { return _satb_mark_queue; }
+  static SATBMarkQueueSet& satb_mark_queue_set() {
+    return _satb_mark_queue_set;
+  }
+
+  // Dirty card queue support
+  DirtyCardQueue& dirty_card_queue() { return _dirty_card_queue; }
+  static DirtyCardQueueSet& dirty_card_queue_set() {
+    return _dirty_card_queue_set;
+  }
+#endif // !SERIALGC
+
   // Machine dependent stuff
   #include "incls/_thread_pd.hpp.incl"
 
@@ -1445,6 +1476,14 @@
   // clearing/querying jni attach status
   bool is_attaching() const { return _is_attaching; }
   void set_attached() { _is_attaching = false; OrderAccess::fence(); }
+private:
+  // This field is used to determine if a thread has claimed
+  // a par_id: it is -1 if the thread has not claimed a par_id;
+  // otherwise its value is the par_id that has been claimed.
+  int _claimed_par_id;
+public:
+  int get_claimed_par_id() { return _claimed_par_id; }
+  void set_claimed_par_id(int id) { _claimed_par_id = id;}
 };
 
 // Inline implementation of JavaThread::current
--- a/hotspot/src/share/vm/runtime/virtualspace.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/virtualspace.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -251,24 +251,16 @@
       // increase size to a multiple of the desired alignment
       size = align_size_up(size, alignment);
       size_t extra_size = size + alignment;
-      char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
-      if (extra_base == NULL) return;
-      // Do manual alignement
-      base = (char*) align_size_up((uintptr_t) extra_base, alignment);
-      assert(base >= extra_base, "just checking");
-      // Release unused areas
-      size_t unused_bottom_size = base - extra_base;
-      size_t unused_top_size = extra_size - size - unused_bottom_size;
-      assert(unused_bottom_size % os::vm_allocation_granularity() == 0,
-             "size not allocation aligned");
-      assert(unused_top_size % os::vm_allocation_granularity() == 0,
-             "size not allocation aligned");
-      if (unused_bottom_size > 0) {
-        os::release_memory(extra_base, unused_bottom_size);
-      }
-      if (unused_top_size > 0) {
-        os::release_memory(base + size, unused_top_size);
-      }
+      do {
+        char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
+        if (extra_base == NULL) return;
+        // Do manual alignment
+        base = (char*) align_size_up((uintptr_t) extra_base, alignment);
+        assert(base >= extra_base, "just checking");
+        // Re-reserve the region at the aligned base address.
+        os::release_memory(extra_base, extra_size);
+        base = os::reserve_memory(size, base);
+      } while (base == NULL);
     }
   }
   // Done
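
The replacement loop above over-reserves, computes the aligned base inside the over-sized region, releases the whole thing, and re-reserves exactly at the aligned address, retrying if some other thread grabbed that range in between. The standalone sketch below shows the same idea, assuming a POSIX platform and using mmap/munmap as stand-ins for os::reserve_memory/os::release_memory; the alignment must be a power of two.

// Reserve/align/release/re-reserve retry loop, sketched with POSIX mmap.
#include <sys/mman.h>
#include <cstdint>
#include <cstddef>

// Reserve at 'addr' if possible (hint, not MAP_FIXED); NULL means "anywhere".
// Failure to attach at the requested address is reported as NULL.
static char* reserve_at_hint(void* addr, size_t bytes) {
  void* p = mmap(addr, bytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return NULL;
  if (addr != NULL && p != addr) { munmap(p, bytes); return NULL; }
  return (char*)p;
}

static char* reserve_aligned(size_t size, size_t alignment) {
  size_t extra_size = size + alignment;
  char* base = NULL;
  do {
    // Over-reserve so an aligned base is guaranteed to fall inside the region.
    char* extra_base = reserve_at_hint(NULL, extra_size);
    if (extra_base == NULL) return NULL;
    // Round up to the requested (power-of-two) alignment.
    base = (char*)(((uintptr_t)extra_base + alignment - 1) & ~(alignment - 1));
    // Give the whole region back, then try to re-reserve exactly at 'base'.
    munmap(extra_base, extra_size);
    base = reserve_at_hint(base, size);
    // If another thread mapped that range in the meantime, start over.
  } while (base == NULL);
  return base;
}

int main() {
  char* p = reserve_aligned(1 << 20, 1 << 21);   // 1 MB region, 2 MB aligned
  return p == NULL ? 1 : 0;
}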
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -302,7 +302,7 @@
   nonstatic_field(CardTableModRefBS,           _guard_region,                                 MemRegion)                             \
   nonstatic_field(CardTableModRefBS,           byte_map_base,                                 jbyte*)                                \
                                                                                                                                      \
-  nonstatic_field(CardTableRS,                 _ct_bs,                                        CardTableModRefBS)                     \
+  nonstatic_field(CardTableRS,                 _ct_bs,                                        CardTableModRefBSForCTRS*)             \
                                                                                                                                      \
   nonstatic_field(CollectedHeap,               _reserved,                                     MemRegion)                             \
   nonstatic_field(SharedHeap,                  _perm_gen,                                     PermGen*)                              \
@@ -995,6 +995,7 @@
   declare_toplevel_type(BarrierSet)                                       \
            declare_type(ModRefBarrierSet,             BarrierSet)         \
            declare_type(CardTableModRefBS,            ModRefBarrierSet)   \
+           declare_type(CardTableModRefBSForCTRS,     CardTableModRefBS)  \
   declare_toplevel_type(GenRemSet)                                        \
            declare_type(CardTableRS,                  GenRemSet)          \
   declare_toplevel_type(BlockOffsetSharedArray)                           \
@@ -1022,6 +1023,10 @@
   declare_toplevel_type(BlockOffsetSharedArray*)                          \
   declare_toplevel_type(GenRemSet*)                                       \
   declare_toplevel_type(CardTableRS*)                                     \
+  declare_toplevel_type(CardTableModRefBS*)                               \
+  declare_toplevel_type(CardTableModRefBS**)                              \
+  declare_toplevel_type(CardTableModRefBSForCTRS*)                        \
+  declare_toplevel_type(CardTableModRefBSForCTRS**)                       \
   declare_toplevel_type(CollectedHeap*)                                   \
   declare_toplevel_type(ContiguousSpace*)                                 \
   declare_toplevel_type(DefNewGeneration*)                                \
--- a/hotspot/src/share/vm/runtime/vm_operations.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/runtime/vm_operations.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -53,8 +53,13 @@
   template(ParallelGCFailedAllocation)            \
   template(ParallelGCFailedPermanentAllocation)   \
   template(ParallelGCSystemGC)                    \
+  template(CGC_Operation)                         \
   template(CMS_Initial_Mark)                      \
   template(CMS_Final_Remark)                      \
+  template(G1CollectFull)                         \
+  template(G1CollectForAllocation)                \
+  template(G1IncCollectionPause)                  \
+  template(G1PopRegionCollectionPause)            \
   template(EnableBiasedLocking)                   \
   template(RevokeBias)                            \
   template(BulkRevokeBias)                        \
--- a/hotspot/src/share/vm/services/heapDumper.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/services/heapDumper.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -343,7 +343,8 @@
 
 // Default stack trace ID (used for dummy HPROF_TRACE record)
 enum {
-  STACK_TRACE_ID = 1
+  STACK_TRACE_ID = 1,
+  INITIAL_CLASS_COUNT = 200
 };
 
 
@@ -408,6 +409,7 @@
   void write_u8(u8 x);
   void write_objectID(oop o);
   void write_classID(Klass* k);
+  void write_id(u4 x);
 };
 
 DumpWriter::DumpWriter(const char* path) {
@@ -548,6 +550,14 @@
 #endif
 }
 
+void DumpWriter::write_id(u4 x) {
+#ifdef _LP64
+  write_u8((u8) x);
+#else
+  write_u4(x);
+#endif
+}
+
 // We use java mirror as the class ID
 void DumpWriter::write_classID(Klass* k) {
   write_objectID(k->java_mirror());
@@ -596,6 +606,8 @@
   static void dump_object_array(DumpWriter* writer, objArrayOop array);
   // creates HPROF_GC_PRIM_ARRAY_DUMP record for the given type array
   static void dump_prim_array(DumpWriter* writer, typeArrayOop array);
+  // create HPROF_FRAME record for the given method and bci
+  static void dump_stack_frame(DumpWriter* writer, int frame_serial_num, int class_serial_num, methodOop m, int bci);
 };
 
 // write a header of the given type
@@ -1070,6 +1082,29 @@
   }
 }
 
+// create a HPROF_FRAME record of the given methodOop and bci
+void DumperSupport::dump_stack_frame(DumpWriter* writer,
+                                     int frame_serial_num,
+                                     int class_serial_num,
+                                     methodOop m,
+                                     int bci) {
+  int line_number;
+  if (m->is_native()) {
+    line_number = -3;  // native frame
+  } else {
+    line_number = m->line_number_from_bci(bci);
+  }
+
+  write_header(writer, HPROF_FRAME, 4*oopSize + 2*sizeof(u4));
+  writer->write_id(frame_serial_num);               // frame serial number
+  writer->write_objectID(m->name());                // method's name
+  writer->write_objectID(m->signature());           // method's signature
+
+  assert(Klass::cast(m->method_holder())->oop_is_instance(), "not instanceKlass");
+  writer->write_objectID(instanceKlass::cast(m->method_holder())->source_file_name());  // source file name
+  writer->write_u4(class_serial_num);               // class serial number
+  writer->write_u4((u4) line_number);               // line number
+}
 
 // Support class used to generate HPROF_UTF8 records from the entries in the
 // SymbolTable.
@@ -1104,12 +1139,15 @@
  private:
   DumpWriter* _writer;
   u4 _thread_serial_num;
+  int _frame_num;
   DumpWriter* writer() const                { return _writer; }
  public:
   JNILocalsDumper(DumpWriter* writer, u4 thread_serial_num) {
     _writer = writer;
     _thread_serial_num = thread_serial_num;
+    _frame_num = -1;  // default - empty stack
   }
+  void set_frame_number(int n) { _frame_num = n; }
   void do_oop(oop* obj_p);
   void do_oop(narrowOop* obj_p) { ShouldNotReachHere(); }
 };
@@ -1122,7 +1160,7 @@
     writer()->write_u1(HPROF_GC_ROOT_JNI_LOCAL);
     writer()->write_objectID(o);
     writer()->write_u4(_thread_serial_num);
-    writer()->write_u4((u4)-1); // empty
+    writer()->write_u4((u4)_frame_num);
   }
 }
 
@@ -1269,6 +1307,9 @@
   bool _gc_before_heap_dump;
   bool _is_segmented_dump;
   jlong _dump_start;
+  GrowableArray<Klass*>* _klass_map;
+  ThreadStackTrace** _stack_traces;
+  int _num_threads;
 
   // accessors
   DumpWriter* writer() const                    { return _writer; }
@@ -1291,9 +1332,16 @@
   static void do_basic_type_array_class_dump(klassOop k);
 
   // HPROF_GC_ROOT_THREAD_OBJ records
-  void do_thread(JavaThread* thread, u4 thread_serial_num);
+  int do_thread(JavaThread* thread, u4 thread_serial_num);
   void do_threads();
 
+  void add_class_serial_number(Klass* k, int serial_num) {
+    _klass_map->at_put_grow(serial_num, k);
+  }
+
+  // HPROF_TRACE and HPROF_FRAME records
+  void dump_stack_traces();
+
   // writes a HPROF_HEAP_DUMP or HPROF_HEAP_DUMP_SEGMENT record
   void write_dump_header();
 
@@ -1313,6 +1361,18 @@
     _gc_before_heap_dump = gc_before_heap_dump;
     _is_segmented_dump = false;
     _dump_start = (jlong)-1;
+    _klass_map = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(INITIAL_CLASS_COUNT, true);
+    _stack_traces = NULL;
+    _num_threads = 0;
+  }
+  ~VM_HeapDumper() {
+    if (_stack_traces != NULL) {
+      for (int i=0; i < _num_threads; i++) {
+        delete _stack_traces[i];
+      }
+      FREE_C_HEAP_ARRAY(ThreadStackTrace*, _stack_traces);
+    }
+    delete _klass_map;
   }
 
   VMOp_Type type() const { return VMOp_HeapDumper; }
@@ -1436,6 +1496,9 @@
     Klass* klass = Klass::cast(k);
     writer->write_classID(klass);
 
+    // add the klassOop and class serial number pair
+    dumper->add_class_serial_number(klass, class_serial_num);
+
     writer->write_u4(STACK_TRACE_ID);
 
     // class name ID
@@ -1465,15 +1528,15 @@
 // Walk the stack of the given thread.
 // Dumps a HPROF_GC_ROOT_JAVA_FRAME record for each local
 // Dumps a HPROF_GC_ROOT_JNI_LOCAL record for each JNI local
-void VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) {
+//
+// It returns the number of Java frames in this thread stack
+int VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) {
   JNILocalsDumper blk(writer(), thread_serial_num);
 
   oop threadObj = java_thread->threadObj();
   assert(threadObj != NULL, "sanity check");
 
-  // JNI locals for the top frame
-  java_thread->active_handles()->oops_do(&blk);
-
+  int stack_depth = 0;
   if (java_thread->has_last_Java_frame()) {
 
     // vframes are resource allocated
@@ -1484,13 +1547,14 @@
     RegisterMap reg_map(java_thread);
     frame f = java_thread->last_frame();
     vframe* vf = vframe::new_vframe(&f, &reg_map, java_thread);
+    frame* last_entry_frame = NULL;
 
     while (vf != NULL) {
+      blk.set_frame_number(stack_depth);
       if (vf->is_java_frame()) {
 
         // java frame (interpreted, compiled, ...)
         javaVFrame *jvf = javaVFrame::cast(vf);
-
         if (!(jvf->method()->is_native())) {
           StackValueCollection* locals = jvf->locals();
           for (int slot=0; slot<locals->size(); slot++) {
@@ -1501,44 +1565,61 @@
                 writer()->write_u1(HPROF_GC_ROOT_JAVA_FRAME);
                 writer()->write_objectID(o);
                 writer()->write_u4(thread_serial_num);
-                writer()->write_u4((u4)-1); // empty
+                writer()->write_u4((u4) stack_depth);
               }
             }
           }
+        } else {
+          // native frame
+          if (stack_depth == 0) {
+            // JNI locals for the top frame.
+            java_thread->active_handles()->oops_do(&blk);
+          } else {
+            if (last_entry_frame != NULL) {
+              // JNI locals for the entry frame
+              assert(last_entry_frame->is_entry_frame(), "checking");
+              last_entry_frame->entry_frame_call_wrapper()->handles()->oops_do(&blk);
+            }
+          }
         }
-      } else {
+        // increment only for Java frames
+        stack_depth++;
+        last_entry_frame = NULL;
 
+      } else {
         // externalVFrame - if it's an entry frame then report any JNI locals
-        // as roots
+        // as roots when we find the corresponding native javaVFrame
         frame* fr = vf->frame_pointer();
         assert(fr != NULL, "sanity check");
         if (fr->is_entry_frame()) {
-          fr->entry_frame_call_wrapper()->handles()->oops_do(&blk);
+          last_entry_frame = fr;
         }
       }
-
       vf = vf->sender();
     }
+  } else {
+    // no last java frame but there may be JNI locals
+    java_thread->active_handles()->oops_do(&blk);
   }
+  return stack_depth;
 }
 
 
 // write a HPROF_GC_ROOT_THREAD_OBJ record for each java thread. Then walk
 // the stack so that locals and JNI locals are dumped.
 void VM_HeapDumper::do_threads() {
-  u4 thread_serial_num = 0;
-  for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) {
+  for (int i=0; i < _num_threads; i++) {
+    JavaThread* thread = _stack_traces[i]->thread();
     oop threadObj = thread->threadObj();
-    if (threadObj != NULL && !thread->is_exiting() && !thread->is_hidden_from_external_view()) {
-      ++thread_serial_num;
-
-      writer()->write_u1(HPROF_GC_ROOT_THREAD_OBJ);
-      writer()->write_objectID(threadObj);
-      writer()->write_u4(thread_serial_num);
-      writer()->write_u4(STACK_TRACE_ID);
-
-      do_thread(thread, thread_serial_num);
-    }
+    u4 thread_serial_num = i+1;
+    u4 stack_serial_num = thread_serial_num + STACK_TRACE_ID;
+    writer()->write_u1(HPROF_GC_ROOT_THREAD_OBJ);
+    writer()->write_objectID(threadObj);
+    writer()->write_u4(thread_serial_num);  // thread number
+    writer()->write_u4(stack_serial_num);   // stack trace serial number
+    int num_frames = do_thread(thread, thread_serial_num);
+    assert(num_frames == _stack_traces[i]->get_stack_depth(),
+           "total number of Java frames not matched");
   }
 }
 
@@ -1547,16 +1628,16 @@
 // records:
 //
 //  HPROF_HEADER
-//  HPROF_TRACE
 //  [HPROF_UTF8]*
 //  [HPROF_LOAD_CLASS]*
+//  [[HPROF_FRAME]*|HPROF_TRACE]*
 //  [HPROF_GC_CLASS_DUMP]*
 //  HPROF_HEAP_DUMP
 //
-// The HPROF_TRACE record after the header is "dummy trace" record which does
-// not include any frames. Other records which require a stack trace ID will
-// specify the trace ID of this record (1). It also means we can run HAT without
-// needing the -stack false option.
+// The HPROF_TRACE records represent the stack traces of the threads at the
+// point the heap dump is generated, plus a "dummy trace" record which does
+// not include any frames. The dummy trace record is referenced as the
+// alloc site for objects whose allocation site is unknown.
 //
 // The HPROF_HEAP_DUMP record has a length followed by sub-records. To allow
 // the heap dump to be generated in a single pass we remember the position of
@@ -1578,17 +1659,8 @@
   }
 
   // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1
-  size_t used;
+  size_t used = ch->used();
   const char* header;
-#ifndef SERIALGC
-  if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) {
-    used = GenCollectedHeap::heap()->used();
-  } else {
-    used = ParallelScavengeHeap::heap()->used();
-  }
-#else // SERIALGC
-  used = GenCollectedHeap::heap()->used();
-#endif // SERIALGC
   if (used > (size_t)SegmentedHeapDumpThreshold) {
     set_segmented_dump();
     header = "JAVA PROFILE 1.0.2";
@@ -1601,12 +1673,6 @@
   writer()->write_u4(oopSize);
   writer()->write_u8(os::javaTimeMillis());
 
-  // HPROF_TRACE record without any frames
-  DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4));
-  writer()->write_u4(STACK_TRACE_ID);
-  writer()->write_u4(0);                    // thread number
-  writer()->write_u4(0);                    // frame count
-
   // HPROF_UTF8 records
   SymbolTableDumper sym_dumper(writer());
   SymbolTable::oops_do(&sym_dumper);
@@ -1615,6 +1681,10 @@
   SystemDictionary::classes_do(&do_load_class);
   Universe::basic_type_classes_do(&do_load_class);
 
+  // write HPROF_FRAME and HPROF_TRACE records
+  // This must be called after _klass_map has been built by the class iteration above.
+  dump_stack_traces();
+
   // write HPROF_HEAP_DUMP or HPROF_HEAP_DUMP_SEGMENT
   write_dump_header();
 
@@ -1655,6 +1725,47 @@
   end_of_dump();
 }
 
+void VM_HeapDumper::dump_stack_traces() {
+  // write a HPROF_TRACE record without any frames to be referenced as object alloc sites
+  DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4));
+  writer()->write_u4((u4) STACK_TRACE_ID);
+  writer()->write_u4(0);                    // thread number
+  writer()->write_u4(0);                    // frame count
+
+  _stack_traces = NEW_C_HEAP_ARRAY(ThreadStackTrace*, Threads::number_of_threads());
+  int frame_serial_num = 0;
+  for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) {
+    oop threadObj = thread->threadObj();
+    if (threadObj != NULL && !thread->is_exiting() && !thread->is_hidden_from_external_view()) {
+      // dump thread stack trace
+      ThreadStackTrace* stack_trace = new ThreadStackTrace(thread, false);
+      stack_trace->dump_stack_at_safepoint(-1);
+      _stack_traces[_num_threads++] = stack_trace;
+
+      // write HPROF_FRAME records for this thread's stack trace
+      int depth = stack_trace->get_stack_depth();
+      int thread_frame_start = frame_serial_num;
+      for (int j=0; j < depth; j++) {
+        StackFrameInfo* frame = stack_trace->stack_frame_at(j);
+        methodOop m = frame->method();
+        int class_serial_num = _klass_map->find(Klass::cast(m->method_holder()));
+        // the class serial number starts from 1
+        assert(class_serial_num > 0, "class not found");
+        DumperSupport::dump_stack_frame(writer(), ++frame_serial_num, class_serial_num, m, frame->bci());
+      }
+
+      // write HPROF_TRACE record for one thread
+      DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4) + depth*oopSize);
+      int stack_serial_num = _num_threads + STACK_TRACE_ID;
+      writer()->write_u4(stack_serial_num);      // stack trace serial number
+      writer()->write_u4((u4) _num_threads);     // thread serial number
+      writer()->write_u4(depth);                 // frame count
+      for (int j=1; j <= depth; j++) {
+        writer()->write_id(thread_frame_start + j);
+      }
+    }
+  }
+}
 
 // dump the heap to given path.
 int HeapDumper::dump(const char* path) {
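
The heap dumper changes above thread three serial-number spaces through the new records: thread serial numbers start at 1, each thread's HPROF_TRACE gets serial thread_serial_num + STACK_TRACE_ID (so the frame-less dummy trace keeps ID 1), and HPROF_FRAME serial numbers run consecutively across all threads. The following is only an illustration of that arithmetic, not code from the sources, using made-up stack depths.

#include <cstdio>

enum { STACK_TRACE_ID = 1 };   // the dummy trace keeps serial number 1

int main() {
  const int depths[] = { 3, 1, 4 };      // pretend stack depths of 3 threads
  int frame_serial = 0;                  // frames are numbered across threads
  for (int i = 0; i < 3; i++) {
    int thread_serial = i + 1;                           // threads start at 1
    int stack_serial  = thread_serial + STACK_TRACE_ID;  // 2, 3, 4, ...
    int first_frame   = frame_serial + 1;
    frame_serial += depths[i];
    std::printf("thread %d: trace %d, frames %d..%d\n",
                thread_serial, stack_serial, first_frame, frame_serial);
  }
  return 0;
}
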
--- a/hotspot/src/share/vm/services/management.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/services/management.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -886,7 +886,7 @@
   int count = 0;
   for (int i = 0; i < nFlags; i++) {
     Flag* flag = &Flag::flags[i];
-    // Exclude the diagnostic flags
+    // Exclude the locked (diagnostic, experimental) flags
     if (flag->is_unlocked() || flag->is_unlocker()) {
       count++;
     }
@@ -1487,7 +1487,7 @@
   int num_entries = 0;
   for (int i = 0; i < nFlags; i++) {
     Flag* flag = &Flag::flags[i];
-    // Exclude the diagnostic flags
+    // Exclude the locked (experimental, diagnostic) flags
     if (flag->is_unlocked() || flag->is_unlocker()) {
       Handle s = java_lang_String::create_from_str(flag->name, CHECK_0);
       flags_ah->obj_at_put(num_entries, s());
@@ -1616,7 +1616,7 @@
     int num_entries = 0;
     for (int i = 0; i < nFlags && num_entries < count;  i++) {
       Flag* flag = &Flag::flags[i];
-      // Exclude the diagnostic flags
+      // Exclude the locked (diagnostic, experimental) flags
       if (flag->is_unlocked() || flag->is_unlocker()) {
         add_global_entry(env, null_h, &globals[num_entries], flag, THREAD);
         num_entries++;
--- a/hotspot/src/share/vm/services/memoryService.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/services/memoryService.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -59,9 +59,13 @@
       add_parallel_scavenge_heap_info(ParallelScavengeHeap::heap());
       break;
     }
+    case CollectedHeap::G1CollectedHeap : {
+      G1CollectedHeap::g1_unimplemented();
+      return;
+    }
 #endif // SERIALGC
     default: {
-      guarantee(false, "Not recognized kind of heap");
+      guarantee(false, "Unrecognized kind of heap");
     }
   }
 
--- a/hotspot/src/share/vm/services/threadService.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/services/threadService.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -242,6 +242,7 @@
   ThreadStackTrace(JavaThread* thread, bool with_locked_monitors);
   ~ThreadStackTrace();
 
+  JavaThread*     thread()              { return _thread; }
   StackFrameInfo* stack_frame_at(int i) { return _frames->at(i); }
   int             get_stack_depth()     { return _depth; }
 
--- a/hotspot/src/share/vm/utilities/bitMap.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/bitMap.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -26,54 +26,59 @@
 # include "incls/_bitMap.cpp.incl"
 
 
-BitMap::BitMap(idx_t* map, idx_t size_in_bits) {
+BitMap::BitMap(bm_word_t* map, idx_t size_in_bits) :
+  _map(map), _size(size_in_bits)
+{
+  assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption.");
   assert(size_in_bits >= 0, "just checking");
-  _map = map;
-  _size = size_in_bits;
 }
 
 
-BitMap::BitMap(idx_t size_in_bits) {
-  assert(size_in_bits >= 0, "just checking");
-  _size = size_in_bits;
-  _map = NEW_RESOURCE_ARRAY(idx_t, size_in_words());
+BitMap::BitMap(idx_t size_in_bits, bool in_resource_area) :
+  _map(NULL), _size(0)
+{
+  assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption.");
+  resize(size_in_bits, in_resource_area);
 }
 
 
-void BitMap::resize(idx_t size_in_bits) {
+void BitMap::verify_index(idx_t index) const {
+    assert(index < _size, "BitMap index out of bounds");
+}
+
+void BitMap::verify_range(idx_t beg_index, idx_t end_index) const {
+#ifdef ASSERT
+    assert(beg_index <= end_index, "BitMap range error");
+    // Note that [0,0) and [size,size) are both valid ranges.
+    if (end_index != _size)  verify_index(end_index);
+#endif
+}
+
+void BitMap::resize(idx_t size_in_bits, bool in_resource_area) {
   assert(size_in_bits >= 0, "just checking");
-  size_t old_size_in_words = size_in_words();
-  uintptr_t* old_map = map();
+  idx_t old_size_in_words = size_in_words();
+  bm_word_t* old_map = map();
+
   _size = size_in_bits;
-  size_t new_size_in_words = size_in_words();
-  _map = NEW_RESOURCE_ARRAY(idx_t, new_size_in_words);
-  Copy::disjoint_words((HeapWord*) old_map, (HeapWord*) _map, MIN2(old_size_in_words, new_size_in_words));
+  idx_t new_size_in_words = size_in_words();
+  if (in_resource_area) {
+    _map = NEW_RESOURCE_ARRAY(bm_word_t, new_size_in_words);
+  } else {
+    if (old_map != NULL) FREE_C_HEAP_ARRAY(bm_word_t, _map);
+    _map = NEW_C_HEAP_ARRAY(bm_word_t, new_size_in_words);
+  }
+  Copy::disjoint_words((HeapWord*)old_map, (HeapWord*) _map,
+                       MIN2(old_size_in_words, new_size_in_words));
   if (new_size_in_words > old_size_in_words) {
     clear_range_of_words(old_size_in_words, size_in_words());
   }
 }
 
-// Returns a bit mask for a range of bits [beg, end) within a single word.  Each
-// bit in the mask is 0 if the bit is in the range, 1 if not in the range.  The
-// returned mask can be used directly to clear the range, or inverted to set the
-// range.  Note:  end must not be 0.
-inline BitMap::idx_t
-BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const {
-  assert(end != 0, "does not work when end == 0");
-  assert(beg == end || word_index(beg) == word_index(end - 1),
-         "must be a single-word range");
-  idx_t mask = bit_mask(beg) - 1;       // low (right) bits
-  if (bit_in_word(end) != 0) {
-    mask |= ~(bit_mask(end) - 1);       // high (left) bits
-  }
-  return mask;
-}
-
 void BitMap::set_range_within_word(idx_t beg, idx_t end) {
   // With a valid range (beg <= end), this test ensures that end != 0, as
   // required by inverted_bit_mask_for_range.  Also avoids an unnecessary write.
   if (beg != end) {
-    idx_t mask = inverted_bit_mask_for_range(beg, end);
+    bm_word_t mask = inverted_bit_mask_for_range(beg, end);
     *word_addr(beg) |= ~mask;
   }
 }
@@ -82,7 +87,7 @@
   // With a valid range (beg <= end), this test ensures that end != 0, as
   // required by inverted_bit_mask_for_range.  Also avoids an unnecessary write.
   if (beg != end) {
-    idx_t mask = inverted_bit_mask_for_range(beg, end);
+    bm_word_t mask = inverted_bit_mask_for_range(beg, end);
     *word_addr(beg) &= mask;
   }
 }
@@ -105,20 +110,6 @@
   }
 }
 
-inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) {
-  memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t));
-}
-
-inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) {
-  memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t));
-}
-
-inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const {
-  idx_t bit_rounded_up = bit + (BitsPerWord - 1);
-  // Check for integer arithmetic overflow.
-  return bit_rounded_up > bit ? word_index(bit_rounded_up) : size_in_words();
-}
-
 void BitMap::set_range(idx_t beg, idx_t end) {
   verify_range(beg, end);
 
@@ -187,6 +178,64 @@
   clear_range_within_word(bit_index(end_full_word), end);
 }
 
+void BitMap::mostly_disjoint_range_union(BitMap* from_bitmap,
+                                         idx_t   from_start_index,
+                                         idx_t   to_start_index,
+                                         size_t  word_num) {
+  // Ensure that the parameters are correct.
+  // These shouldn't be that expensive to check, hence I left them as
+  // guarantees.
+  guarantee(from_bitmap->bit_in_word(from_start_index) == 0,
+            "it should be aligned on a word boundary");
+  guarantee(bit_in_word(to_start_index) == 0,
+            "it should be aligned on a word boundary");
+  guarantee(word_num >= 2, "word_num should be at least 2");
+
+  intptr_t* from = (intptr_t*) from_bitmap->word_addr(from_start_index);
+  intptr_t* to   = (intptr_t*) word_addr(to_start_index);
+
+  if (*from != 0) {
+    // if it's 0, then there's no point in doing the CAS
+    while (true) {
+      intptr_t old_value = *to;
+      intptr_t new_value = old_value | *from;
+      intptr_t res       = Atomic::cmpxchg_ptr(new_value, to, old_value);
+      if (res == old_value) break;
+    }
+  }
+  ++from;
+  ++to;
+
+  for (size_t i = 0; i < word_num - 2; ++i) {
+    if (*from != 0) {
+      // if it's 0, then there's no point in doing the copy
+      assert(*to == 0, "nobody else should be writing here");
+      intptr_t new_value = *from;
+      *to = new_value;
+    }
+
+    ++from;
+    ++to;
+  }
+
+  if (*from != 0) {
+    // if it's 0, then there's no point in doing the CAS
+    while (true) {
+      intptr_t old_value = *to;
+      intptr_t new_value = old_value | *from;
+      intptr_t res       = Atomic::cmpxchg_ptr(new_value, to, old_value);
+      if (res == old_value) break;
+    }
+  }
+
+  // the -1 is because we didn't advance them after the final CAS
+  assert(from ==
+           (intptr_t*) from_bitmap->word_addr(from_start_index) + word_num - 1,
+            "invariant");
+  assert(to == (intptr_t*) word_addr(to_start_index) + word_num - 1,
+            "invariant");
+}
+
 void BitMap::at_put(idx_t offset, bool value) {
   if (value) {
     set_bit(offset);
@@ -282,11 +331,11 @@
 
 bool BitMap::contains(const BitMap other) const {
   assert(size() == other.size(), "must have same size");
-  uintptr_t* dest_map = map();
-  uintptr_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
-    uintptr_t word_union = dest_map[index] | other_map[index];
+    bm_word_t word_union = dest_map[index] | other_map[index];
     // If this has more bits set than dest_map[index], then other is not a
     // subset.
     if (word_union != dest_map[index]) return false;
@@ -296,8 +345,8 @@
 
 bool BitMap::intersects(const BitMap other) const {
   assert(size() == other.size(), "must have same size");
-  uintptr_t* dest_map = map();
-  uintptr_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
     if ((dest_map[index] & other_map[index]) != 0) return true;
@@ -308,8 +357,8 @@
 
 void BitMap::set_union(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
     dest_map[index] = dest_map[index] | other_map[index];
@@ -319,8 +368,8 @@
 
 void BitMap::set_difference(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size_in_words(); index++) {
     dest_map[index] = dest_map[index] & ~(other_map[index]);
@@ -330,8 +379,8 @@
 
 void BitMap::set_intersection(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     dest_map[index]  = dest_map[index] & other_map[index];
@@ -339,11 +388,26 @@
 }
 
 
+void BitMap::set_intersection_at_offset(BitMap other, idx_t offset) {
+  assert(other.size() >= offset, "offset not in range");
+  assert(other.size() - offset >= size(), "other not large enough");
+  // XXX Ideally, we would remove this restriction.
+  guarantee((offset % (sizeof(bm_word_t) * BitsPerByte)) == 0,
+            "Only handle aligned cases so far.");
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
+  idx_t offset_word_ind = word_index(offset);
+  idx_t size = size_in_words();
+  for (idx_t index = 0; index < size; index++) {
+    dest_map[index] = dest_map[index] & other_map[offset_word_ind + index];
+  }
+}
+
 bool BitMap::set_union_with_result(BitMap other) {
   assert(size() == other.size(), "must have same size");
   bool changed = false;
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     idx_t temp = map(index) | other_map[index];
@@ -357,11 +421,11 @@
 bool BitMap::set_difference_with_result(BitMap other) {
   assert(size() == other.size(), "must have same size");
   bool changed = false;
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
-    idx_t temp = dest_map[index] & ~(other_map[index]);
+    bm_word_t temp = dest_map[index] & ~(other_map[index]);
     changed = changed || (temp != dest_map[index]);
     dest_map[index] = temp;
   }
@@ -372,12 +436,12 @@
 bool BitMap::set_intersection_with_result(BitMap other) {
   assert(size() == other.size(), "must have same size");
   bool changed = false;
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
-    idx_t orig = dest_map[index];
-    idx_t temp = orig & other_map[index];
+    bm_word_t orig = dest_map[index];
+    bm_word_t temp = orig & other_map[index];
     changed = changed || (temp != orig);
     dest_map[index]  = temp;
   }
@@ -387,8 +451,8 @@
 
 void BitMap::set_from(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     dest_map[index] = other_map[index];
@@ -398,8 +462,8 @@
 
 bool BitMap::is_same(BitMap other) {
   assert(size() == other.size(), "must have same size");
-  idx_t* dest_map = map();
-  idx_t* other_map = other.map();
+  bm_word_t* dest_map = map();
+  bm_word_t* other_map = other.map();
   idx_t size = size_in_words();
   for (idx_t index = 0; index < size; index++) {
     if (dest_map[index] != other_map[index]) return false;
@@ -408,24 +472,24 @@
 }
 
 bool BitMap::is_full() const {
-  uintptr_t* word = map();
+  bm_word_t* word = map();
   idx_t rest = size();
   for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) {
-    if (*word != (uintptr_t) AllBits) return false;
+    if (*word != (bm_word_t) AllBits) return false;
     word++;
   }
-  return rest == 0 || (*word | ~right_n_bits((int)rest)) == (uintptr_t) AllBits;
+  return rest == 0 || (*word | ~right_n_bits((int)rest)) == (bm_word_t) AllBits;
 }
 
 
 bool BitMap::is_empty() const {
-  uintptr_t* word = map();
+  bm_word_t* word = map();
   idx_t rest = size();
   for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) {
-    if (*word != (uintptr_t) NoBits) return false;
+    if (*word != (bm_word_t) NoBits) return false;
     word++;
   }
-  return rest == 0 || (*word & right_n_bits((int)rest)) == (uintptr_t) NoBits;
+  return rest == 0 || (*word & right_n_bits((int)rest)) == (bm_word_t) NoBits;
 }
 
 void BitMap::clear_large() {
@@ -436,7 +500,7 @@
 // then modifications in and to the left of the _bit_ being
 // currently sampled will not be seen. Note also that the
 // interval [leftOffset, rightOffset) is right open.
-void BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) {
+bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) {
   verify_range(leftOffset, rightOffset);
 
   idx_t startIndex = word_index(leftOffset);
@@ -445,106 +509,71 @@
        offset < rightOffset && index < endIndex;
        offset = (++index) << LogBitsPerWord) {
     idx_t rest = map(index) >> (offset & (BitsPerWord - 1));
-    for (; offset < rightOffset && rest != (uintptr_t)NoBits; offset++) {
+    for (; offset < rightOffset && rest != (bm_word_t)NoBits; offset++) {
       if (rest & 1) {
-        blk->do_bit(offset);
+        if (!blk->do_bit(offset)) return false;
         //  resample at each closure application
         // (see, for instance, CMS bug 4525989)
         rest = map(index) >> (offset & (BitsPerWord -1));
-        // XXX debugging: remove
-        // The following assertion assumes that closure application
-        // doesn't clear bits (may not be true in general, e.g. G1).
-        assert(rest & 1,
-               "incorrect shift or closure application can clear bits?");
       }
       rest = rest >> 1;
     }
   }
+  return true;
+}
+
+BitMap::idx_t* BitMap::_pop_count_table = NULL;
+
+void BitMap::init_pop_count_table() {
+  if (_pop_count_table == NULL) {
+    BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256);
+    for (uint i = 0; i < 256; i++) {
+      table[i] = num_set_bits(i);
+    }
+
+    intptr_t res = Atomic::cmpxchg_ptr((intptr_t)  table,
+                                       (intptr_t*) &_pop_count_table,
+                                       (intptr_t)  NULL_WORD);
+    if (res != NULL_WORD) {
+      guarantee( _pop_count_table == (void*) res, "invariant" );
+      FREE_C_HEAP_ARRAY(bm_word_t, table);
+    }
+  }
 }
 
-BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset,
-                                          idx_t r_offset) const {
-  assert(l_offset <= size(), "BitMap index out of bounds");
-  assert(r_offset <= size(), "BitMap index out of bounds");
-  assert(l_offset <= r_offset, "l_offset > r_offset ?");
-
-  if (l_offset == r_offset) {
-    return l_offset;
-  }
-  idx_t   index = word_index(l_offset);
-  idx_t r_index = word_index(r_offset-1) + 1;
-  idx_t res_offset = l_offset;
+BitMap::idx_t BitMap::num_set_bits(bm_word_t w) {
+  idx_t bits = 0;
 
-  // check bits including and to the _left_ of offset's position
-  idx_t pos = bit_in_word(res_offset);
-  idx_t res = map(index) >> pos;
-  if (res != (uintptr_t)NoBits) {
-    // find the position of the 1-bit
-    for (; !(res & 1); res_offset++) {
-      res = res >> 1;
+  while (w != 0) {
+    while ((w & 1) == 0) {
+      w >>= 1;
     }
-    assert(res_offset >= l_offset, "just checking");
-    return MIN2(res_offset, r_offset);
+    bits++;
+    w >>= 1;
   }
-  // skip over all word length 0-bit runs
-  for (index++; index < r_index; index++) {
-    res = map(index);
-    if (res != (uintptr_t)NoBits) {
-      // found a 1, return the offset
-      for (res_offset = index << LogBitsPerWord; !(res & 1);
-           res_offset++) {
-        res = res >> 1;
-      }
-      assert(res & 1, "tautology; see loop condition");
-      assert(res_offset >= l_offset, "just checking");
-      return MIN2(res_offset, r_offset);
-    }
-  }
-  return r_offset;
+  return bits;
 }
 
-BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset,
-                                           idx_t r_offset) const {
-  assert(l_offset <= size(), "BitMap index out of bounds");
-  assert(r_offset <= size(), "BitMap index out of bounds");
-  assert(l_offset <= r_offset, "l_offset > r_offset ?");
-
-  if (l_offset == r_offset) {
-    return l_offset;
-  }
-  idx_t   index = word_index(l_offset);
-  idx_t r_index = word_index(r_offset-1) + 1;
-  idx_t res_offset = l_offset;
-
-  // check bits including and to the _left_ of offset's position
-  idx_t pos = res_offset & (BitsPerWord - 1);
-  idx_t res = (map(index) >> pos) | left_n_bits((int)pos);
+BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) {
+  assert(_pop_count_table != NULL, "precondition");
+  return _pop_count_table[c];
+}
 
-  if (res != (uintptr_t)AllBits) {
-    // find the position of the 0-bit
-    for (; res & 1; res_offset++) {
-      res = res >> 1;
-    }
-    assert(res_offset >= l_offset, "just checking");
-    return MIN2(res_offset, r_offset);
-  }
-  // skip over all word length 1-bit runs
-  for (index++; index < r_index; index++) {
-    res = map(index);
-    if (res != (uintptr_t)AllBits) {
-      // found a 0, return the offset
-      for (res_offset = index << LogBitsPerWord; res & 1;
-           res_offset++) {
-        res = res >> 1;
-      }
-      assert(!(res & 1), "tautology; see loop condition");
-      assert(res_offset >= l_offset, "just checking");
-      return MIN2(res_offset, r_offset);
+BitMap::idx_t BitMap::count_one_bits() const {
+  init_pop_count_table(); // If necessary.
+  idx_t sum = 0;
+  typedef unsigned char uchar;
+  for (idx_t i = 0; i < size_in_words(); i++) {
+    bm_word_t w = map()[i];
+    for (size_t j = 0; j < sizeof(bm_word_t); j++) {
+      sum += num_set_bits_from_table(uchar(w & 255));
+      w >>= 8;
     }
   }
-  return r_offset;
+  return sum;
 }
 
+
 #ifndef PRODUCT
 
 void BitMap::print_on(outputStream* st) const {
@@ -558,7 +587,7 @@
 #endif
 
 
-BitMap2D::BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot)
+BitMap2D::BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot)
   : _bits_per_slot(bits_per_slot)
   , _map(map, size_in_slots * bits_per_slot)
 {
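
count_one_bits() above sums the set bits a byte at a time through a lazily built 256-entry lookup table, published with a CAS so that only one of any racing initializers wins. Below is a self-contained, single-threaded sketch of the same byte-table technique, independent of the BitMap class and without the lazy CAS publication.

#include <cstddef>
#include <cstdint>
#include <cstdio>

static unsigned char pop_table[256];

static void init_pop_table() {
  for (unsigned i = 0; i < 256; i++) {
    unsigned bits = 0;
    for (unsigned w = i; w != 0; w >>= 1) bits += (w & 1);
    pop_table[i] = (unsigned char) bits;
  }
}

static unsigned count_one_bits(const uintptr_t* words, size_t nwords) {
  unsigned sum = 0;
  for (size_t i = 0; i < nwords; i++) {
    uintptr_t w = words[i];
    for (size_t j = 0; j < sizeof(uintptr_t); j++) {
      sum += pop_table[w & 0xff];   // one table lookup per byte
      w >>= 8;
    }
  }
  return sum;
}

int main() {
  init_pop_table();
  uintptr_t words[2] = { 0xf0f0, 0x1 };
  std::printf("%u\n", count_one_bits(words, 2));   // prints 9
  return 0;
}
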
--- a/hotspot/src/share/vm/utilities/bitMap.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/bitMap.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -22,25 +22,19 @@
  *
  */
 
-// Closure for iterating over BitMaps
+// Forward declaration.
+class BitMapClosure;
 
-class BitMapClosure VALUE_OBJ_CLASS_SPEC {
- public:
-  // Callback when bit in map is set
-  virtual void do_bit(size_t offset) = 0;
-};
-
-
-// Operations for bitmaps represented as arrays of unsigned 32- or 64-bit
-// integers (uintptr_t).
-//
-// Bit offsets are numbered from 0 to size-1
+// Operations for bitmaps represented as arrays of unsigned integers.
+// Bit offsets are numbered from 0 to size-1.
 
 class BitMap VALUE_OBJ_CLASS_SPEC {
   friend class BitMap2D;
 
  public:
   typedef size_t idx_t;         // Type used for bit and word indices.
+  typedef uintptr_t bm_word_t;  // Element type of array that represents
+                                // the bitmap.
 
   // Hints for range sizes.
   typedef enum {
@@ -48,8 +42,8 @@
   } RangeSizeHint;
 
  private:
-  idx_t* _map;     // First word in bitmap
-  idx_t  _size;    // Size of bitmap (in bits)
+  bm_word_t* _map;     // First word in bitmap
+  idx_t      _size;    // Size of bitmap (in bits)
 
   // Puts the given value at the given offset, using resize() to size
   // the bitmap appropriately if needed using factor-of-two expansion.
@@ -62,7 +56,7 @@
 
   // Return a mask that will select the specified bit, when applied to the word
   // containing the bit.
-  static idx_t bit_mask(idx_t bit)    { return (idx_t)1 << bit_in_word(bit); }
+  static bm_word_t bit_mask(idx_t bit) { return (bm_word_t)1 << bit_in_word(bit); }
 
   // Return the index of the word containing the specified bit.
   static idx_t word_index(idx_t bit)  { return bit >> LogBitsPerWord; }
@@ -71,66 +65,68 @@
   static idx_t bit_index(idx_t word)  { return word << LogBitsPerWord; }
 
   // Return the array of bitmap words, or a specific word from it.
-  idx_t* map() const           { return _map; }
-  idx_t  map(idx_t word) const { return _map[word]; }
+  bm_word_t* map() const           { return _map; }
+  bm_word_t  map(idx_t word) const { return _map[word]; }
 
   // Return a pointer to the word containing the specified bit.
-  idx_t* word_addr(idx_t bit) const { return map() + word_index(bit); }
+  bm_word_t* word_addr(idx_t bit) const { return map() + word_index(bit); }
 
   // Set a word to a specified value or to all ones; clear a word.
-  void set_word  (idx_t word, idx_t val) { _map[word] = val; }
+  void set_word  (idx_t word, bm_word_t val) { _map[word] = val; }
   void set_word  (idx_t word)            { set_word(word, ~(uintptr_t)0); }
   void clear_word(idx_t word)            { _map[word] = 0; }
 
   // Utilities for ranges of bits.  Ranges are half-open [beg, end).
 
   // Ranges within a single word.
-  inline idx_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const;
-  inline void  set_range_within_word      (idx_t beg, idx_t end);
-  inline void  clear_range_within_word    (idx_t beg, idx_t end);
-  inline void  par_put_range_within_word  (idx_t beg, idx_t end, bool value);
+  bm_word_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const;
+  void  set_range_within_word      (idx_t beg, idx_t end);
+  void  clear_range_within_word    (idx_t beg, idx_t end);
+  void  par_put_range_within_word  (idx_t beg, idx_t end, bool value);
 
   // Ranges spanning entire words.
-  inline void      set_range_of_words         (idx_t beg, idx_t end);
-  inline void      clear_range_of_words       (idx_t beg, idx_t end);
-  inline void      set_large_range_of_words   (idx_t beg, idx_t end);
-  inline void      clear_large_range_of_words (idx_t beg, idx_t end);
+  void      set_range_of_words         (idx_t beg, idx_t end);
+  void      clear_range_of_words       (idx_t beg, idx_t end);
+  void      set_large_range_of_words   (idx_t beg, idx_t end);
+  void      clear_large_range_of_words (idx_t beg, idx_t end);
 
   // The index of the first full word in a range.
-  inline idx_t word_index_round_up(idx_t bit) const;
+  idx_t word_index_round_up(idx_t bit) const;
 
   // Verification, statistics.
-  void verify_index(idx_t index) const {
-    assert(index < _size, "BitMap index out of bounds");
-  }
+  void verify_index(idx_t index) const;
+  void verify_range(idx_t beg_index, idx_t end_index) const;
 
-  void verify_range(idx_t beg_index, idx_t end_index) const {
-#ifdef ASSERT
-    assert(beg_index <= end_index, "BitMap range error");
-    // Note that [0,0) and [size,size) are both valid ranges.
-    if (end_index != _size)  verify_index(end_index);
-#endif
-  }
+  static idx_t* _pop_count_table;
+  static void init_pop_count_table();
+  static idx_t num_set_bits(bm_word_t w);
+  static idx_t num_set_bits_from_table(unsigned char c);
 
  public:
 
   // Constructs a bitmap with no map, and size 0.
   BitMap() : _map(NULL), _size(0) {}
 
-  // Construction
-  BitMap(idx_t* map, idx_t size_in_bits);
+  // Constructs a bitmap with the given map and size.
+  BitMap(bm_word_t* map, idx_t size_in_bits);
 
-  // Allocates necessary data structure in resource area
-  BitMap(idx_t size_in_bits);
+  // Constructs an empty bitmap of the given size (that is, this clears the
+  // new bitmap).  Allocates the map array in resource area if
+  // "in_resource_area" is true, else in the C heap.
+  BitMap(idx_t size_in_bits, bool in_resource_area = true);
 
-  void set_map(idx_t* map)          { _map = map; }
+  // Set the map and size.
+  void set_map(bm_word_t* map)      { _map = map; }
   void set_size(idx_t size_in_bits) { _size = size_in_bits; }
 
-  // Allocates necessary data structure in resource area.
+  // Allocates necessary data structure, either in the resource area
+  // or in the C heap, as indicated by "in_resource_area."
   // Preserves state currently in bit map by copying data.
   // Zeros any newly-addressable bits.
-  // Does not perform any frees (i.e., of current _map).
-  void resize(idx_t size_in_bits);
+  // If "in_resource_area" is false, frees the current map.
+  // (Note that this assumes that all calls to "resize" on the same BitMap
+  // use the same value for "in_resource_area".)
+  void resize(idx_t size_in_bits, bool in_resource_area = true);
 
   // Accessing
   idx_t size() const                    { return _size; }
@@ -157,11 +153,11 @@
 
   // Set or clear the specified bit.
   inline void set_bit(idx_t bit);
-  inline void clear_bit(idx_t bit);
+  void clear_bit(idx_t bit);
 
   // Atomically set or clear the specified bit.
-  inline bool par_set_bit(idx_t bit);
-  inline bool par_clear_bit(idx_t bit);
+  bool par_set_bit(idx_t bit);
+  bool par_clear_bit(idx_t bit);
 
   // Put the given value at the given offset. The parallel version
   // will CAS the value into the bitmap and is quite a bit slower.
@@ -183,23 +179,61 @@
   // Update a range of bits, using a hint about the size.  Currently only
   // inlines the predominant case of a 1-bit range.  Works best when hint is a
   // compile-time constant.
-  inline void set_range(idx_t beg, idx_t end, RangeSizeHint hint);
-  inline void clear_range(idx_t beg, idx_t end, RangeSizeHint hint);
-  inline void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint);
-  inline void par_clear_range  (idx_t beg, idx_t end, RangeSizeHint hint);
+  void set_range(idx_t beg, idx_t end, RangeSizeHint hint);
+  void clear_range(idx_t beg, idx_t end, RangeSizeHint hint);
+  void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint);
+  void par_clear_range  (idx_t beg, idx_t end, RangeSizeHint hint);
+
+  // It performs the union operation between subsets of equal length
+  // of two bitmaps (the target bitmap of the method and the
+  // from_bitmap) and stores the result to the target bitmap.  The
+  // from_start_index represents the first bit index of the subrange
+  // of the from_bitmap.  The to_start_index is the equivalent index
+  // in the target bitmap. Both indexes should be word-aligned, i.e. they
+  // should correspond to the first bit on a bitmap word (it's up to
+  // the caller to ensure this; the method does check it).  The length
+  // of the subset is specified with word_num and it is in number of
+  // bitmap words. The caller should ensure that this is at least 2
+  // (smaller ranges are not supported, to save extra checks).  Again,
+  // this is checked in the method.
+  //
+  // Atomicity concerns: it is assumed that any contention on the
+  // target bitmap with other threads will happen on the first and
+  // last words; the ones in between will be "owned" exclusively by
+  // the calling thread and, in fact, they will already be 0. So, the
+  // method performs a CAS on the first word, copies the next
+  // word_num-2 words, and finally performs a CAS on the last word.
+  void mostly_disjoint_range_union(BitMap* from_bitmap,
+                                   idx_t   from_start_index,
+                                   idx_t   to_start_index,
+                                   size_t  word_num);
+
 
   // Clearing
-  void clear();
   void clear_large();
+  inline void clear();
 
-  // Iteration support
-  void iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex);
-  inline void iterate(BitMapClosure* blk) {
+  // Iteration support.  Returns "true" if the iteration completed, false
+  // if the iteration terminated early (because the closure "blk" returned
+  // false).
+  bool iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex);
+  bool iterate(BitMapClosure* blk) {
     // call the version that takes an interval
-    iterate(blk, 0, size());
+    return iterate(blk, 0, size());
   }
 
-  // Looking for 1's and 0's to the "right"
+  // Looking for 1's and 0's at indices equal to or greater than "l_index",
+  // stopping if none has been found before "r_index", and returning
+  // "r_index" (which must be at most "size") in that case.
+  idx_t get_next_one_offset_inline (idx_t l_index, idx_t r_index) const;
+  idx_t get_next_zero_offset_inline(idx_t l_index, idx_t r_index) const;
+
+  // Like "get_next_one_offset_inline", except requires that "r_index" is
+  // aligned to bitsizeof(bm_word_t).
+  idx_t get_next_one_offset_inline_aligned_right(idx_t l_index,
+                                                        idx_t r_index) const;
+
+  // Non-inline versions of the above.
   idx_t get_next_one_offset (idx_t l_index, idx_t r_index) const;
   idx_t get_next_zero_offset(idx_t l_index, idx_t r_index) const;
 
@@ -210,12 +244,8 @@
     return get_next_zero_offset(offset, size());
   }
 
-
-
-  // Find the next one bit in the range [beg_bit, end_bit), or return end_bit if
-  // no one bit is found.  Equivalent to get_next_one_offset(), but inline for
-  // use in performance-critical code.
-  inline idx_t find_next_one_bit(idx_t beg_bit, idx_t end_bit) const;
+  // Returns the number of bits set in the bitmap.
+  idx_t count_one_bits() const;
 
   // Set operations.
   void set_union(BitMap bits);
@@ -232,6 +262,15 @@
   bool set_difference_with_result(BitMap bits);
   bool set_intersection_with_result(BitMap bits);
 
+  // Requires the submap of "bits" starting at offset to be at least as
+  // large as "this".  Modifies "this" to be the intersection of its
+  // current contents and the submap of "bits" starting at "offset" of the
+  // same length as "this."
+  // (For expedience, currently requires the offset to be aligned to the
+  // bitsize of a uintptr_t.  This should go away in the future though it
+  // will probably remain a good case to optimize.)
+  void set_intersection_at_offset(BitMap bits, idx_t offset);
+
   void set_from(BitMap bits);
 
   bool is_same(BitMap bits);
@@ -248,58 +287,13 @@
 #endif
 };
 
-inline void BitMap::set_bit(idx_t bit) {
-  verify_index(bit);
-  *word_addr(bit) |= bit_mask(bit);
-}
-
-inline void BitMap::clear_bit(idx_t bit) {
-  verify_index(bit);
-  *word_addr(bit) &= ~bit_mask(bit);
-}
-
-inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    set_bit(beg);
-  } else {
-    if (hint == large_range) {
-      set_large_range(beg, end);
-    } else {
-      set_range(beg, end);
-    }
-  }
-}
-
-inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    clear_bit(beg);
-  } else {
-    if (hint == large_range) {
-      clear_large_range(beg, end);
-    } else {
-      clear_range(beg, end);
-    }
-  }
-}
-
-inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    par_at_put(beg, true);
-  } else {
-    if (hint == large_range) {
-      par_at_put_large_range(beg, end, true);
-    } else {
-      par_at_put_range(beg, end, true);
-    }
-  }
-}
-
 
 // Convenience class wrapping BitMap which provides multiple bits per slot.
 class BitMap2D VALUE_OBJ_CLASS_SPEC {
  public:
-  typedef size_t idx_t;         // Type used for bit and word indices.
-
+  typedef BitMap::idx_t idx_t;          // Type used for bit and word indices.
+  typedef BitMap::bm_word_t bm_word_t;  // Element type of array that
+                                        // represents the bitmap.
  private:
   BitMap _map;
   idx_t  _bits_per_slot;
@@ -314,7 +308,7 @@
 
  public:
   // Construction. bits_per_slot must be greater than 0.
-  BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot);
+  BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot);
 
   // Allocates necessary data structure in resource area. bits_per_slot must be greater than 0.
   BitMap2D(idx_t size_in_slots, idx_t bits_per_slot);
@@ -359,38 +353,14 @@
     _map.at_put_grow(bit_index(slot_index, bit_within_slot_index), value);
   }
 
-  void clear() {
-    _map.clear();
-  }
+  void clear();
 };
 
-
-
-inline void BitMap::set_range_of_words(idx_t beg, idx_t end) {
-  uintptr_t* map = _map;
-  for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0;
-}
-
-
-inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) {
-  uintptr_t* map = _map;
-  for (idx_t i = beg; i < end; ++i) map[i] = 0;
-}
-
+// Closure for iterating over BitMaps
 
-inline void BitMap::clear() {
-  clear_range_of_words(0, size_in_words());
-}
-
-
-inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
-  if (hint == small_range && end - beg == 1) {
-    par_at_put(beg, false);
-  } else {
-    if (hint == large_range) {
-      par_at_put_large_range(beg, end, false);
-    } else {
-      par_at_put_range(beg, end, false);
-    }
-  }
-}
+class BitMapClosure VALUE_OBJ_CLASS_SPEC {
+ public:
+  // Callback when bit in map is set.  Should normally return "true";
+  // return of false indicates that the bitmap iteration should terminate.
+  virtual bool do_bit(BitMap::idx_t offset) = 0;
+};
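
The header also changes BitMapClosure::do_bit and BitMap::iterate from void to bool, so a closure can stop the walk early by returning false and the caller can see whether the iteration ran to completion. A hedged sketch of a caller-side closure against the declarations above; it assumes bitMap.hpp/bitMap.inline.hpp as modified here are included, and FindFirstSetBit is an invented name, not part of the sources.

// Records the first set bit it sees, then terminates the iteration by
// returning false from do_bit(), per the new bool-returning contract.
class FindFirstSetBit : public BitMapClosure {
  BitMap::idx_t _found;
  bool          _has_found;
 public:
  FindFirstSetBit() : _found(0), _has_found(false) {}
  virtual bool do_bit(BitMap::idx_t offset) {
    _found = offset;
    _has_found = true;
    return false;                  // false => stop iterating
  }
  bool          has_found() const { return _has_found; }
  BitMap::idx_t offset() const    { return _found; }
};

// Usage sketch:
//   FindFirstSetBit blk;
//   bool completed = bm.iterate(&blk);  // returns false once blk aborts
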
--- a/hotspot/src/share/vm/utilities/bitMap.inline.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/bitMap.inline.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -22,6 +22,17 @@
  *
  */
 
+
+inline void BitMap::set_bit(idx_t bit) {
+  verify_index(bit);
+  *word_addr(bit) |= bit_mask(bit);
+}
+
+inline void BitMap::clear_bit(idx_t bit) {
+  verify_index(bit);
+  *word_addr(bit) &= ~bit_mask(bit);
+}
+
 inline bool BitMap::par_set_bit(idx_t bit) {
   verify_index(bit);
   volatile idx_t* const addr = word_addr(bit);
@@ -64,42 +75,236 @@
   } while (true);
 }
 
-inline BitMap::idx_t
-BitMap::find_next_one_bit(idx_t beg_bit, idx_t end_bit) const
-{
-  verify_range(beg_bit, end_bit);
-  assert(bit_in_word(end_bit) == 0, "end_bit not word-aligned");
+inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    set_bit(beg);
+  } else {
+    if (hint == large_range) {
+      set_large_range(beg, end);
+    } else {
+      set_range(beg, end);
+    }
+  }
+}
+
+inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    clear_bit(beg);
+  } else {
+    if (hint == large_range) {
+      clear_large_range(beg, end);
+    } else {
+      clear_range(beg, end);
+    }
+  }
+}
+
+inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    par_at_put(beg, true);
+  } else {
+    if (hint == large_range) {
+      par_at_put_large_range(beg, end, true);
+    } else {
+      par_at_put_range(beg, end, true);
+    }
+  }
+}
 
-  if (beg_bit == end_bit) {
-    return beg_bit;
-  }
+inline void BitMap::set_range_of_words(idx_t beg, idx_t end) {
+  bm_word_t* map = _map;
+  for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0;
+}
+
+
+inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) {
+  bm_word_t* map = _map;
+  for (idx_t i = beg; i < end; ++i) map[i] = 0;
+}
+
+
+inline void BitMap::clear() {
+  clear_range_of_words(0, size_in_words());
+}
+
 
-  idx_t   index = word_index(beg_bit);
-  idx_t r_index = word_index(end_bit);
-  idx_t res_bit = beg_bit;
+inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) {
+  if (hint == small_range && end - beg == 1) {
+    par_at_put(beg, false);
+  } else {
+    if (hint == large_range) {
+      par_at_put_large_range(beg, end, false);
+    } else {
+      par_at_put_range(beg, end, false);
+    }
+  }
+}
+
+inline BitMap::idx_t
+BitMap::get_next_one_offset_inline(idx_t l_offset, idx_t r_offset) const {
+  assert(l_offset <= size(), "BitMap index out of bounds");
+  assert(r_offset <= size(), "BitMap index out of bounds");
+  assert(l_offset <= r_offset, "l_offset > r_offset ?");
+
+  if (l_offset == r_offset) {
+    return l_offset;
+  }
+  idx_t   index = word_index(l_offset);
+  idx_t r_index = word_index(r_offset-1) + 1;
+  idx_t res_offset = l_offset;
 
   // check bits including and to the _left_ of offset's position
-  idx_t res = map(index) >> bit_in_word(res_bit);
-  if (res != (uintptr_t) NoBits) {
+  idx_t pos = bit_in_word(res_offset);
+  idx_t res = map(index) >> pos;
+  if (res != (uintptr_t)NoBits) {
     // find the position of the 1-bit
-    for (; !(res & 1); res_bit++) {
+    for (; !(res & 1); res_offset++) {
       res = res >> 1;
     }
-    assert(res_bit >= beg_bit && res_bit < end_bit, "just checking");
-    return res_bit;
+    assert(res_offset >= l_offset &&
+           res_offset < r_offset, "just checking");
+    return MIN2(res_offset, r_offset);
   }
   // skip over all word length 0-bit runs
   for (index++; index < r_index; index++) {
     res = map(index);
-    if (res != (uintptr_t) NoBits) {
+    if (res != (uintptr_t)NoBits) {
       // found a 1, return the offset
-      for (res_bit = bit_index(index); !(res & 1); res_bit++) {
+      for (res_offset = bit_index(index); !(res & 1); res_offset++) {
         res = res >> 1;
       }
       assert(res & 1, "tautology; see loop condition");
-      assert(res_bit >= beg_bit && res_bit < end_bit, "just checking");
-      return res_bit;
+      assert(res_offset >= l_offset, "just checking");
+      return MIN2(res_offset, r_offset);
+    }
+  }
+  return r_offset;
+}
+
+inline BitMap::idx_t
+BitMap::get_next_zero_offset_inline(idx_t l_offset, idx_t r_offset) const {
+  assert(l_offset <= size(), "BitMap index out of bounds");
+  assert(r_offset <= size(), "BitMap index out of bounds");
+  assert(l_offset <= r_offset, "l_offset > r_offset ?");
+
+  if (l_offset == r_offset) {
+    return l_offset;
+  }
+  idx_t   index = word_index(l_offset);
+  idx_t r_index = word_index(r_offset-1) + 1;
+  idx_t res_offset = l_offset;
+
+  // check bits including and to the _left_ of offset's position
+  idx_t pos = res_offset & (BitsPerWord - 1);
+  idx_t res = (map(index) >> pos) | left_n_bits((int)pos);
+
+  if (res != (uintptr_t)AllBits) {
+    // find the position of the 0-bit
+    for (; res & 1; res_offset++) {
+      res = res >> 1;
+    }
+    assert(res_offset >= l_offset, "just checking");
+    return MIN2(res_offset, r_offset);
+  }
+  // skip over all word length 1-bit runs
+  for (index++; index < r_index; index++) {
+    res = map(index);
+    if (res != (uintptr_t)AllBits) {
+      // found a 0, return the offset
+      for (res_offset = index << LogBitsPerWord; res & 1;
+           res_offset++) {
+        res = res >> 1;
+      }
+      assert(!(res & 1), "tautology; see loop condition");
+      assert(res_offset >= l_offset, "just checking");
+      return MIN2(res_offset, r_offset);
     }
   }
-  return end_bit;
+  return r_offset;
+}
+
+inline BitMap::idx_t
+BitMap::get_next_one_offset_inline_aligned_right(idx_t l_offset,
+                                                 idx_t r_offset) const
+{
+  verify_range(l_offset, r_offset);
+  assert(bit_in_word(r_offset) == 0, "r_offset not word-aligned");
+
+  if (l_offset == r_offset) {
+    return l_offset;
+  }
+  idx_t   index = word_index(l_offset);
+  idx_t r_index = word_index(r_offset);
+  idx_t res_offset = l_offset;
+
+  // check bits including and to the _left_ of offset's position
+  idx_t res = map(index) >> bit_in_word(res_offset);
+  if (res != (uintptr_t)NoBits) {
+    // find the position of the 1-bit
+    for (; !(res & 1); res_offset++) {
+      res = res >> 1;
+    }
+    assert(res_offset >= l_offset &&
+           res_offset < r_offset, "just checking");
+    return res_offset;
+  }
+  // skip over all word length 0-bit runs
+  for (index++; index < r_index; index++) {
+    res = map(index);
+    if (res != (uintptr_t)NoBits) {
+      // found a 1, return the offset
+      for (res_offset = bit_index(index); !(res & 1); res_offset++) {
+        res = res >> 1;
+      }
+      assert(res & 1, "tautology; see loop condition");
+      assert(res_offset >= l_offset && res_offset < r_offset, "just checking");
+      return res_offset;
+    }
+  }
+  return r_offset;
 }
+
+
+// Returns a bit mask for a range of bits [beg, end) within a single word.  Each
+// bit in the mask is 0 if the bit is in the range, 1 if not in the range.  The
+// returned mask can be used directly to clear the range, or inverted to set the
+// range.  Note:  end must not be 0.
+inline BitMap::bm_word_t
+BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const {
+  assert(end != 0, "does not work when end == 0");
+  assert(beg == end || word_index(beg) == word_index(end - 1),
+         "must be a single-word range");
+  bm_word_t mask = bit_mask(beg) - 1;   // low (right) bits
+  if (bit_in_word(end) != 0) {
+    mask |= ~(bit_mask(end) - 1);       // high (left) bits
+  }
+  return mask;
+}
+
+inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) {
+  memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t));
+}
+
+inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) {
+  memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t));
+}
+
+inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const {
+  idx_t bit_rounded_up = bit + (BitsPerWord - 1);
+  // Check for integer arithmetic overflow.
+  return bit_rounded_up > bit ? word_index(bit_rounded_up) : size_in_words();
+}
+
+inline BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset,
+                                          idx_t r_offset) const {
+  return get_next_one_offset_inline(l_offset, r_offset);
+}
+
+inline BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset,
+                                           idx_t r_offset) const {
+  return get_next_zero_offset_inline(l_offset, r_offset);
+}
+
+inline void BitMap2D::clear() {
+  _map.clear();
+}
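
The two search routines added above implement the bit-scan walk their comments describe: examine the remainder of the first word, skip whole zero words, then walk the first non-zero word bit by bit. A minimal standalone sketch of the same walk, assuming a 64-bit word and free-standing helpers (the names below are illustrative, not the VM's):

    #include <cstdint>
    #include <cstddef>

    // Standalone illustration, not HotSpot code: find the index of the next set
    // bit in [beg, end) over a word array, mirroring get_next_one_offset_inline.
    static const size_t BitsPerWord = 64;            // assumption: 64-bit words

    inline size_t word_index(size_t bit)  { return bit / BitsPerWord; }
    inline size_t bit_in_word(size_t bit) { return bit % BitsPerWord; }

    size_t next_one_offset(const uint64_t* map, size_t beg, size_t end) {
      if (beg >= end) return end;
      size_t index   = word_index(beg);
      size_t r_index = word_index(end - 1) + 1;
      size_t offset  = beg;

      // Bits at and above 'beg' within the first word.
      uint64_t res = map[index] >> bit_in_word(offset);
      if (res != 0) {
        for (; !(res & 1); offset++) res >>= 1;
        return offset < end ? offset : end;          // same capping as MIN2 above
      }
      // Skip whole words that are all zero.
      for (index++; index < r_index; index++) {
        res = map[index];
        if (res != 0) {
          for (offset = index * BitsPerWord; !(res & 1); offset++) res >>= 1;
          return offset < end ? offset : end;
        }
      }
      return end;
    }
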
--- a/hotspot/src/share/vm/utilities/debug.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/debug.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -668,7 +668,7 @@
   oop target;
   void do_oop(oop* o) {
     if (o != NULL && *o == target) {
-      tty->print_cr("0x%08x", o);
+      tty->print_cr(INTPTR_FORMAT, o);
     }
   }
   void do_oop(narrowOop* o) { ShouldNotReachHere(); }
@@ -687,13 +687,13 @@
 
 
 static void findref(intptr_t x) {
-  GenCollectedHeap *gch = GenCollectedHeap::heap();
+  CollectedHeap *ch = Universe::heap();
   LookForRefInGenClosure lookFor;
   lookFor.target = (oop) x;
   LookForRefInObjectClosure look_in_object((oop) x);
 
   tty->print_cr("Searching heap:");
-  gch->object_iterate(&look_in_object);
+  ch->object_iterate(&look_in_object);
 
   tty->print_cr("Searching strong roots:");
   Universe::oops_do(&lookFor, false);
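
The switch from "0x%08x" to INTPTR_FORMAT avoids truncating pointer values to 32 bits on 64-bit builds. Outside the VM's format macros, the standard PRIxPTR macro gives the same effect; a minimal sketch:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    // Print the full pointer value; PRIxPTR plays the role that INTPTR_FORMAT
    // plays inside the VM.
    void print_ptr(const void* p) {
      std::printf("0x%" PRIxPTR "\n", (uintptr_t)p);
    }
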
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -99,7 +99,7 @@
   friend class VMStructs;
  private:
   char* i;
-#ifdef ASSERT
+#ifndef PRODUCT
  public:
   char* value() { return i; }
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/intHisto.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_intHisto.cpp.incl"
+
+IntHistogram::IntHistogram(int est, int max) : _max(max), _tot(0) {
+  assert(0 <= est && est <= max, "Preconditions");
+  _elements = new (ResourceObj::C_HEAP) GrowableArray<int>(est, true);
+  guarantee(_elements != NULL, "alloc failure");
+}
+
+void IntHistogram::add_entry(int outcome) {
+  if (outcome > _max) outcome = _max;
+  int new_count = _elements->at_grow(outcome) + 1;
+  _elements->at_put(outcome, new_count);
+  _tot++;
+}
+
+int IntHistogram::entries_for_outcome(int outcome) {
+  return _elements->at_grow(outcome);
+}
+
+void IntHistogram::print_on(outputStream* st) const {
+  double tot_d = (double)_tot;
+  st->print_cr("Outcome     # of occurrences   %% of occurrences");
+  st->print_cr("-----------------------------------------------");
+  for (int i=0; i < _elements->length()-2; i++) {
+    int cnt = _elements->at(i);
+    if (cnt != 0) {
+      st->print_cr("%7d        %10d         %8.4f",
+                   i, cnt, (double)cnt/tot_d);
+    }
+  }
+  // Does it have any max entries?
+  if (_elements->length()-1 == _max) {
+    int cnt = _elements->at(_max);
+    st->print_cr(">= %4d        %10d         %8.4f",
+                 _max, cnt, (double)cnt/tot_d);
+  }
+  st->print_cr("-----------------------------------------------");
+  st->print_cr("    All        %10d         %8.4f", _tot, 1.0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/intHisto.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This class implements a simple histogram.
+
+// A histogram summarizes a series of "measurements", each of which is
+// assumed (required in this implementation) to have an outcome that is a
+// non-negative integer.  The histogram efficiently maps measurement outcomes
+// to the number of measurements that had that outcome.
+
+// To print the results, invoke print_on() on your IntHistogram*.
+
+// Note: there is already an existing "Histogram" class, in file
+// histogram.{hpp,cpp}, but to my mind that's not a histogram, it's a table
+// mapping strings to counts.  To be a histogram (IMHO) it needs to map
+// numbers (in fact, integers) to number of occurrences of that number.
+
+// ysr: (i am not sure i agree with the above note.) i suspect we want to have a
+// histogram template that will map an arbitrary type (with a defined order
+// relation) to a count.
+
+
+class IntHistogram : public CHeapObj {
+ protected:
+  int _max;
+  int _tot;
+  GrowableArray<int>* _elements;
+
+public:
+  // Create a new, empty table.  "est" is an estimate of the maximum outcome
+  // that will be added, and "max" is an outcome such that all outcomes at
+  // least that large will be bundled with it.
+  IntHistogram(int est, int max);
+  // Add a measurement with the given outcome to the sequence.
+  void add_entry(int outcome);
+  // Return the number of entries recorded so far with the given outcome.
+  int  entries_for_outcome(int outcome);
+  // Return the total number of entries recorded so far.
+  int  total_entries() { return _tot; }
+  // Return the number of entries recorded so far with the given outcome as
+  // a fraction of the total number recorded so far.
+  double fraction_for_outcome(int outcome) {
+    return
+      (double)entries_for_outcome(outcome)/
+      (double)total_entries();
+  }
+  // Print the histogram on the given output stream.
+  void print_on(outputStream* st) const;
+};
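
As the header comment explains, outcomes at or above max are folded into one final bucket. A standalone sketch of the same bucketing policy, using std::vector in place of the VM's GrowableArray (class and method names below are illustrative):

    #include <vector>
    #include <cstdio>

    // Capped integer histogram: outcomes larger than 'max' land in the last bucket.
    class SimpleIntHistogram {
      std::vector<int> _counts;
      int _max;
      int _tot;
     public:
      explicit SimpleIntHistogram(int max) : _counts(max + 1, 0), _max(max), _tot(0) {}
      void add_entry(int outcome) {
        if (outcome > _max) outcome = _max;
        _counts[outcome]++;
        _tot++;
      }
      void print() const {
        for (int i = 0; i <= _max; i++) {
          if (_counts[i] != 0)
            std::printf("%7d %10d %8.4f\n", i, _counts[i], (double)_counts[i] / _tot);
        }
      }
    };
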
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/numberSeq.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,243 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_numberSeq.cpp.incl"
+
+AbsSeq::AbsSeq(double alpha) :
+  _num(0), _sum(0.0), _sum_of_squares(0.0),
+  _davg(0.0), _dvariance(0.0), _alpha(alpha) {
+}
+
+void AbsSeq::add(double val) {
+  if (_num == 0) {
+    // if the sequence is empty, the davg is the same as the value
+    _davg = val;
+    // and the variance is 0
+    _dvariance = 0.0;
+  } else {
+    // otherwise, calculate both
+    _davg = (1.0 - _alpha) * val + _alpha * _davg;
+    double diff = val - _davg;
+    _dvariance = (1.0 - _alpha) * diff * diff + _alpha * _dvariance;
+  }
+}
+
+double AbsSeq::avg() const {
+  if (_num == 0)
+    return 0.0;
+  else
+    return _sum / total();
+}
+
+double AbsSeq::variance() const {
+  if (_num <= 1)
+    return 0.0;
+
+  double x_bar = avg();
+  double result = _sum_of_squares / total() - x_bar * x_bar;
+  if (result < 0.0) {
+    // due to loss-of-precision errors, the variance might be negative
+    // by a small amount
+
+    //    guarantee(-0.1 < result && result < 0.0,
+    //        "if variance is negative, it should be very small");
+    result = 0.0;
+  }
+  return result;
+}
+
+double AbsSeq::sd() const {
+  double var = variance();
+  guarantee( var >= 0.0, "variance should not be negative" );
+  return sqrt(var);
+}
+
+double AbsSeq::davg() const {
+  return _davg;
+}
+
+double AbsSeq::dvariance() const {
+  if (_num <= 1)
+    return 0.0;
+
+  double result = _dvariance;
+  if (result < 0.0) {
+    // due to loss-of-precision errors, the variance might be negative
+    // by a small amount
+
+    guarantee(-0.1 < result && result < 0.0,
+               "if variance is negative, it should be very small");
+    result = 0.0;
+  }
+  return result;
+}
+
+double AbsSeq::dsd() const {
+  double var = dvariance();
+  guarantee( var >= 0.0, "variance should not be negative" );
+  return sqrt(var);
+}
+
+NumberSeq::NumberSeq(double alpha) :
+  AbsSeq(alpha), _maximum(0.0), _last(0.0) {
+}
+
+bool NumberSeq::check_nums(NumberSeq *total, int n, NumberSeq **parts) {
+  for (int i = 0; i < n; ++i) {
+    if (parts[i] != NULL && total->num() != parts[i]->num())
+      return false;
+  }
+  return true;
+}
+
+NumberSeq::NumberSeq(NumberSeq *total, int n, NumberSeq **parts) {
+  guarantee(check_nums(total, n, parts), "all seq lengths should match");
+  double sum = total->sum();
+  for (int i = 0; i < n; ++i) {
+    if (parts[i] != NULL)
+      sum -= parts[i]->sum();
+  }
+
+  _num = total->num();
+  _sum = sum;
+
+  // we do not calculate these...
+  _sum_of_squares = -1.0;
+  _maximum = -1.0;
+  _davg = -1.0;
+  _dvariance = -1.0;
+}
+
+void NumberSeq::add(double val) {
+  AbsSeq::add(val);
+
+  _last = val;
+  if (_num == 0) {
+    _maximum = val;
+  } else {
+    if (val > _maximum)
+      _maximum = val;
+  }
+  _sum += val;
+  _sum_of_squares += val * val;
+  ++_num;
+}
+
+
+TruncatedSeq::TruncatedSeq(int length, double alpha):
+  AbsSeq(alpha), _length(length), _next(0) {
+  _sequence = NEW_C_HEAP_ARRAY(double, _length);
+  for (int i = 0; i < _length; ++i)
+    _sequence[i] = 0.0;
+}
+
+void TruncatedSeq::add(double val) {
+  AbsSeq::add(val);
+
+  // get the oldest value in the sequence...
+  double old_val = _sequence[_next];
+  // ...remove it from the sum and sum of squares
+  _sum -= old_val;
+  _sum_of_squares -= old_val * old_val;
+
+  // ...and update them with the new value
+  _sum += val;
+  _sum_of_squares += val * val;
+
+  // now replace the old value with the new one
+  _sequence[_next] = val;
+  _next = (_next + 1) % _length;
+
+  // only increase it if the buffer is not full
+  if (_num < _length)
+    ++_num;
+
+  guarantee( variance() > -1.0, "variance should be >= 0" );
+}
+
+// can't easily keep track of this incrementally...
+double TruncatedSeq::maximum() const {
+  if (_num == 0)
+    return 0.0;
+  double ret = _sequence[0];
+  for (int i = 1; i < _num; ++i) {
+    double val = _sequence[i];
+    if (val > ret)
+      ret = val;
+  }
+  return ret;
+}
+
+double TruncatedSeq::last() const {
+  if (_num == 0)
+    return 0.0;
+  unsigned last_index = (_next + _length - 1) % _length;
+  return _sequence[last_index];
+}
+
+double TruncatedSeq::oldest() const {
+  if (_num == 0)
+    return 0.0;
+  else if (_num < _length)
+    // index 0 holds the oldest value until the array is full
+    return _sequence[0];
+  else {
+    // since the array is full, _next points to the oldest value
+    return _sequence[_next];
+  }
+}
+
+double TruncatedSeq::predict_next() const {
+  if (_num == 0)
+    return 0.0;
+
+  double num           = (double) _num;
+  double x_squared_sum = 0.0;
+  double x_sum         = 0.0;
+  double y_sum         = 0.0;
+  double xy_sum        = 0.0;
+  double x_avg         = 0.0;
+  double y_avg         = 0.0;
+
+  int first = (_next + _length - _num) % _length;
+  for (int i = 0; i < _num; ++i) {
+    double x = (double) i;
+    double y =  _sequence[(first + i) % _length];
+
+    x_squared_sum += x * x;
+    x_sum         += x;
+    y_sum         += y;
+    xy_sum        += x * y;
+  }
+  x_avg = x_sum / num;
+  y_avg = y_sum / num;
+
+  double Sxx = x_squared_sum - x_sum * x_sum / num;
+  double Sxy = xy_sum - x_sum * y_sum / num;
+  double b1 = Sxy / Sxx;
+  double b0 = y_avg - b1 * x_avg;
+
+  return b0 + b1 * num;
+}
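
predict_next() above fits an ordinary least-squares line y = b0 + b1*x through the last _num samples (with x = 0, 1, ..., _num-1) and evaluates it at x = _num. A standalone sketch of the same fit over a plain array, assuming at least two samples so that Sxx is non-zero:

    // Illustration only: least-squares prediction over the last n samples,
    // ordered oldest to newest. Requires n >= 2 so the denominator Sxx != 0.
    double predict_next(const double* y, int n) {
      double x_sum = 0.0, y_sum = 0.0, xx_sum = 0.0, xy_sum = 0.0;
      for (int i = 0; i < n; i++) {
        double x = (double)i;
        x_sum  += x;        y_sum  += y[i];
        xx_sum += x * x;    xy_sum += x * y[i];
      }
      double num = (double)n;
      double Sxx = xx_sum - x_sum * x_sum / num;
      double Sxy = xy_sum - x_sum * y_sum / num;
      double b1  = Sxy / Sxx;                          // slope
      double b0  = y_sum / num - b1 * (x_sum / num);   // intercept
      return b0 + b1 * num;                            // value predicted at x = n
    }
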
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/numberSeq.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ **  This file contains a few classes that represent a number sequence,
+ **  x1, x2, x3, ..., xN, and can calculate its avg, max, and sd.
+ **
+ **  Here's a quick description of the classes:
+ **
+ **    AbsSeq: abstract superclass
+ **    NumberSeq: the sequence is assumed to be very long and the
+ **      maximum, avg, sd, davg, and dsd are calculated over all its elements
+ **    TruncatedSeq: this class keeps track of the last L elements
+ **      of the sequence and calculates avg, max, and sd only over them
+ **/
+
+#define DEFAULT_ALPHA_VALUE 0.7
+
+class AbsSeq {
+private:
+  void init(double alpha);
+
+protected:
+  int    _num; // the number of elements in the sequence
+  double _sum; // the sum of the elements in the sequence
+  double _sum_of_squares; // the sum of squares of the elements in the sequence
+
+  double _davg; // decaying average
+  double _dvariance; // decaying variance
+  double _alpha; // factor for the decaying average / variance
+
+  // This is what we divide by to get the average. In a standard
+  // number sequence, this should just be the number of elements in it.
+  virtual double total() const { return (double) _num; };
+
+public:
+  AbsSeq(double alpha = DEFAULT_ALPHA_VALUE);
+
+  virtual void add(double val); // adds a new element to the sequence
+  void add(unsigned val) { add((double) val); }
+  virtual double maximum() const = 0; // maximum element in the sequence
+  virtual double last() const = 0; // last element added in the sequence
+
+  // the number of elements in the sequence
+  int num() const { return _num; }
+  // the sum of the elements in the sequence
+  double sum() const { return _sum; }
+
+  double avg() const; // the average of the sequence
+  double variance() const; // the variance of the sequence
+  double sd() const; // the standard deviation of the sequence
+
+  double davg() const; // decaying average
+  double dvariance() const; // decaying variance
+  double dsd() const; // decaying "standard deviation"
+};
+
+class NumberSeq: public AbsSeq {
+private:
+  bool check_nums(NumberSeq* total, int n, NumberSeq** parts);
+
+protected:
+  double _last;
+  double _maximum; // keep track of maximum value
+
+public:
+  NumberSeq(double alpha = DEFAULT_ALPHA_VALUE);
+  NumberSeq(NumberSeq* total, int n_parts, NumberSeq** parts);
+
+  virtual void add(double val);
+  virtual double maximum() const { return _maximum; }
+  virtual double last() const { return _last; }
+};
+
+class TruncatedSeq: public AbsSeq {
+private:
+  enum PrivateConstants {
+    DefaultSeqLength = 10
+  };
+  void init();
+protected:
+  double *_sequence; // buffers the last L elements in the sequence
+  int     _length; // this is L
+  int     _next;   // oldest slot in the array, i.e. next to be overwritten
+
+public:
+  // accepts a value for L
+  TruncatedSeq(int length = DefaultSeqLength,
+               double alpha = DEFAULT_ALPHA_VALUE);
+  virtual void add(double val);
+  virtual double maximum() const;
+  virtual double last() const; // the last value added to the sequence
+
+  double oldest() const; // the oldest valid value in the sequence
+  double predict_next() const; // prediction based on linear regression
+};
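
The davg/dvariance fields declared here are the exponentially decaying statistics updated in AbsSeq::add: davg = (1 - alpha) * val + alpha * davg, seeded with the first sample. A small standalone sketch with made-up sample values:

    #include <cstdio>

    // Decaying-average update as used by AbsSeq::add; sample values are
    // invented for illustration.
    int main() {
      const double alpha = 0.7;              // matches DEFAULT_ALPHA_VALUE above
      double samples[] = { 10.0, 12.0, 50.0, 11.0 };
      double davg = 0.0;
      bool first = true;
      for (double v : samples) {
        if (first) { davg = v; first = false; }
        else       { davg = (1.0 - alpha) * v + alpha * davg; }
        std::printf("sample %.1f -> davg %.3f\n", v, davg);
      }
      return 0;
    }
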
--- a/hotspot/src/share/vm/utilities/ostream.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/ostream.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -188,6 +188,17 @@
   print_raw(buf);
 }
 
+void outputStream::stamp(bool guard,
+                         const char* prefix,
+                         const char* suffix) {
+  if (!guard) {
+    return;
+  }
+  print_raw(prefix);
+  stamp();
+  print_raw(suffix);
+}
+
 void outputStream::date_stamp(bool guard,
                               const char* prefix,
                               const char* suffix) {
--- a/hotspot/src/share/vm/utilities/ostream.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/ostream.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -86,6 +86,10 @@
    // Time stamp
    TimeStamp& time_stamp() { return _stamp; }
    void stamp();
+   void stamp(bool guard, const char* prefix, const char* suffix);
+   void stamp(bool guard) {
+     stamp(guard, "", ": ");
+   }
    // Date stamp
    void date_stamp(bool guard, const char* prefix, const char* suffix);
    // A simplified call that includes a suffix of ": "
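
The guarded stamp() overloads follow the existing date_stamp() pattern: print prefix, elapsed-time stamp, and suffix only when the guard is true. A hypothetical call site (the flag and message below are illustrative, not taken from this changeset):

    // Prints the elapsed-time stamp followed by ": " only when the flag is on,
    // then the message.
    gclog_or_tty->stamp(PrintGCTimeStamps);
    gclog_or_tty->print_cr("concurrent mark start");
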
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -65,7 +65,8 @@
   os::sleep(Thread::current(), millis, false);
 }
 
-bool ParallelTaskTerminator::offer_termination() {
+bool
+ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
   Atomic::inc(&_offered_termination);
 
   juint yield_count = 0;
@@ -91,7 +92,8 @@
         sleep(WorkStealingSleepMillis);
       }
 
-      if (peek_in_queue_set()) {
+      if (peek_in_queue_set() ||
+          (terminator != NULL && terminator->should_exit_termination())) {
         Atomic::dec(&_offered_termination);
         return false;
       }
@@ -107,72 +109,72 @@
   }
 }
 
-bool ChunkTaskQueueWithOverflow::is_empty() {
-  return (_chunk_queue.size() == 0) &&
+bool RegionTaskQueueWithOverflow::is_empty() {
+  return (_region_queue.size() == 0) &&
          (_overflow_stack->length() == 0);
 }
 
-bool ChunkTaskQueueWithOverflow::stealable_is_empty() {
-  return _chunk_queue.size() == 0;
+bool RegionTaskQueueWithOverflow::stealable_is_empty() {
+  return _region_queue.size() == 0;
 }
 
-bool ChunkTaskQueueWithOverflow::overflow_is_empty() {
+bool RegionTaskQueueWithOverflow::overflow_is_empty() {
   return _overflow_stack->length() == 0;
 }
 
-void ChunkTaskQueueWithOverflow::initialize() {
-  _chunk_queue.initialize();
+void RegionTaskQueueWithOverflow::initialize() {
+  _region_queue.initialize();
   assert(_overflow_stack == 0, "Creating memory leak");
   _overflow_stack =
-    new (ResourceObj::C_HEAP) GrowableArray<ChunkTask>(10, true);
+    new (ResourceObj::C_HEAP) GrowableArray<RegionTask>(10, true);
 }
 
-void ChunkTaskQueueWithOverflow::save(ChunkTask t) {
-  if (TraceChunkTasksQueuing && Verbose) {
+void RegionTaskQueueWithOverflow::save(RegionTask t) {
+  if (TraceRegionTasksQueuing && Verbose) {
     gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t);
   }
-  if(!_chunk_queue.push(t)) {
+  if(!_region_queue.push(t)) {
     _overflow_stack->push(t);
   }
 }
 
-// Note that using this method will retrieve all chunks
+// Note that using this method will retrieve all regions
 // that have been saved but that it will always check
 // the overflow stack.  It may be more efficient to
 // check the stealable queue and the overflow stack
 // separately.
-bool ChunkTaskQueueWithOverflow::retrieve(ChunkTask& chunk_task) {
-  bool result = retrieve_from_overflow(chunk_task);
+bool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) {
+  bool result = retrieve_from_overflow(region_task);
   if (!result) {
-    result = retrieve_from_stealable_queue(chunk_task);
+    result = retrieve_from_stealable_queue(region_task);
   }
-  if (TraceChunkTasksQueuing && Verbose && result) {
+  if (TraceRegionTasksQueuing && Verbose && result) {
     gclog_or_tty->print_cr("  CTQ: retrieve " PTR_FORMAT, result);
   }
   return result;
 }
 
-bool ChunkTaskQueueWithOverflow::retrieve_from_stealable_queue(
-                                   ChunkTask& chunk_task) {
-  bool result = _chunk_queue.pop_local(chunk_task);
-  if (TraceChunkTasksQueuing && Verbose) {
-    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
+bool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue(
+                                   RegionTask& region_task) {
+  bool result = _region_queue.pop_local(region_task);
+  if (TraceRegionTasksQueuing && Verbose) {
+    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
   }
   return result;
 }
 
-bool ChunkTaskQueueWithOverflow::retrieve_from_overflow(
-                                        ChunkTask& chunk_task) {
+bool
+RegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) {
   bool result;
   if (!_overflow_stack->is_empty()) {
-    chunk_task = _overflow_stack->pop();
+    region_task = _overflow_stack->pop();
     result = true;
   } else {
-    chunk_task = (ChunkTask) NULL;
+    region_task = (RegionTask) NULL;
     result = false;
   }
-  if (TraceChunkTasksQueuing && Verbose) {
-    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task);
+  if (TraceRegionTasksQueuing && Verbose) {
+    gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
   }
   return result;
 }
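
The renamed RegionTaskQueueWithOverflow pairs a bounded stealable queue with an unbounded overflow stack: save() falls back to the stack when the queue is full, and retrieve() drains the stack first. A standalone sketch of that policy with standard containers standing in for the VM's lock-free queue (all names below are illustrative):

    #include <deque>
    #include <vector>

    // Bounded queue plus overflow stack, drained overflow-first on retrieve.
    template <class T>
    class QueueWithOverflow {
      std::deque<T>  _queue;       // stand-in for the bounded stealable queue
      std::vector<T> _overflow;    // overflow stack
      size_t _capacity;
     public:
      explicit QueueWithOverflow(size_t cap) : _capacity(cap) {}
      void save(const T& t) {
        if (_queue.size() < _capacity) _queue.push_back(t);
        else                           _overflow.push_back(t);
      }
      bool retrieve(T& out) {
        if (!_overflow.empty()) { out = _overflow.back(); _overflow.pop_back(); return true; }
        if (!_queue.empty())    { out = _queue.back();    _queue.pop_back();    return true; }
        return false;
      }
    };
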
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -120,6 +120,11 @@
     return dirty_size(_bottom, get_top());
   }
 
+  void set_empty() {
+    _bottom = 0;
+    _age = Age();
+  }
+
   // Maximum number of elements allowed in the queue.  This is two less
   // than the actual queue size, for somewhat complicated reasons.
   juint max_elems() { return n() - 2; }
@@ -155,6 +160,9 @@
   // Delete any resource associated with the queue.
   ~GenericTaskQueue();
 
+  // apply the closure to all elements in the task queue
+  void oops_do(OopClosure* f);
+
 private:
   // Element array.
   volatile E* _elems;
@@ -172,6 +180,24 @@
 }
 
 template<class E>
+void GenericTaskQueue<E>::oops_do(OopClosure* f) {
+  // tty->print_cr("START OopTaskQueue::oops_do");
+  int iters = size();
+  juint index = _bottom;
+  for (int i = 0; i < iters; ++i) {
+    index = decrement_index(index);
+    // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
+    //            index, &_elems[index], _elems[index]);
+    E* t = (E*)&_elems[index];      // cast away volatility
+    oop* p = (oop*)t;
+    assert((*t)->is_oop_or_null(), "Not an oop or null");
+    f->do_oop(p);
+  }
+  // tty->print_cr("END OopTaskQueue::oops_do");
+}
+
+
+template<class E>
 bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) {
   if (dirty_n_elems == n() - 1) {
     // Actually means 0, so do the push.
@@ -383,6 +409,12 @@
   return false;
 }
 
+// When to terminate from the termination protocol.
+class TerminatorTerminator: public CHeapObj {
+public:
+  virtual bool should_exit_termination() = 0;
+};
+
 // A class to aid in the termination of a set of parallel tasks using
 // TaskQueueSet's for work stealing.
 
@@ -407,7 +439,14 @@
   // else is.  If returns "true", all threads are terminated.  If returns
   // "false", available work has been observed in one of the task queues,
   // so the global task is not complete.
-  bool offer_termination();
+  bool offer_termination() {
+    return offer_termination(NULL);
+  }
+
+  // As above, but it also terminates if the should_exit_termination()
+  // method of the terminator parameter returns true. If terminator is
+  // NULL, then it is ignored.
+  bool offer_termination(TerminatorTerminator* terminator);
 
   // Reset the terminator, so that it may be reused again.
   // The caller is responsible for ensuring that this is done
@@ -518,32 +557,32 @@
 typedef GenericTaskQueue<StarTask>     OopStarTaskQueue;
 typedef GenericTaskQueueSet<StarTask>  OopStarTaskQueueSet;
 
-typedef size_t ChunkTask;  // index for chunk
-typedef GenericTaskQueue<ChunkTask>    ChunkTaskQueue;
-typedef GenericTaskQueueSet<ChunkTask> ChunkTaskQueueSet;
+typedef size_t RegionTask;  // index for region
+typedef GenericTaskQueue<RegionTask>    RegionTaskQueue;
+typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet;
 
-class ChunkTaskQueueWithOverflow: public CHeapObj {
+class RegionTaskQueueWithOverflow: public CHeapObj {
  protected:
-  ChunkTaskQueue              _chunk_queue;
-  GrowableArray<ChunkTask>*   _overflow_stack;
+  RegionTaskQueue              _region_queue;
+  GrowableArray<RegionTask>*   _overflow_stack;
 
  public:
-  ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {}
+  RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {}
   // Initialize both stealable queue and overflow
   void initialize();
   // Save first to stealable queue and then to overflow
-  void save(ChunkTask t);
+  void save(RegionTask t);
   // Retrieve first from overflow and then from stealable queue
-  bool retrieve(ChunkTask& chunk_index);
+  bool retrieve(RegionTask& region_index);
   // Retrieve from stealable queue
-  bool retrieve_from_stealable_queue(ChunkTask& chunk_index);
+  bool retrieve_from_stealable_queue(RegionTask& region_index);
   // Retrieve from overflow
-  bool retrieve_from_overflow(ChunkTask& chunk_index);
+  bool retrieve_from_overflow(RegionTask& region_index);
   bool is_empty();
   bool stealable_is_empty();
   bool overflow_is_empty();
-  juint stealable_size() { return _chunk_queue.size(); }
-  ChunkTaskQueue* task_queue() { return &_chunk_queue; }
+  juint stealable_size() { return _region_queue.size(); }
+  RegionTaskQueue* task_queue() { return &_region_queue; }
 };
 
-#define USE_ChunkTaskQueueWithOverflow
+#define USE_RegionTaskQueueWithOverflow
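
TerminatorTerminator lets a caller bail out of the termination protocol early via the new offer_termination(TerminatorTerminator*) overload. A sketch of how a client might implement the hook; the base class is restated so the fragment stands alone, and the subclass name and abort-flag scheme are illustrative, not part of this changeset:

    // Restated interface (declared in the hunk above within the VM).
    class TerminatorTerminator {
     public:
      virtual bool should_exit_termination() = 0;
      virtual ~TerminatorTerminator() {}
    };

    // Hypothetical terminator: abandon termination when an external abort flag
    // is raised, e.g. by a foreground collection asking concurrent workers to yield.
    class AbortFlagTerminator : public TerminatorTerminator {
      volatile bool* _abort_requested;
     public:
      explicit AbortFlagTerminator(volatile bool* flag) : _abort_requested(flag) {}
      virtual bool should_exit_termination() { return *_abort_requested; }
    };

A worker would then pass such a terminator to offer_termination() in place of the no-argument form, and drop out of the termination protocol as soon as the flag is set.
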
--- a/hotspot/src/share/vm/utilities/workgroup.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/workgroup.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -28,13 +28,19 @@
 // Definitions of WorkGang methods.
 
 AbstractWorkGang::AbstractWorkGang(const char* name,
-                                   bool  are_GC_threads) :
+                                   bool  are_GC_task_threads,
+                                   bool  are_ConcurrentGC_threads) :
   _name(name),
-  _are_GC_threads(are_GC_threads) {
+  _are_GC_task_threads(are_GC_task_threads),
+  _are_ConcurrentGC_threads(are_ConcurrentGC_threads) {
+
+  assert(!(are_GC_task_threads && are_ConcurrentGC_threads),
+         "They cannot both be STW GC and Concurrent threads" );
+
   // Other initialization.
   _monitor = new Monitor(/* priority */       Mutex::leaf,
                          /* name */           "WorkGroup monitor",
-                         /* allow_vm_block */ are_GC_threads);
+                         /* allow_vm_block */ are_GC_task_threads);
   assert(monitor() != NULL, "Failed to allocate monitor");
   _terminate = false;
   _task = NULL;
@@ -44,16 +50,21 @@
 }
 
 WorkGang::WorkGang(const char* name,
-                   int           workers,
-                   bool          are_GC_threads) :
-  AbstractWorkGang(name, are_GC_threads) {
+                   int         workers,
+                   bool        are_GC_task_threads,
+                   bool        are_ConcurrentGC_threads) :
+  AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads)
+{
   // Save arguments.
   _total_workers = workers;
+
   if (TraceWorkGang) {
     tty->print_cr("Constructing work gang %s with %d threads", name, workers);
   }
   _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers);
-  assert(gang_workers() != NULL, "Failed to allocate gang workers");
+  if (gang_workers() == NULL) {
+    vm_exit_out_of_memory(0, "Cannot create GangWorker array.");
+  }
   for (int worker = 0; worker < total_workers(); worker += 1) {
     GangWorker* new_worker = new GangWorker(this, worker);
     assert(new_worker != NULL, "Failed to allocate GangWorker");
@@ -285,7 +296,11 @@
 }
 
 bool GangWorker::is_GC_task_thread() const {
-  return gang()->are_GC_threads();
+  return gang()->are_GC_task_threads();
+}
+
+bool GangWorker::is_ConcurrentGC_thread() const {
+  return gang()->are_ConcurrentGC_threads();
 }
 
 void GangWorker::print_on(outputStream* st) const {
@@ -312,26 +327,43 @@
 
 WorkGangBarrierSync::WorkGangBarrierSync()
   : _monitor(Mutex::safepoint, "work gang barrier sync", true),
-    _n_workers(0), _n_completed(0) {
+    _n_workers(0), _n_completed(0), _should_reset(false) {
 }
 
 WorkGangBarrierSync::WorkGangBarrierSync(int n_workers, const char* name)
   : _monitor(Mutex::safepoint, name, true),
-    _n_workers(n_workers), _n_completed(0) {
+    _n_workers(n_workers), _n_completed(0), _should_reset(false) {
 }
 
 void WorkGangBarrierSync::set_n_workers(int n_workers) {
   _n_workers   = n_workers;
   _n_completed = 0;
+  _should_reset = false;
 }
 
 void WorkGangBarrierSync::enter() {
   MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag);
+  if (should_reset()) {
+    // The should_reset() was set and we are the first worker to enter
+    // the sync barrier. We will zero the n_completed() count which
+    // effectively resets the barrier.
+    zero_completed();
+    set_should_reset(false);
+  }
   inc_completed();
   if (n_completed() == n_workers()) {
+    // At this point we would like to reset the barrier to be ready in
+    // case it is used again. However, we cannot set n_completed() to
+    // 0, even after the notify_all(), given that some other workers
+    // might still be waiting for n_completed() to become ==
+    // n_workers(). So, if we set n_completed() to 0, those workers
+    // will get stuck (as they will wake up, see that n_completed() !=
+    // n_workers() and go back to sleep). Instead, we raise the
+    // should_reset() flag and the barrier will be reset the first
+    // time a worker enters it again.
+    set_should_reset(true);
     monitor()->notify_all();
-  }
-  else {
+  } else {
     while (n_completed() != n_workers()) {
       monitor()->wait(/* no_safepoint_check */ true);
     }
@@ -442,3 +474,122 @@
   }
   return false;
 }
+
+bool FreeIdSet::_stat_init = false;
+FreeIdSet* FreeIdSet::_sets[NSets];
+bool FreeIdSet::_safepoint;
+
+FreeIdSet::FreeIdSet(int sz, Monitor* mon) :
+  _sz(sz), _mon(mon), _hd(0), _waiters(0), _index(-1), _claimed(0)
+{
+  _ids = new int[sz];
+  for (int i = 0; i < sz; i++) _ids[i] = i+1;
+  _ids[sz-1] = end_of_list; // end of list.
+  if (!_stat_init) {
+    for (int j = 0; j < NSets; j++) _sets[j] = NULL;
+    _stat_init = true;
+  }
+  // Add to sets.  (This should happen while the system is still single-threaded.)
+  for (int j = 0; j < NSets; j++) {
+    if (_sets[j] == NULL) {
+      _sets[j] = this;
+      _index = j;
+      break;
+    }
+  }
+  guarantee(_index != -1, "Too many FreeIdSets in use!");
+}
+
+FreeIdSet::~FreeIdSet() {
+  _sets[_index] = NULL;
+}
+
+void FreeIdSet::set_safepoint(bool b) {
+  _safepoint = b;
+  if (b) {
+    for (int j = 0; j < NSets; j++) {
+      if (_sets[j] != NULL && _sets[j]->_waiters > 0) {
+        Monitor* mon = _sets[j]->_mon;
+        mon->lock_without_safepoint_check();
+        mon->notify_all();
+        mon->unlock();
+      }
+    }
+  }
+}
+
+#define FID_STATS 0
+
+int FreeIdSet::claim_par_id() {
+#if FID_STATS
+  thread_t tslf = thr_self();
+  tty->print("claim_par_id[%d]: sz = %d, claimed = %d\n", tslf, _sz, _claimed);
+#endif
+  MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag);
+  while (!_safepoint && _hd == end_of_list) {
+    _waiters++;
+#if FID_STATS
+    if (_waiters > 5) {
+      tty->print("claim_par_id waiting[%d]: %d waiters, %d claimed.\n",
+                 tslf, _waiters, _claimed);
+    }
+#endif
+    _mon->wait(Mutex::_no_safepoint_check_flag);
+    _waiters--;
+  }
+  if (_hd == end_of_list) {
+#if FID_STATS
+    tty->print("claim_par_id[%d]: returning EOL.\n", tslf);
+#endif
+    return -1;
+  } else {
+    int res = _hd;
+    _hd = _ids[res];
+    _ids[res] = claimed;  // For debugging.
+    _claimed++;
+#if FID_STATS
+    tty->print("claim_par_id[%d]: returning %d, claimed = %d.\n",
+               tslf, res, _claimed);
+#endif
+    return res;
+  }
+}
+
+bool FreeIdSet::claim_perm_id(int i) {
+  assert(0 <= i && i < _sz, "Out of range.");
+  MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag);
+  int prev = end_of_list;
+  int cur = _hd;
+  while (cur != end_of_list) {
+    if (cur == i) {
+      if (prev == end_of_list) {
+        _hd = _ids[cur];
+      } else {
+        _ids[prev] = _ids[cur];
+      }
+      _ids[cur] = claimed;
+      _claimed++;
+      return true;
+    } else {
+      prev = cur;
+      cur = _ids[cur];
+    }
+  }
+  return false;
+
+}
+
+void FreeIdSet::release_par_id(int id) {
+  MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag);
+  assert(_ids[id] == claimed, "Precondition.");
+  _ids[id] = _hd;
+  _hd = id;
+  _claimed--;
+#if FID_STATS
+  tty->print("[%d] release_par_id(%d), waiters =%d,  claimed = %d.\n",
+             thr_self(), id, _waiters, _claimed);
+#endif
+  if (_waiters > 0)
+    // A single notify() might suffice, but notify_all() is the safer choice here.
+    _mon->notify_all();
+}
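
The long comment added to WorkGangBarrierSync::enter() explains why the last worker cannot simply zero n_completed: sleeping workers re-check that count and would block forever. The VM defers the reset to the next round's first entrant. A common standalone alternative (not the VM's approach) keys waiters off a generation counter instead; a sketch with standard C++ primitives:

    #include <mutex>
    #include <condition_variable>

    // Reusable barrier: waiters wait for the generation to advance, so the
    // last arrival can zero the count immediately without stranding anyone.
    class GenerationBarrier {
      std::mutex _mu;
      std::condition_variable _cv;
      const int _n_workers;
      int _n_completed;
      long _generation;
     public:
      explicit GenerationBarrier(int n)
        : _n_workers(n), _n_completed(0), _generation(0) {}
      void enter() {
        std::unique_lock<std::mutex> lk(_mu);
        long gen = _generation;
        if (++_n_completed == _n_workers) {
          _n_completed = 0;        // safe: waiters watch the generation, not the count
          ++_generation;
          _cv.notify_all();
        } else {
          _cv.wait(lk, [this, gen] { return gen != _generation; });
        }
      }
    };
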
--- a/hotspot/src/share/vm/utilities/workgroup.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/workgroup.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -72,7 +72,8 @@
   // Here's the public interface to this class.
 public:
   // Constructor and destructor.
-  AbstractWorkGang(const char* name, bool are_GC_threads);
+  AbstractWorkGang(const char* name, bool are_GC_task_threads,
+                   bool are_ConcurrentGC_threads);
   ~AbstractWorkGang();
   // Run a task, returns when the task is done (or terminated).
   virtual void run_task(AbstractGangTask* task) = 0;
@@ -83,7 +84,8 @@
   const char* name() const;
 protected:
   // Initialize only instance data.
-  const bool _are_GC_threads;
+  const bool _are_GC_task_threads;
+  const bool _are_ConcurrentGC_threads;
   // Printing support.
   const char* _name;
   // The monitor which protects these data,
@@ -130,8 +132,11 @@
   int finished_workers() const {
     return _finished_workers;
   }
-  bool are_GC_threads() const {
-    return _are_GC_threads;
+  bool are_GC_task_threads() const {
+    return _are_GC_task_threads;
+  }
+  bool are_ConcurrentGC_threads() const {
+    return _are_ConcurrentGC_threads;
   }
   // Predicates.
   bool is_idle() const {
@@ -190,7 +195,8 @@
 class WorkGang: public AbstractWorkGang {
 public:
   // Constructor
-  WorkGang(const char* name, int workers, bool are_GC_threads);
+  WorkGang(const char* name, int workers,
+           bool are_GC_task_threads, bool are_ConcurrentGC_threads);
   // Run a task, returns when the task is done (or terminated).
   virtual void run_task(AbstractGangTask* task);
 };
@@ -206,6 +212,7 @@
   virtual void run();
   // Predicate for Thread
   virtual bool is_GC_task_thread() const;
+  virtual bool is_ConcurrentGC_thread() const;
   // Printing
   void print_on(outputStream* st) const;
   virtual void print() const { print_on(tty); }
@@ -228,12 +235,17 @@
   Monitor _monitor;
   int     _n_workers;
   int     _n_completed;
+  bool    _should_reset;
 
-  Monitor* monitor()       { return &_monitor; }
-  int      n_workers()     { return _n_workers; }
-  int      n_completed()   { return _n_completed; }
+  Monitor* monitor()        { return &_monitor; }
+  int      n_workers()      { return _n_workers; }
+  int      n_completed()    { return _n_completed; }
+  bool     should_reset()   { return _should_reset; }
 
-  void     inc_completed() { _n_completed++; }
+  void     zero_completed() { _n_completed = 0; }
+  void     inc_completed()  { _n_completed++; }
+
+  void     set_should_reset(bool v) { _should_reset = v; }
 
 public:
   WorkGangBarrierSync();
@@ -343,3 +355,42 @@
   // cleanup if necessary.
   bool all_tasks_completed();
 };
+
+// Represents a set of free small integer ids.
+class FreeIdSet {
+  enum {
+    end_of_list = -1,
+    claimed = -2
+  };
+
+  int _sz;
+  Monitor* _mon;
+
+  int* _ids;
+  int _hd;
+  int _waiters;
+  int _claimed;
+
+  static bool _safepoint;
+  typedef FreeIdSet* FreeIdSetPtr;
+  static const int NSets = 10;
+  static FreeIdSetPtr _sets[NSets];
+  static bool _stat_init;
+  int _index;
+
+public:
+  FreeIdSet(int sz, Monitor* mon);
+  ~FreeIdSet();
+
+  static void set_safepoint(bool b);
+
+  // Attempt to claim the given id permanently.  Returns "true" iff
+  // successful.
+  bool claim_perm_id(int i);
+
+  // Returns an unclaimed parallel id (waiting for one to be released if
+  // necessary).  Returns "-1" if a GC safepoint interrupts the wait for an id.
+  int claim_par_id();
+
+  void release_par_id(int id);
+};
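
FreeIdSet keeps its free ids in an intrusive linked list threaded through the _ids array: claim pops from _hd, release pushes back on. A standalone sketch of just that list layout, leaving out the blocking, the safepoint hook, and the 'claimed' bookkeeping (class and method names are illustrative):

    #include <vector>

    // Free-list id allocator: _next[i] is the id after i on the free list.
    class FreeIdList {
      enum { END = -1 };
      std::vector<int> _next;
      int _head;
     public:
      explicit FreeIdList(int n) : _next(n), _head(n > 0 ? 0 : END) {
        for (int i = 0; i < n - 1; i++) _next[i] = i + 1;
        if (n > 0) _next[n - 1] = END;
      }
      int claim() {                       // returns -1 when no id is free
        if (_head == END) return -1;
        int id = _head;
        _head = _next[id];
        return id;
      }
      void release(int id) {
        _next[id] = _head;
        _head = id;
      }
    };
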
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp	Wed Jul 05 16:43:17 2017 +0200
@@ -31,8 +31,8 @@
 class WorkData;
 
 YieldingFlexibleWorkGang::YieldingFlexibleWorkGang(
-  const char* name, int workers, bool are_GC_threads) :
-  AbstractWorkGang(name, are_GC_threads) {
+  const char* name, int workers, bool are_GC_task_threads) :
+  AbstractWorkGang(name, are_GC_task_threads, false) {
   // Save arguments.
   _total_workers = workers;
   assert(_total_workers > 0, "Must have more than 1 worker");
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp	Wed Jul 05 16:43:17 2017 +0200
@@ -143,7 +143,8 @@
   // Here's the public interface to this class.
 public:
   // Constructor and destructor.
-  YieldingFlexibleWorkGang(const char* name, int workers, bool are_GC_threads);
+  YieldingFlexibleWorkGang(const char* name, int workers,
+                           bool are_GC_task_threads);
 
   YieldingFlexibleGangTask* yielding_task() const {
     assert(task() == NULL || task()->is_YieldingFlexibleGang_task(),
--- a/hotspot/test/compiler/6646019/Test.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/test/compiler/6646019/Test.java	Wed Jul 05 16:43:17 2017 +0200
@@ -19,7 +19,6 @@
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
- *
  */
 
 /*
--- a/hotspot/test/compiler/6689060/Test.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/test/compiler/6689060/Test.java	Wed Jul 05 16:43:17 2017 +0200
@@ -19,7 +19,6 @@
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
- *
  */
 
 /*
--- a/hotspot/test/compiler/6695810/Test.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/test/compiler/6695810/Test.java	Wed Jul 05 16:43:17 2017 +0200
@@ -19,7 +19,6 @@
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
- *
  */
 
 /*
--- a/hotspot/test/compiler/6700047/Test6700047.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/hotspot/test/compiler/6700047/Test6700047.java	Wed Jul 05 16:43:17 2017 +0200
@@ -29,6 +29,8 @@
  */
 
 public class Test6700047 {
+    static byte[] dummy = new byte[256];
+
     public static void main(String[] args) {
         for (int i = 0; i < 100000; i++) {
             intToLeftPaddedAsciiBytes();
@@ -53,6 +55,7 @@
         if (offset > 0) {
             for(int j = 0; j < offset; j++) {
                 result++;
+                dummy[i] = 0;
             }
         }
         return result;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6711100/Test.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6711100
+ * @summary 64bit fastdebug server vm crashes with assert(_base == Int,"Not an Int")
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test.<init> Test
+ */
+
+public class Test {
+
+    static byte b;
+
+    // The server compiler chokes on compiling
+    // this method when f() is not inlined
+    public Test() {
+        b = (new byte[1])[(new byte[f()])[-1]];
+    }
+
+    protected static int f() {
+      return 1;
+    }
+
+    public static void main(String[] args) {
+      try {
+        Test t = new Test();
+      } catch (ArrayIndexOutOfBoundsException e) {
+      }
+    }
+}
+
+
--- a/jaxp/.hgtags	Wed Jul 05 16:42:40 2017 +0200
+++ b/jaxp/.hgtags	Wed Jul 05 16:43:17 2017 +0200
@@ -12,3 +12,4 @@
 eac46d1eb7f0935ba04f1c7929ec15423fd0309e jdk7-b35
 c84ca638db42a8b6b227b4e3b63bca192c5ca634 jdk7-b36
 af49591bc486d82aa04b832257de0d18adc9af52 jdk7-b37
+e9f750f0a3a00413a7b77028b2ecdabb7129ae32 jdk7-b38
--- a/jaxws/.hgtags	Wed Jul 05 16:42:40 2017 +0200
+++ b/jaxws/.hgtags	Wed Jul 05 16:43:17 2017 +0200
@@ -12,3 +12,4 @@
 b0f01c2508b690dd225298edfec70b5e8b8dc367 jdk7-b35
 f60187f44a0d62906a5e2f6bd0989b5b24c1ca1e jdk7-b36
 a2a6f9edf761934faf59ea60d7fe7178371302cd jdk7-b37
+9ce439969184c753a9ba3caf8ed277b05230f2e5 jdk7-b38
--- a/jdk/.hgtags	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/.hgtags	Wed Jul 05 16:43:17 2017 +0200
@@ -12,3 +12,4 @@
 cf4894b78ceb966326e93bf221db0c2d14d59218 jdk7-b35
 134fd1a656ea85acd1f97f6700f75029b9b472a0 jdk7-b36
 14f50aee4989b75934d385c56a83da0c23d2f68b jdk7-b37
+cc5f810b5af8a3a83b0df5a29d9e24d7a0ff8086 jdk7-b38
--- a/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java	Wed Jul 05 16:43:17 2017 +0200
@@ -32,6 +32,7 @@
 import java.lang.reflect.GenericArrayType;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.lang.reflect.ParameterizedType;
 import java.lang.reflect.Type;
 import java.util.Map;
 import java.util.WeakHashMap;
@@ -390,7 +391,31 @@
         if (type instanceof Class)
             return ((Class) type).getName();
         else
-            return type.toString();
+            return genericTypeString(type);
+    }
+
+    private static String genericTypeString(Type type) {
+        if (type instanceof Class<?>) {
+            Class<?> c = (Class<?>) type;
+            if (c.isArray())
+                return genericTypeString(c.getComponentType()) + "[]";
+            else
+                return c.getName();
+        } else if (type instanceof GenericArrayType) {
+            GenericArrayType gat = (GenericArrayType) type;
+            return genericTypeString(gat.getGenericComponentType()) + "[]";
+        } else if (type instanceof ParameterizedType) {
+            ParameterizedType pt = (ParameterizedType) type;
+            StringBuilder sb = new StringBuilder();
+            sb.append(genericTypeString(pt.getRawType())).append("<");
+            String sep = "";
+            for (Type t : pt.getActualTypeArguments()) {
+                sb.append(sep).append(genericTypeString(t));
+                sep = ", ";
+            }
+            return sb.append(">").toString();
+        } else
+            return "???";
     }
 
     private final PerInterfaceMap<ConvertingMethod>
--- a/jdk/src/share/classes/java/nio/channels/SelectableChannel.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/java/nio/channels/SelectableChannel.java	Wed Jul 05 16:43:17 2017 +0200
@@ -191,6 +191,9 @@
      * @throws  ClosedChannelException
      *          If this channel is closed
      *
+     * @throws  ClosedSelectorException
+     *          If the selector is closed
+     *
      * @throws  IllegalBlockingModeException
      *          If this channel is in blocking mode
      *
@@ -246,6 +249,9 @@
      * @throws  ClosedChannelException
      *          If this channel is closed
      *
+     * @throws  ClosedSelectorException
+     *          If the selector is closed
+     *
      * @throws  IllegalBlockingModeException
      *          If this channel is in blocking mode
      *
--- a/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java	Wed Jul 05 16:43:17 2017 +0200
@@ -175,6 +175,16 @@
      * the selector is invoked while holding the appropriate locks.  The
      * resulting key is added to this channel's key set before being returned.
      * </p>
+     *
+     * @throws  ClosedSelectorException {@inheritDoc}
+     *
+     * @throws  IllegalBlockingModeException {@inheritDoc}
+     *
+     * @throws  IllegalSelectorException {@inheritDoc}
+     *
+     * @throws  CancelledKeyException {@inheritDoc}
+     *
+     * @throws  IllegalArgumentException {@inheritDoc}
      */
     public final SelectionKey register(Selector sel, int ops,
                                        Object att)
--- a/jdk/src/share/classes/javax/management/event/EventClient.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/javax/management/event/EventClient.java	Wed Jul 05 16:43:17 2017 +0200
@@ -265,12 +265,20 @@
             public ScheduledThreadPoolExecutor createThreadPool(ThreadGroup group) {
                 ThreadFactory daemonThreadFactory = new DaemonThreadFactory(
                         "JMX EventClient lease renewer %d");
-                ScheduledThreadPoolExecutor exec = new ScheduledThreadPoolExecutor(
-                        20, daemonThreadFactory);
-                exec.setKeepAliveTime(1, TimeUnit.SECONDS);
-                exec.allowCoreThreadTimeOut(true);
-                exec.setRemoveOnCancelPolicy(true);
-                return exec;
+                ScheduledThreadPoolExecutor executor =
+                        new ScheduledThreadPoolExecutor(20, daemonThreadFactory);
+                executor.setKeepAliveTime(1, TimeUnit.SECONDS);
+                executor.allowCoreThreadTimeOut(true);
+                executor.setRemoveOnCancelPolicy(true);
+                // By default, a ScheduledThreadPoolExecutor will keep jobs
+                // in its queue even after they have been cancelled.  They
+                // will only be removed when their scheduled time arrives.
+                // Since the job references the LeaseRenewer which references
+                // this EventClient, this can lead to a moderately large number
+                // of objects remaining referenced until the renewal time
+                // arrives.  Hence the above call, which removes the job from
+                // the queue as soon as it is cancelled.
+                return executor;
             }
         };
         return leaseRenewerThreadPool.getThreadPoolExecutor(create);
@@ -381,7 +389,7 @@
             listenerId =
                     eventClientDelegate.addListener(clientId, name, filter);
         } catch (EventClientNotFoundException ecnfe) {
-            final IOException ioe = new IOException();
+            final IOException ioe = new IOException(ecnfe.getMessage());
             ioe.initCause(ecnfe);
             throw ioe;
         }
@@ -488,7 +496,7 @@
             listenerId =
                     eventClientDelegate.addSubscriber(clientId, name, filter);
         } catch (EventClientNotFoundException ecnfe) {
-            final IOException ioe = new IOException();
+            final IOException ioe = new IOException(ecnfe.getMessage());
             ioe.initCause(ecnfe);
             throw ioe;
         }
--- a/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java	Wed Jul 05 16:43:17 2017 +0200
@@ -91,7 +91,7 @@
      * the fetching.
      *
      * @param delegate The {@code EventClientDelegateMBean} to work with.
-     * @param executor Used to do the fetching. A new thread is created if
+     * @param fetchExecutor Used to do the fetching. A new thread is created if
      * {@code null}.
      * @throws IOException If failed to work with the {@code delegate}.
      * @throws MBeanException if unable to add a client to the remote
@@ -101,12 +101,12 @@
      * @throws IllegalArgumentException If {@code delegate} is {@code null}.
      */
     public FetchingEventRelay(EventClientDelegateMBean delegate,
-            Executor executor) throws IOException, MBeanException {
+            Executor fetchExecutor) throws IOException, MBeanException {
         this(delegate,
                 DEFAULT_BUFFER_SIZE,
                 DEFAULT_WAITING_TIMEOUT,
                 DEFAULT_MAX_NOTIFICATIONS,
-                executor);
+                fetchExecutor);
     }
 
     /**
@@ -120,7 +120,7 @@
      * @param timeout The waiting time in milliseconds when fetching
      * notifications from an {@code EventClientDelegateMBean}.
      * @param maxNotifs The maximum notifications to fetch every time.
-     * @param executor Used to do the fetching. A new thread is created if
+     * @param fetchExecutor Used to do the fetching. A new thread is created if
      * {@code null}.
      * @throws IOException if failed to communicate with the {@code delegate}.
      * @throws MBeanException if unable to add a client to the remote
@@ -133,12 +133,12 @@
             int bufferSize,
             long timeout,
             int maxNotifs,
-            Executor executor) throws IOException, MBeanException {
+            Executor fetchExecutor) throws IOException, MBeanException {
         this(delegate,
                 bufferSize,
                 timeout,
                 maxNotifs,
-                executor,
+                fetchExecutor,
                 FetchingEventForwarder.class.getName(),
                 new Object[] {bufferSize},
                 new String[] {int.class.getName()});
@@ -155,7 +155,7 @@
      * @param timeout The waiting time in milliseconds when fetching
      * notifications from an {@code EventClientDelegateMBean}.
      * @param maxNotifs The maximum notifications to fetch every time.
-     * @param executor Used to do the fetching.
+     * @param fetchExecutor Used to do the fetching.
      * @param forwarderName the class name of a user specific EventForwarder
      * to create in server to forward notifications to this object. The class
      * should be a subclass of the class {@link FetchingEventForwarder}.
@@ -174,7 +174,7 @@
             int bufferSize,
             long timeout,
             int maxNotifs,
-            Executor executor,
+            Executor fetchExecutor,
             String forwarderName,
             Object[] params,
             String[] sig) throws IOException, MBeanException {
@@ -184,11 +184,11 @@
                     bufferSize+" "+
                     timeout+" "+
                     maxNotifs+" "+
-                    executor+" "+
+                    fetchExecutor+" "+
                     forwarderName+" ");
         }
 
-        if(delegate == null) {
+        if (delegate == null) {
             throw new NullPointerException("Null EventClientDelegateMBean!");
         }
 
@@ -212,16 +212,16 @@
         this.timeout = timeout;
         this.maxNotifs = maxNotifs;
 
-        if (executor == null) {
-            ScheduledThreadPoolExecutor stpe = new ScheduledThreadPoolExecutor(1,
-                    daemonThreadFactory);
-            stpe.setKeepAliveTime(1, TimeUnit.SECONDS);
-            stpe.allowCoreThreadTimeOut(true);
-            executor = stpe;
-            this.defaultExecutor = stpe;
+        if (fetchExecutor == null) {
+            ScheduledThreadPoolExecutor executor =
+                    new ScheduledThreadPoolExecutor(1, daemonThreadFactory);
+            executor.setKeepAliveTime(1, TimeUnit.SECONDS);
+            executor.allowCoreThreadTimeOut(true);
+            fetchExecutor = executor;
+            this.defaultExecutor = executor;
         } else
             this.defaultExecutor = null;
-        this.executor = executor;
+        this.fetchExecutor = fetchExecutor;
 
         startSequenceNumber = 0;
         fetchingJob = new MyJob();
@@ -258,7 +258,7 @@
 
     private class MyJob extends RepeatedSingletonJob {
         public MyJob() {
-            super(executor);
+            super(fetchExecutor);
         }
 
         public boolean isSuspended() {
@@ -368,7 +368,7 @@
     private String clientId;
     private boolean stopped = false;
 
-    private final Executor executor;
+    private final Executor fetchExecutor;
     private final ExecutorService defaultExecutor;
     private final MyJob fetchingJob;
 
--- a/jdk/src/share/classes/javax/management/monitor/Monitor.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/javax/management/monitor/Monitor.java	Wed Jul 05 16:43:17 2017 +0200
@@ -181,7 +181,7 @@
     /**
      * Executor Service.
      */
-    private static final ExecutorService executor;
+    private static final ThreadPoolExecutor executor;
     static {
         final String maximumPoolSizeSysProp = "jmx.x.monitor.maximum.pool.size";
         final String maximumPoolSizeStr = AccessController.doPrivileged(
@@ -218,7 +218,7 @@
                 TimeUnit.SECONDS,
                 new LinkedBlockingQueue<Runnable>(),
                 new DaemonThreadFactory("Executor"));
-        ((ThreadPoolExecutor)executor).allowCoreThreadTimeOut(true);
+        executor.allowCoreThreadTimeOut(true);
     }
 
     /**
--- a/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java	Wed Jul 05 16:43:17 2017 +0200
@@ -71,9 +71,8 @@
 import java.util.Properties;
 import java.util.Set;
 import java.util.WeakHashMap;
-import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Executor;
-import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
@@ -421,12 +420,12 @@
             public ThreadPoolExecutor createThreadPool(ThreadGroup group) {
                 ThreadFactory daemonThreadFactory = new DaemonThreadFactory(
                         "JMX RMIConnector listener dispatch %d");
-                ThreadPoolExecutor exec = new ThreadPoolExecutor(
+                ThreadPoolExecutor executor = new ThreadPoolExecutor(
                         1, 10, 1, TimeUnit.SECONDS,
-                        new LinkedBlockingDeque<Runnable>(),
+                        new LinkedBlockingQueue<Runnable>(),
                         daemonThreadFactory);
-                exec.allowCoreThreadTimeOut(true);
-                return exec;
+                executor.allowCoreThreadTimeOut(true);
+                return executor;
             }
         };
         return listenerDispatchThreadPool.getThreadPoolExecutor(create);
@@ -1503,7 +1502,7 @@
             super(period);
         }
 
-        public void gotIOException (IOException ioe) throws IOException {
+        public void gotIOException(IOException ioe) throws IOException {
             if (ioe instanceof NoSuchObjectException) {
                 // need to restart
                 super.gotIOException(ioe);
--- a/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java	Wed Jul 05 16:43:17 2017 +0200
@@ -80,7 +80,7 @@
 import static sun.management.AgentConfigurationError.*;
 import sun.management.ConnectorAddressLink;
 import sun.management.FileSystem;
-import sun.management.snmp.util.MibLogger;
+import com.sun.jmx.remote.util.ClassLogger;
 
 import com.sun.jmx.remote.internal.RMIExporter;
 import com.sun.jmx.remote.security.JMXPluggableAuthenticator;
@@ -99,6 +99,7 @@
         public static final String PORT = "0";
         public static final String CONFIG_FILE_NAME = "management.properties";
         public static final String USE_SSL = "true";
+        public static final String USE_LOCAL_ONLY = "true";
         public static final String USE_REGISTRY_SSL = "false";
         public static final String USE_AUTHENTICATION = "true";
         public static final String PASSWORD_FILE_NAME = "jmxremote.password";
@@ -115,6 +116,8 @@
                 "com.sun.management.jmxremote.port";
         public static final String CONFIG_FILE_NAME =
                 "com.sun.management.config.file";
+        public static final String USE_LOCAL_ONLY =
+                "com.sun.management.jmxremote.local.only";
         public static final String USE_SSL =
                 "com.sun.management.jmxremote.ssl";
         public static final String USE_REGISTRY_SSL =
@@ -384,7 +387,7 @@
             checkAccessFile(accessFileName);
         }
 
-        if (log.isDebugOn()) {
+        if (log.debugOn()) {
             log.debug("initialize",
                     Agent.getText("jmxremote.ConnectorBootstrap.initialize") +
                     "\n\t" + PropertyNames.PORT + "=" + port +
@@ -477,6 +480,18 @@
         MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
         try {
             JMXServiceURL url = new JMXServiceURL("rmi", localhost, 0);
+            // Do we accept connections from local interfaces only?
+            Properties props = Agent.getManagementProperties();
+            if (props == null) {
+                props = new Properties();
+            }
+            String useLocalOnlyStr = props.getProperty(
+                    PropertyNames.USE_LOCAL_ONLY, DefaultValues.USE_LOCAL_ONLY);
+            boolean useLocalOnly = Boolean.valueOf(useLocalOnlyStr).booleanValue();
+            if (useLocalOnly) {
+                env.put(RMIConnectorServer.RMI_SERVER_SOCKET_FACTORY_ATTRIBUTE,
+                        new LocalRMIServerSocketFactory());
+            }
             JMXConnectorServer server =
                     JMXConnectorServerFactory.newJMXConnectorServer(url, env, mbs);
             server.start();
@@ -764,7 +779,7 @@
     private ConnectorBootstrap() {
     }
 
-    // XXX Revisit: should probably clone this MibLogger....
-    private static final MibLogger log =
-            new MibLogger(ConnectorBootstrap.class);
+    private static final ClassLogger log =
+        new ClassLogger(ConnectorBootstrap.class.getPackage().getName(),
+                        "ConnectorBootstrap");
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/management/jmxremote/LocalRMIServerSocketFactory.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.management.jmxremote;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.NetworkInterface;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.net.SocketException;
+import java.rmi.server.RMIServerSocketFactory;
+import java.util.Enumeration;
+
+/**
+ * This RMI server socket factory creates server sockets that
+ * will only accept connection requests from clients running
+ * on the host where the RMI remote objects have been exported.
+ */
+public final class LocalRMIServerSocketFactory implements RMIServerSocketFactory {
+    /**
+     * Creates a server socket that only accepts connection requests from
+     * clients running on the host where the RMI remote objects have been
+     * exported.
+     */
+    public ServerSocket createServerSocket(int port) throws IOException {
+        return new ServerSocket(port) {
+            @Override
+            public Socket accept() throws IOException {
+                Socket socket = super.accept();
+                InetAddress remoteAddr = socket.getInetAddress();
+                final String msg = "The server sockets created using the " +
+                        "LocalRMIServerSocketFactory only accept connections " +
+                        "from clients running on the host where the RMI " +
+                        "remote objects have been exported.";
+                if (remoteAddr.isAnyLocalAddress()) {
+                    // local address: accept the connection.
+                    return socket;
+                }
+                // Retrieve all the network interfaces on this host.
+                Enumeration<NetworkInterface> nis;
+                try {
+                    nis = NetworkInterface.getNetworkInterfaces();
+                } catch (SocketException e) {
+                    try {
+                        socket.close();
+                    } catch (IOException ioe) {
+                        // Ignore...
+                    }
+                    throw new IOException(msg, e);
+                }
+                // Walk through the network interfaces to see
+                // if any of them matches the client's address.
+                // If true, then the client's address is local.
+                while (nis.hasMoreElements()) {
+                    NetworkInterface ni = nis.nextElement();
+                    Enumeration<InetAddress> addrs = ni.getInetAddresses();
+                    while (addrs.hasMoreElements()) {
+                        InetAddress localAddr = addrs.nextElement();
+                        if (localAddr.equals(remoteAddr)) {
+                            return socket;
+                        }
+                    }
+                }
+                // The client's address is remote so refuse the connection.
+                try {
+                    socket.close();
+                } catch (IOException ioe) {
+                    // Ignore...
+                }
+                throw new IOException(msg);
+            }
+        };
+    }
+
+    /**
+     * Two LocalRMIServerSocketFactory objects
+     * are equal if they are of the same type.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        return (obj instanceof LocalRMIServerSocketFactory);
+    }
+
+    /**
+     * Returns a hash code value for this LocalRMIServerSocketFactory.
+     */
+    @Override
+    public int hashCode() {
+        return getClass().hashCode();
+    }
+}
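
A minimal sketch of how this factory is meant to be wired in, mirroring the ConnectorBootstrap change above. The class name and host string here are illustrative only, and the factory itself is an internal sun.management class rather than public API.

    import java.lang.management.ManagementFactory;
    import java.util.HashMap;
    import java.util.Map;
    import javax.management.MBeanServer;
    import javax.management.remote.JMXConnectorServer;
    import javax.management.remote.JMXConnectorServerFactory;
    import javax.management.remote.JMXServiceURL;
    import javax.management.remote.rmi.RMIConnectorServer;
    import sun.management.jmxremote.LocalRMIServerSocketFactory;

    public class LocalOnlyConnectorSketch {
        public static void main(String[] args) throws Exception {
            MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
            Map<String, Object> env = new HashMap<String, Object>();
            // Restrict the RMI connector to clients on this host, as the agent
            // now does when com.sun.management.jmxremote.local.only is true.
            env.put(RMIConnectorServer.RMI_SERVER_SOCKET_FACTORY_ATTRIBUTE,
                    new LocalRMIServerSocketFactory());
            JMXServiceURL url = new JMXServiceURL("rmi", "localhost", 0);
            JMXConnectorServer server =
                    JMXConnectorServerFactory.newJMXConnectorServer(url, env, mbs);
            server.start();
            System.out.println("Local-only connector at " + server.getAddress());
            server.stop();
        }
    }
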
--- a/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -58,6 +58,9 @@
     // True if this Selector has been closed
     private boolean closed = false;
 
+    // Lock for close and cleanup
+    private Object closeLock = new Object();
+
     AbstractPollSelectorImpl(SelectorProvider sp, int channels, int offset) {
         super(sp);
         this.totalChannels = channels;
@@ -65,7 +68,11 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
-        pollWrapper.putEventOps(sk.getIndex(), ops);
+        synchronized (closeLock) {
+            if (closed)
+                throw new ClosedSelectorException();
+            pollWrapper.putEventOps(sk.getIndex(), ops);
+        }
     }
 
     public Selector wakeup() {
@@ -76,7 +83,9 @@
     protected abstract int doSelect(long timeout) throws IOException;
 
     protected void implClose() throws IOException {
-        if (!closed) {
+        synchronized (closeLock) {
+            if (closed)
+                return;
             closed = true;
             // Deregister channels
             for(int i=channelOffset; i<totalChannels; i++) {
@@ -129,23 +138,28 @@
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
-        // Check to see if the array is large enough
-        if (channelArray.length == totalChannels) {
-            // Make a larger array
-            int newSize = pollWrapper.totalChannels * 2;
-            SelectionKeyImpl temp[] = new SelectionKeyImpl[newSize];
-            // Copy over
-            for (int i=channelOffset; i<totalChannels; i++)
-                temp[i] = channelArray[i];
-            channelArray = temp;
-            // Grow the NativeObject poll array
-            pollWrapper.grow(newSize);
+        synchronized (closeLock) {
+            if (closed)
+                throw new ClosedSelectorException();
+
+            // Check to see if the array is large enough
+            if (channelArray.length == totalChannels) {
+                // Make a larger array
+                int newSize = pollWrapper.totalChannels * 2;
+                SelectionKeyImpl temp[] = new SelectionKeyImpl[newSize];
+                // Copy over
+                for (int i=channelOffset; i<totalChannels; i++)
+                    temp[i] = channelArray[i];
+                channelArray = temp;
+                // Grow the NativeObject poll array
+                pollWrapper.grow(newSize);
+            }
+            channelArray[totalChannels] = ski;
+            ski.setIndex(totalChannels);
+            pollWrapper.addEntry(ski.channel);
+            totalChannels++;
+            keys.add(ski);
         }
-        channelArray[totalChannels] = ski;
-        ski.setIndex(totalChannels);
-        pollWrapper.addEntry(ski.channel);
-        totalChannels++;
-        keys.add(ski);
     }
 
     protected void implDereg(SelectionKeyImpl ski) throws IOException {
--- a/jdk/src/share/classes/sun/security/provider/certpath/BasicChecker.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/classes/sun/security/provider/certpath/BasicChecker.java	Wed Jul 05 16:43:17 2017 +0200
@@ -162,7 +162,7 @@
             throw new CertPathValidatorException
                 (msg + " check failed", e, null, -1,
                  BasicReason.INVALID_SIGNATURE);
-        } catch (GeneralSecurityException e) {
+        } catch (Exception e) {
             throw new CertPathValidatorException(msg + " check failed", e);
         }
 
--- a/jdk/src/share/lib/management/management.properties	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/share/lib/management/management.properties	Wed Jul 05 16:43:17 2017 +0200
@@ -82,7 +82,7 @@
 #
 # com.sun.management.snmp.interface=<InetAddress>
 #      Specifies the local interface on which the SNMP agent will bind.
-#      This is usefull when running on machines which have several
+#      This is useful when running on machines which have several
 #      interfaces defined. It makes it possible to listen to a specific
 #      subnet accessible through that interface.
 #      Default for this property is "localhost".
@@ -144,6 +144,26 @@
 #
 
 #
+# ########## RMI connector settings for local management ##########
+#
+# com.sun.management.jmxremote.local.only=true|false
+#      Default for this property is true. (Case for true/false ignored)
+#      If this property is specified as true then the local JMX RMI connector
+#      server will only accept connection requests from clients running on
+#      the host where the out-of-the-box JMX management agent is running.
+#      In order to ensure backwards compatibility this property could be
+#      set to false. However, deploying the local management agent in this
+#      way is discouraged because the local JMX RMI connector server will
+#      accept connection requests from any client either local or remote.
+#      For remote management the remote JMX RMI connector server should
+#      be used instead with authentication and SSL/TLS encryption enabled.
+#
+
+# To allow the local management agent to accept local
+# and remote connection requests, use the following line
+# com.sun.management.jmxremote.local.only=false
+
+#
 # ###################### RMI SSL #############################
 #
 # com.sun.management.jmxremote.ssl=true|false
--- a/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -46,15 +46,15 @@
     // The poll object
     DevPollArrayWrapper pollWrapper;
 
-    // The number of valid channels in this Selector's poll array
-    private int totalChannels;
-
     // Maps from file descriptors to keys
     private Map<Integer,SelectionKeyImpl> fdToKey;
 
     // True if this Selector has been closed
     private boolean closed = false;
 
+    // Lock for close/cleanup
+    private Object closeLock = new Object();
+
     // Lock for interrupt triggering and clearing
     private Object interruptLock = new Object();
     private boolean interruptTriggered = false;
@@ -72,7 +72,6 @@
         pollWrapper = new DevPollArrayWrapper();
         pollWrapper.initInterrupt(fd0, fd1);
         fdToKey = new HashMap<Integer,SelectionKeyImpl>();
-        totalChannels = 1;
     }
 
     protected int doSelect(long timeout)
@@ -131,45 +130,39 @@
     }
 
     protected void implClose() throws IOException {
-        if (!closed) {
-            closed = true;
-
-            // prevent further wakeup
-            synchronized (interruptLock) {
-                interruptTriggered = true;
-            }
+        if (closed)
+            return;
+        closed = true;
 
-            FileDispatcher.closeIntFD(fd0);
-            FileDispatcher.closeIntFD(fd1);
-            if (pollWrapper != null) {
+        // prevent further wakeup
+        synchronized (interruptLock) {
+            interruptTriggered = true;
+        }
 
-                pollWrapper.release(fd0);
-                pollWrapper.closeDevPollFD();
-                pollWrapper = null;
-                selectedKeys = null;
+        FileDispatcher.closeIntFD(fd0);
+        FileDispatcher.closeIntFD(fd1);
 
-                // Deregister channels
-                Iterator i = keys.iterator();
-                while (i.hasNext()) {
-                    SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
-                    deregister(ski);
-                    SelectableChannel selch = ski.channel();
-                    if (!selch.isOpen() && !selch.isRegistered())
-                        ((SelChImpl)selch).kill();
-                    i.remove();
-                }
-                totalChannels = 0;
+        pollWrapper.release(fd0);
+        pollWrapper.closeDevPollFD();
+        selectedKeys = null;
 
-            }
-            fd0 = -1;
-            fd1 = -1;
+        // Deregister channels
+        Iterator i = keys.iterator();
+        while (i.hasNext()) {
+            SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
+            deregister(ski);
+            SelectableChannel selch = ski.channel();
+            if (!selch.isOpen() && !selch.isRegistered())
+                ((SelChImpl)selch).kill();
+            i.remove();
         }
+        fd0 = -1;
+        fd1 = -1;
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
         int fd = IOUtil.fdVal(ski.channel.getFD());
         fdToKey.put(Integer.valueOf(fd), ski);
-        totalChannels++;
         keys.add(ski);
     }
 
@@ -179,7 +172,6 @@
         int fd = ski.channel.getFDVal();
         fdToKey.remove(Integer.valueOf(fd));
         pollWrapper.release(fd);
-        totalChannels--;
         ski.setIndex(-1);
         keys.remove(ski);
         selectedKeys.remove(ski);
@@ -190,6 +182,8 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
+        if (closed)
+            throw new ClosedSelectorException();
         int fd = IOUtil.fdVal(sk.channel.getFD());
         pollWrapper.setInterest(fd, ops);
     }
--- a/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2005-2007 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,7 +31,6 @@
 import java.util.*;
 import sun.misc.*;
 
-
 /**
  * An implementation of Selector for Linux 2.6+ kernels that uses
  * the epoll event notification facility.
@@ -51,7 +50,7 @@
     private Map<Integer,SelectionKeyImpl> fdToKey;
 
     // True if this Selector has been closed
-    private boolean closed = false;
+    private volatile boolean closed = false;
 
     // Lock for interrupt triggering and clearing
     private Object interruptLock = new Object();
@@ -128,40 +127,41 @@
     }
 
     protected void implClose() throws IOException {
-        if (!closed) {
-            closed = true;
+        if (closed)
+            return;
+        closed = true;
 
-            // prevent further wakeup
-            synchronized (interruptLock) {
-                interruptTriggered = true;
-            }
+        // prevent further wakeup
+        synchronized (interruptLock) {
+            interruptTriggered = true;
+        }
 
-            FileDispatcher.closeIntFD(fd0);
-            FileDispatcher.closeIntFD(fd1);
-            if (pollWrapper != null) {
+        FileDispatcher.closeIntFD(fd0);
+        FileDispatcher.closeIntFD(fd1);
 
-                pollWrapper.release(fd0);
-                pollWrapper.closeEPollFD();
-                pollWrapper = null;
-                selectedKeys = null;
+        pollWrapper.release(fd0);
+        pollWrapper.closeEPollFD();
+        // it is possible
+        selectedKeys = null;
 
-                // Deregister channels
-                Iterator i = keys.iterator();
-                while (i.hasNext()) {
-                    SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
-                    deregister(ski);
-                    SelectableChannel selch = ski.channel();
-                    if (!selch.isOpen() && !selch.isRegistered())
-                        ((SelChImpl)selch).kill();
-                    i.remove();
-                }
-            }
-            fd0 = -1;
-            fd1 = -1;
+        // Deregister channels
+        Iterator i = keys.iterator();
+        while (i.hasNext()) {
+            SelectionKeyImpl ski = (SelectionKeyImpl)i.next();
+            deregister(ski);
+            SelectableChannel selch = ski.channel();
+            if (!selch.isOpen() && !selch.isRegistered())
+                ((SelChImpl)selch).kill();
+            i.remove();
         }
+
+        fd0 = -1;
+        fd1 = -1;
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
+        if (closed)
+            throw new ClosedSelectorException();
         int fd = IOUtil.fdVal(ski.channel.getFD());
         fdToKey.put(Integer.valueOf(fd), ski);
         pollWrapper.add(fd);
@@ -183,6 +183,8 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
+        if (closed)
+            throw new ClosedSelectorException();
         int fd = IOUtil.fdVal(sk.channel.getFD());
         pollWrapper.setInterest(fd, ops);
     }
--- a/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright 2002-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2002-2007 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -80,6 +80,9 @@
     // File descriptors corresponding to source and sink
     private final int wakeupSourceFd, wakeupSinkFd;
 
+    // Lock for close cleanup
+    private Object closeLock = new Object();
+
     // Maps file descriptors to their indices in  pollArray
     private final static class FdMap extends HashMap<Integer, MapEntry> {
         static final long serialVersionUID = 0L;
@@ -473,42 +476,48 @@
     }
 
     protected void implClose() throws IOException {
-        if (channelArray != null) {
-            if (pollWrapper != null) {
-                // prevent further wakeup
-                synchronized (interruptLock) {
-                    interruptTriggered = true;
-                }
-                wakeupPipe.sink().close();
-                wakeupPipe.source().close();
-                for(int i = 1; i < totalChannels; i++) { // Deregister channels
-                    if (i % MAX_SELECTABLE_FDS != 0) { // skip wakeupEvent
-                        deregister(channelArray[i]);
-                        SelectableChannel selch = channelArray[i].channel();
-                        if (!selch.isOpen() && !selch.isRegistered())
-                            ((SelChImpl)selch).kill();
+        synchronized (closeLock) {
+            if (channelArray != null) {
+                if (pollWrapper != null) {
+                    // prevent further wakeup
+                    synchronized (interruptLock) {
+                        interruptTriggered = true;
                     }
-                }
-                pollWrapper.free();
-                pollWrapper = null;
-                selectedKeys = null;
-                channelArray = null;
-                threads.clear();
-                // Call startThreads. All remaining helper threads now exit,
-                // since threads.size() = 0;
-                startLock.startThreads();
+                    wakeupPipe.sink().close();
+                    wakeupPipe.source().close();
+                    for(int i = 1; i < totalChannels; i++) { // Deregister channels
+                        if (i % MAX_SELECTABLE_FDS != 0) { // skip wakeupEvent
+                            deregister(channelArray[i]);
+                            SelectableChannel selch = channelArray[i].channel();
+                            if (!selch.isOpen() && !selch.isRegistered())
+                                ((SelChImpl)selch).kill();
+                        }
+                    }
+                    pollWrapper.free();
+                    pollWrapper = null;
+                    selectedKeys = null;
+                    channelArray = null;
+                    threads.clear();
+                    // Call startThreads. All remaining helper threads now exit,
+                    // since threads.size() = 0;
+                    startLock.startThreads();
+                }
             }
         }
     }
 
     protected void implRegister(SelectionKeyImpl ski) {
-        growIfNeeded();
-        channelArray[totalChannels] = ski;
-        ski.setIndex(totalChannels);
-        fdMap.put(ski);
-        keys.add(ski);
-        pollWrapper.addEntry(totalChannels, ski);
-        totalChannels++;
+        synchronized (closeLock) {
+            if (pollWrapper == null)
+                throw new ClosedSelectorException();
+            growIfNeeded();
+            channelArray[totalChannels] = ski;
+            ski.setIndex(totalChannels);
+            fdMap.put(ski);
+            keys.add(ski);
+            pollWrapper.addEntry(totalChannels, ski);
+            totalChannels++;
+        }
     }
 
     private void growIfNeeded() {
@@ -554,7 +563,11 @@
     }
 
     void putEventOps(SelectionKeyImpl sk, int ops) {
-        pollWrapper.putEventOps(sk.getIndex(), ops);
+        synchronized (closeLock) {
+            if (pollWrapper == null)
+                throw new ClosedSelectorException();
+            pollWrapper.putEventOps(sk.getIndex(), ops);
+        }
     }
 
     public Selector wakeup() {
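
The selector changes above all follow the same shape: guard registration and interest-set updates with a close check (a dedicated closeLock or a volatile closed flag) so that using a closed Selector fails with ClosedSelectorException instead of an NPE or corrupted state. A reduced sketch of that pattern, with hypothetical names and independent of the JDK sources, ahead of the regression test that follows.

    import java.nio.channels.ClosedSelectorException;

    // Illustration only: CloseGuardedRegistry is a made-up name, not a JDK class.
    class CloseGuardedRegistry {
        private final Object closeLock = new Object();
        private boolean closed = false;

        void register(Object entry) {
            synchronized (closeLock) {
                if (closed)
                    throw new ClosedSelectorException();
                // ... grow arrays, record the entry, update poll state ...
            }
        }

        void close() {
            synchronized (closeLock) {
                if (closed)
                    return;
                closed = true;
                // ... deregister entries and release native resources ...
            }
        }
    }
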
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/nio/channels/Selector/CloseThenRegister.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/* @test
+ * @bug 5025260
+ * @summary ClosedSelectorException is expected when registering after close
+ */
+
+import java.net.*;
+import java.nio.channels.*;
+
+public class CloseThenRegister {
+
+    public static void main (String [] args) throws Exception {
+        try {
+            Selector s = Selector.open();
+            s.close();
+            ServerSocketChannel c = ServerSocketChannel.open();
+            c.socket().bind(new InetSocketAddress(40000));
+            c.configureBlocking(false);
+            c.register(s, SelectionKey.OP_ACCEPT);
+        } catch (ClosedSelectorException cse) {
+            return;
+        }
+        throw new RuntimeException("register after close does not cause CSE!");
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/javax/management/mxbean/TypeNameTest.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6757225
+ * @summary Test that type names in MXBeans match their spec.
+ * @author Eamonn McManus
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import java.util.List;
+import java.util.Map;
+import javax.management.MBeanAttributeInfo;
+import javax.management.MBeanInfo;
+import javax.management.MBeanServer;
+import javax.management.MBeanServerFactory;
+import javax.management.ObjectName;
+import javax.management.StandardMBean;
+
+public class TypeNameTest {
+    public static interface TestMXBean {
+        public int getInt();
+        public String IntName = "int";
+
+        public Map<String, Integer> getMapSI();
+        public String MapSIName = "java.util.Map<java.lang.String, java.lang.Integer>";
+
+        public Map<String, int[]> getMapSInts();
+        public String MapSIntsName = "java.util.Map<java.lang.String, int[]>";
+
+        public List<List<int[]>> getListListInts();
+        public String ListListIntsName = "java.util.List<java.util.List<int[]>>";
+    }
+
+    private static InvocationHandler nullIH = new InvocationHandler() {
+        public Object invoke(Object proxy, Method method, Object[] args)
+                throws Throwable {
+            return null;
+        }
+    };
+
+    static String failure;
+
+    public static void main(String[] args) throws Exception {
+        TestMXBean testImpl = (TestMXBean) Proxy.newProxyInstance(
+                TestMXBean.class.getClassLoader(), new Class<?>[] {TestMXBean.class}, nullIH);
+        Object mxbean = new StandardMBean(testImpl, TestMXBean.class, true);
+        MBeanServer mbs = MBeanServerFactory.newMBeanServer();
+        ObjectName name = new ObjectName("a:b=c");
+        mbs.registerMBean(mxbean, name);
+        MBeanInfo mbi = mbs.getMBeanInfo(name);
+        MBeanAttributeInfo[] mbais = mbi.getAttributes();
+        for (MBeanAttributeInfo mbai : mbais) {
+            String attrName = mbai.getName();
+            String attrTypeName = (String) mbai.getDescriptor().getFieldValue("originalType");
+            String fieldName = attrName + "Name";
+            Field nameField = TestMXBean.class.getField(fieldName);
+            String expectedTypeName = (String) nameField.get(null);
+            if (expectedTypeName.equals(attrTypeName)) {
+                System.out.println("OK: " + attrName + ": " + attrTypeName);
+            } else {
+                failure = "For attribute " + attrName + " expected type name \"" +
+                        expectedTypeName + "\", found type name \"" + attrTypeName +
+                        "\"";
+                System.out.println("FAIL: " + failure);
+            }
+        }
+        if (failure == null)
+            System.out.println("TEST PASSED");
+        else
+            throw new Exception("TEST FAILED: " + failure);
+    }
+}
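
For comparison outside the test harness, the "originalType" descriptor field checked above is also populated for the platform MXBeans. A small standalone sketch that prints it for the Memory MXBean; the ObjectName and output format here are just for illustration.

    import java.lang.management.ManagementFactory;
    import javax.management.MBeanAttributeInfo;
    import javax.management.MBeanServer;
    import javax.management.ObjectName;

    public class PrintOriginalTypes {
        public static void main(String[] args) throws Exception {
            MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
            ObjectName name = new ObjectName("java.lang:type=Memory");
            // Each MXBean attribute records its declared Java type in the
            // "originalType" descriptor field, alongside the mapped open type.
            for (MBeanAttributeInfo ai : mbs.getMBeanInfo(name).getAttributes()) {
                System.out.println(ai.getName() + " : "
                        + ai.getDescriptor().getFieldValue("originalType"));
            }
        }
    }
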
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/Action.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * Action used in Context.doAs
+ */
+public interface Action {
+    /**
+     * Takes an input byte block and returns an output byte block (possibly null)
+     */
+    byte[] run(Context s, byte[] input) throws Exception;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/BasicKrb5Test.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6706974
+ * @summary Add krb5 test infrastructure
+ */
+
+import org.ietf.jgss.GSSName;
+import sun.security.jgss.GSSUtil;
+import sun.security.krb5.Config;
+import sun.security.krb5.internal.crypto.EType;
+
+/**
+ * Basic JGSS/krb5 test with 3 parties: client, server, backend server. Each
+ * party uses JAAS login to get subjects and executes JGSS calls using
+ * Subject.doAs.
+ */
+public class BasicKrb5Test {
+
+    /**
+     * @param args empty or etype
+     */
+    public static void main(String[] args)
+            throws Exception {
+
+        String etype = null;
+        if (args.length > 0) {
+            etype = args[0];
+        }
+
+        // Creates and starts the KDC. This line must be put ahead of etype check
+        // since the check needs a krb5.conf.
+        new OneKDC(etype).writeJAASConf();
+
+        System.out.println("Testing etype " + etype);
+        if (etype != null && !EType.isSupported(Config.getInstance().getType(etype))) {
+            System.out.println("Not supported.");
+            System.exit(0);
+        }
+
+        new BasicKrb5Test().go(OneKDC.SERVER, OneKDC.BACKEND);
+    }
+
+    void go(final String server, final String backend) throws Exception {
+        Context c, s, s2, b;
+        c = Context.fromJAAS("client");
+        s = Context.fromJAAS("server");
+        b = Context.fromJAAS("backend");
+
+        c.startAsClient(server, GSSUtil.GSS_KRB5_MECH_OID);
+        c.x().requestCredDeleg(true);
+        s.startAsServer(GSSUtil.GSS_KRB5_MECH_OID);
+
+        c.status();
+        s.status();
+
+        Context.handshake(c, s);
+        GSSName client = c.x().getSrcName();
+
+        c.status();
+        s.status();
+
+        Context.transmit("i say high --", c, s);
+        Context.transmit("   you say low", s, c);
+
+        s2 = s.delegated();
+        s.dispose();
+        s = null;
+
+        s2.startAsClient(backend, GSSUtil.GSS_KRB5_MECH_OID);
+        b.startAsServer(GSSUtil.GSS_KRB5_MECH_OID);
+
+        s2.status();
+        b.status();
+
+        Context.handshake(s2, b);
+        GSSName client2 = b.x().getSrcName();
+
+        if (!client.equals(client2)) {
+            throw new Exception("Delegation failed");
+        }
+
+        s2.status();
+        b.status();
+
+        Context.transmit("you say hello --", s2, b);
+        Context.transmit("   i say goodbye", b, s2);
+
+        s2.dispose();
+        b.dispose();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/CleanState.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6716534
+ * @summary Krb5LoginModule has not cleaned temp info between authentication attempts
+ */
+import com.sun.security.auth.module.Krb5LoginModule;
+import java.util.HashMap;
+import java.util.Map;
+import javax.security.auth.Subject;
+import javax.security.auth.callback.Callback;
+import javax.security.auth.callback.CallbackHandler;
+import javax.security.auth.callback.NameCallback;
+import javax.security.auth.callback.PasswordCallback;
+
+public class CleanState {
+    public static void main(String[] args) throws Exception {
+        CleanState x = new CleanState();
+        new OneKDC(null);
+        x.go();
+    }
+
+    void go() throws Exception {
+        Krb5LoginModule krb5 = new Krb5LoginModule();
+
+        final String name = OneKDC.USER;
+        final char[] password = OneKDC.PASS;
+        char[] badpassword = "hellokitty".toCharArray();
+
+        Map<String,String> map = new HashMap<String,String>();
+        map.put("useTicketCache", "false");
+        map.put("doNotPrompt", "false");
+        map.put("tryFirstPass", "true");
+        Map<String,Object> shared = new HashMap<String,Object>();
+        shared.put("javax.security.auth.login.name", name);
+        shared.put("javax.security.auth.login.password", badpassword);
+
+        krb5.initialize(new Subject(), new CallbackHandler() {
+            @Override
+            public void handle(Callback[] callbacks) {
+                for(Callback callback: callbacks) {
+                    if (callback instanceof NameCallback) {
+                        ((NameCallback)callback).setName(name);
+                    }
+                    if (callback instanceof PasswordCallback) {
+                        ((PasswordCallback)callback).setPassword(password);
+                    }
+                }
+            }
+        }, shared, map);
+        krb5.login();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/Context.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,386 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+import com.sun.security.auth.module.Krb5LoginModule;
+import java.security.PrivilegedActionException;
+import java.security.PrivilegedExceptionAction;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import javax.security.auth.Subject;
+import javax.security.auth.kerberos.KerberosKey;
+import javax.security.auth.kerberos.KerberosTicket;
+import javax.security.auth.login.LoginContext;
+import org.ietf.jgss.GSSContext;
+import org.ietf.jgss.GSSCredential;
+import org.ietf.jgss.GSSException;
+import org.ietf.jgss.GSSManager;
+import org.ietf.jgss.GSSName;
+import org.ietf.jgss.MessageProp;
+import org.ietf.jgss.Oid;
+
+/**
+ * Context of a JGSS subject, encapsulating Subject and GSSContext.
+ *
+ * Three "constructors", which acquire the (private) credentials and fill
+ * it into the Subject:
+ *
+ * 1. static fromJAAS(): Creates a Context using a JAAS login config entry
+ * 2. static fromUserPass(): Creates a Context using a username and a password
+ * 3. delegated(): A new context which uses the delegated credentials from a
+ *    previously established acceptor Context
+ *
+ * Two context initiators, which create the GSSContext object inside:
+ *
+ * 1. startAsClient()
+ * 2. startAsServer()
+ *
+ * Privileged action:
+ *    doAs(): Performs an action in the name of the Subject
+ *
+ * Handshake process:
+ *    static handShake(initiator, acceptor)
+ *
+ * A four-phase typical data communication which includes all four GSS
+ * actions (wrap, unwrap, getMIC and verifyMIC):
+ *    static transmit(message, from, to)
+ */
+public class Context {
+
+    private Subject s;
+    private GSSContext x;
+    private boolean f;      // context established?
+    private String name;
+    private GSSCredential cred;     // see static method delegated().
+
+    private Context() {}
+
+    /**
+     * Creates a new Context that uses the delegated credentials from this
+     * previously established acceptor Context
+     */
+    public Context delegated() throws Exception {
+        Context out = new Context();
+        out.s = s;
+        out.cred = x.getDelegCred();
+        out.name = name + " as " + out.cred.getName().toString();
+        return out;
+    }
+
+    /**
+     * Logs in with a JAAS login config entry name
+     */
+    public static Context fromJAAS(final String name) throws Exception {
+        Context out = new Context();
+        out.name = name;
+        LoginContext lc = new LoginContext(name);
+        lc.login();
+        out.s = lc.getSubject();
+        return out;
+    }
+
+    /**
+     * Logs in with a username and a password, using Krb5LoginModule directly
+     * @param storeKey true if the key should be saved, used on the acceptor side
+     */
+    public static Context fromUserPass(String user, char[] pass, boolean storeKey) throws Exception {
+        Context out = new Context();
+        out.name = user;
+        out.s = new Subject();
+        Krb5LoginModule krb5 = new Krb5LoginModule();
+        Map<String, String> map = new HashMap<String, String>();
+        map.put("tryFirstPass", "true");
+        if (storeKey) {
+            map.put("storeKey", "true");
+        }
+        Map<String, Object> shared = new HashMap<String, Object>();
+        shared.put("javax.security.auth.login.name", user);
+        shared.put("javax.security.auth.login.password", pass);
+
+        krb5.initialize(out.s, null, shared, map);
+        krb5.login();
+        krb5.commit();
+        return out;
+    }
+
+    /**
+     * Starts as a client
+     * @param target communication peer
+     * @param mech GSS mech
+     * @throws java.lang.Exception
+     */
+    public void startAsClient(final String target, final Oid mech) throws Exception {
+        doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] dummy) throws Exception {
+                GSSManager m = GSSManager.getInstance();
+                me.x = m.createContext(
+                        target.indexOf('@') < 0 ?
+                            m.createName(target, null) :
+                            m.createName(target, GSSName.NT_HOSTBASED_SERVICE),
+                        mech,
+                        cred,
+                        GSSContext.DEFAULT_LIFETIME);
+                return null;
+            }
+        }, null);
+        f = false;
+    }
+
+    /**
+     * Starts as a server
+     * @param mech GSS mech
+     * @throws java.lang.Exception
+     */
+    public void startAsServer(final Oid mech) throws Exception {
+        doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] dummy) throws Exception {
+                GSSManager m = GSSManager.getInstance();
+                me.x = m.createContext(m.createCredential(
+                        null,
+                        GSSCredential.INDEFINITE_LIFETIME,
+                        mech,
+                        GSSCredential.ACCEPT_ONLY));
+                return null;
+            }
+        }, null);
+        f = false;
+    }
+
+    /**
+     * Accesses the internal GSSContext object. Currently it's used for --
+     *
+     * 1. calling requestXXX() before handshake
+     * 2. accessing source name
+     *
+     * Note: If the application needs to do any privileged call on this
+     * object, please use doAs(). Otherwise, it can be done directly. The
+     * methods listed above are all non-privileged calls.
+     *
+     * @return the GSSContext object
+     */
+    public GSSContext x() {
+        return x;
+    }
+
+    /**
+     * Disposes the GSSContext within
+     * @throws org.ietf.jgss.GSSException
+     */
+    public void dispose() throws GSSException {
+        x.dispose();
+    }
+
+    /**
+     * Performs an action as the Subject inside this Context
+     * @param action the action
+     * @param in the input byte array
+     * @return the output byte array
+     * @throws java.lang.Exception
+     */
+    public byte[] doAs(final Action action, final byte[] in) throws Exception {
+        try {
+            return Subject.doAs(s, new PrivilegedExceptionAction<byte[]>() {
+
+                @Override
+                public byte[] run() throws Exception {
+                    return action.run(Context.this, in);
+                }
+            });
+        } catch (PrivilegedActionException pae) {
+            throw pae.getException();
+        }
+    }
+
+    /**
+     * Prints status of GSSContext and Subject
+     * @throws java.lang.Exception
+     */
+    public void status() throws Exception {
+        System.out.println("STATUS OF " + name.toUpperCase());
+        try {
+            StringBuffer sb = new StringBuffer();
+            if (x.getAnonymityState()) {
+                sb.append("anon, ");
+            }
+            if (x.getConfState()) {
+                sb.append("conf, ");
+            }
+            if (x.getCredDelegState()) {
+                sb.append("deleg, ");
+            }
+            if (x.getIntegState()) {
+                sb.append("integ, ");
+            }
+            if (x.getMutualAuthState()) {
+                sb.append("mutual, ");
+            }
+            if (x.getReplayDetState()) {
+                sb.append("rep det, ");
+            }
+            if (x.getSequenceDetState()) {
+                sb.append("seq det, ");
+            }
+            System.out.println("Context status of " + name + ": " + sb.toString());
+            System.out.println(x.getSrcName() + " -> " + x.getTargName());
+        } catch (Exception e) {
+            // Don't care
+        }
+        System.out.println("=====================================");
+        for (Object o : s.getPrivateCredentials()) {
+            System.out.println("    " + o.getClass());
+            if (o instanceof KerberosTicket) {
+                KerberosTicket kt = (KerberosTicket) o;
+                System.out.println("        " + kt.getServer() + " for " + kt.getClient());
+            } else if (o instanceof KerberosKey) {
+                KerberosKey kk = (KerberosKey) o;
+                System.out.print("        " + kk.getKeyType() + " " + kk.getVersionNumber() + " " + kk.getAlgorithm() + " ");
+                for (byte b : kk.getEncoded()) {
+                    System.out.printf("%02X", b & 0xff);
+                }
+                System.out.println();
+            } else if (o instanceof Map) {
+                Map map = (Map) o;
+                for (Object k : map.keySet()) {
+                    System.out.println("        " + k + ": " + map.get(k));
+                }
+            }
+        }
+    }
+
+    /**
+     * Transmits a message from one Context to another. The sender wraps the
+     * message and sends it to the receiver. The receiver unwraps it, creates
+     * a MIC of the clear text and sends it back to the sender. The sender
+     * verifies the MIC against the message sent earlier.
+     * @param message the message
+     * @param s1 the sender
+     * @param s2 the receiver
+     * @throws java.lang.Exception If anything goes wrong
+     */
+    static public void transmit(final String message, final Context s1,
+            final Context s2) throws Exception {
+        final byte[] messageBytes = message.getBytes();
+        System.out.printf("-------------------- TRANSMIT from %s to %s------------------------\n",
+                s1.name, s2.name);
+
+        byte[] t = s1.doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] dummy) throws Exception {
+                System.out.println("wrap");
+                MessageProp p1 = new MessageProp(0, true);
+                byte[] out = me.x.wrap(messageBytes, 0, messageBytes.length, p1);
+                System.out.println(printProp(p1));
+                return out;
+            }
+        }, null);
+
+        t = s2.doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] input) throws Exception {
+                MessageProp p1 = new MessageProp(0, true);
+                byte[] bytes = me.x.unwrap(input, 0, input.length, p1);
+                if (!Arrays.equals(messageBytes, bytes))
+                    throw new Exception("wrap/unwrap mismatch");
+                System.out.println("unwrap");
+                System.out.println(printProp(p1));
+                p1 = new MessageProp(0, true);
+                System.out.println("getMIC");
+                bytes = me.x.getMIC(bytes, 0, bytes.length, p1);
+                System.out.println(printProp(p1));
+                return bytes;
+            }
+        }, t);
+        // The sender verifies the MIC against the message sent earlier
+        s1.doAs(new Action() {
+            @Override
+            public byte[] run(Context me, byte[] input) throws Exception {
+                MessageProp p1 = new MessageProp(0, true);
+                System.out.println("verifyMIC");
+                me.x.verifyMIC(input, 0, input.length,
+                        messageBytes, 0, messageBytes.length,
+                        p1);
+                System.out.println(printProp(p1));
+                return null;
+            }
+        }, t);
+    }
+
+    /**
+     * Returns a string description of a MessageProp object
+     * @param prop the object
+     * @return the description
+     */
+    static public String printProp(MessageProp prop) {
+        StringBuffer sb = new StringBuffer();
+        sb.append("MessagePop: ");
+        sb.append("QOP="+ prop.getQOP() + ", ");
+        sb.append(prop.getPrivacy()?"privacy, ":"");
+        sb.append(prop.isDuplicateToken()?"dup, ":"");
+        sb.append(prop.isGapToken()?"gap, ":"");
+        sb.append(prop.isOldToken()?"old, ":"");
+        sb.append(prop.isUnseqToken()?"unseq, ":"");
+        sb.append(prop.getMinorString()+ "(" + prop.getMinorStatus()+")");
+        return sb.toString();
+    }
+
+    /**
+     * Handshake (security context establishment process) between two Contexts
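+     * <p>
+     * For example (an illustrative sketch; <code>c</code> and <code>s</code>
+     * are freshly created initiator and acceptor Contexts):
+     * <pre>
+     * handshake(c, s);
+     * </pre>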
+     * @param c the initiator
+     * @param s the acceptor
+     * @throws java.lang.Exception
+     */
+    static public void handshake(final Context c, final Context s) throws Exception {
+        byte[] t = new byte[0];
+        while (!c.f || !s.f) {
+            t = c.doAs(new Action() {
+                @Override
+                public byte[] run(Context me, byte[] input) throws Exception {
+                    if (me.x.isEstablished()) {
+                        me.f = true;
+                        System.out.println(c.name + " side established");
+                        return null;
+                    } else {
+                        System.out.println(c.name + " calls initSecContext");
+                        return me.x.initSecContext(input, 0, input.length);
+                    }
+                }
+            }, t);
+
+            t = s.doAs(new Action() {
+                @Override
+                public byte[] run(Context me, byte[] input) throws Exception {
+                    if (me.x.isEstablished()) {
+                        me.f = true;
+                        System.out.println(s.name + " side established");
+                        return null;
+                    } else {
+                        System.out.println(s.name + " calls acceptSecContext");
+                        return me.x.acceptSecContext(input, 0, input.length);
+                    }
+                }
+            }, t);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/CrossRealm.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6706974
+ * @summary Add krb5 test infrastructure
+ */
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.security.Security;
+import javax.security.auth.callback.Callback;
+import javax.security.auth.callback.CallbackHandler;
+import javax.security.auth.callback.NameCallback;
+import javax.security.auth.callback.PasswordCallback;
+import javax.security.auth.callback.UnsupportedCallbackException;
+import org.ietf.jgss.GSSContext;
+import org.ietf.jgss.GSSManager;
+import org.ietf.jgss.GSSName;
+import sun.security.jgss.GSSUtil;
+
+public class CrossRealm implements CallbackHandler {
+    public static void main(String[] args) throws Exception {
+        startKDCs();
+        xRealmAuth();
+    }
+
+    static void startKDCs() throws Exception {
+        // Create and start the KDC
+        KDC kdc1 = KDC.create("RABBIT.HOLE");
+        kdc1.addPrincipal("dummy", "bogus".toCharArray());
+        kdc1.addPrincipalRandKey("krbtgt/RABBIT.HOLE");
+        kdc1.addPrincipal("krbtgt/SNAKE.HOLE", "sharedsec".toCharArray());
+
+        KDC kdc2 = KDC.create("SNAKE.HOLE");
+        kdc2.addPrincipalRandKey("krbtgt/SNAKE.HOLE");
+        kdc2.addPrincipal("krbtgt/RABBIT.HOLE", "sharedsec".toCharArray());
+        kdc2.addPrincipalRandKey("host/www.snake.hole");
+
+        KDC.saveConfig("krb5-localkdc.conf", kdc1, kdc2,
+                "forwardable=true",
+                "[domain_realm]",
+                ".snake.hole=SNAKE.HOLE");
+        System.setProperty("java.security.krb5.conf", "krb5-localkdc.conf");
+    }
+
+    static void xRealmAuth() throws Exception {
+        Security.setProperty("auth.login.defaultCallbackHandler", "CrossRealm");
+        System.setProperty("java.security.auth.login.config", "jaas-localkdc.conf");
+        System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
+        FileOutputStream fos = new FileOutputStream("jaas-localkdc.conf");
+        fos.write(("com.sun.security.jgss.krb5.initiate {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule\n" +
+                "    required\n" +
+                "    principal=dummy\n" +
+                "    doNotPrompt=false\n" +
+                "    useTicketCache=false\n" +
+                "    ;\n" +
+                "};").getBytes());
+        fos.close();
+
+        GSSManager m = GSSManager.getInstance();
+        m.createContext(
+                m.createName("host@www.snake.hole", GSSName.NT_HOSTBASED_SERVICE),
+                GSSUtil.GSS_KRB5_MECH_OID,
+                null,
+                GSSContext.DEFAULT_LIFETIME).initSecContext(new byte[0], 0, 0);
+    }
+
+    @Override
+    public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException {
+        for (Callback callback : callbacks) {
+            if (callback instanceof NameCallback) {
+                ((NameCallback) callback).setName("dummy");
+            }
+            if (callback instanceof PasswordCallback) {
+                ((PasswordCallback) callback).setPassword("bogus".toCharArray());
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/KDC.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,969 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.net.*;
+import java.io.*;
+import java.lang.reflect.Method;
+import java.security.SecureRandom;
+import java.util.*;
+import java.util.concurrent.*;
+import sun.security.krb5.*;
+import sun.security.krb5.internal.*;
+import sun.security.krb5.internal.crypto.KeyUsage;
+import sun.security.krb5.internal.ktab.KeyTab;
+import sun.security.util.DerInputStream;
+import sun.security.util.DerOutputStream;
+import sun.security.util.DerValue;
+
+/**
+ * A KDC server.
+ * <p>
+ * Features:
+ * <ol>
+ * <li> Supports TCP and UDP
+ * <li> Supports AS-REQ and TGS-REQ
+ * <li> Principal db and other settings hard coded in the application
+ * <li> Options, for example, whether preauth is required
+ * </ol>
+ * Side effects:
+ * <ol>
+ * <li> The Sun-internal class <code>sun.security.krb5.Config</code> is a
+ * singleton and initialized according to Kerberos settings (krb5.conf and
+ * java.security.krb5.* system properties). This means once it's initialized
+ * it will not automatically notice any changes to these settings (or file
+ * changes of krb5.conf). The KDC class normally does not touch these
+ * settings (except for the <code>writeKtab()</code> method). However, to make
+ * sure nothing ever goes wrong, if you want to make any changes to these
+ * settings after calling a KDC method, call <code>Config.refresh()</code> to
+ * make sure your changes are reflected in the <code>Config</code> object.
+ * </ol>
+ * Issues and TODOs:
+ * <ol>
+ * <li> Generate a krb5.conf that can be used on another machine; currently the
+ * kdc is always localhost
+ * <li> More options for the KDC, for example, error output, or a response
+ * nonce != the request nonce
+ * </ol>
+ * Note: This program uses internal krb5 classes (including reflection to
+ * access private fields and methods).
+ * <p>
+ * Usages:
+ * <p>
+ * 1. Init and start the KDC:
+ * <pre>
+ * KDC kdc = KDC.create("REALM.NAME", port, isDaemon);
+ * KDC kdc = KDC.create("REALM.NAME");
+ * </pre>
+ * Here, <code>port</code> is the UDP and TCP port number the KDC server
+ * listens on. If zero, a random port is chosen; call <code>getPort()</code>
+ * later to retrieve the value.
+ * <p>
+ * If <code>isDaemon</code> is true, the KDC worker threads will be daemons.
+ * <p>
+ * The shortcut <code>KDC.create("REALM.NAME")</code> has port=0 and
+ * isDaemon=true, and is commonly used in an embedded KDC.
+ * <p>
+ * 2. Adding users:
+ * <pre>
+ * kdc.addPrincipal(String principal_name, char[] password);
+ * kdc.addPrincipalRandKey(String principal_name);
+ * </pre>
+ * A service principal's name should look like "host/f.q.d.n". The second form
+ * generates a random key. To expose this key, call <code>writeKtab()</code> to
+ * save the keys into a keytab file.
+ * <p>
+ * Note that you need to add the principal name krbtgt/REALM.NAME yourself.
+ * <p>
+ * Note that you can safely add a principal at any time after the KDC is
+ * started and before a user requests info on this principal.
+ * <p>
+ * 3. Other public methods:
+ * <ul>
+ * <li> <code>getPort</code>: Returns the port number the KDC uses
+ * <li> <code>getRealm</code>: Returns the realm name
+ * <li> <code>writeKtab</code>: Writes all principals' keys into a keytab file
+ * <li> <code>saveConfig</code>: Saves a krb5.conf file to access this KDC
+ * <li> <code>setOption</code>: Sets various options
+ * </ul>
+ * Read the javadoc for details. A lazy developer can use <code>OneKDC</code>
+ * directly.
+ */
+public class KDC {
+
+    // Under the hood.
+
+    // The random generator to generate random keys (including session keys)
+    private static SecureRandom secureRandom = new SecureRandom();
+    // Principal db
+    private Map<String,char[]> passwords = new HashMap<String,char[]>();
+    // Realm name
+    private String realm;
+    // The request/response job queue
+    private BlockingQueue<Job> q = new ArrayBlockingQueue<Job>(100);
+    // Service port number
+    private int port;
+    // Options
+    private Map<Option,Object> options = new HashMap<Option,Object>();
+
+    /**
+     * Option names, to be expanded forever.
+     */
+    public static enum Option {
+        /**
+         * Whether pre-authentication is required. Default is Boolean.TRUE.
+         */
+        PREAUTH_REQUIRED,
+    };
+
+    /**
+     * A standalone KDC server.
+     * @param args the command line arguments
+     * @throws java.lang.Exception if anything goes wrong
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length > 0) {
+            if (args[0].equals("-help") || args[0].equals("--help")) {
+                System.out.println("Usage:");
+                System.out.println("   java " + KDC.class + "       " +
+                        "Start KDC on port 8888");
+                return;
+            }
+        }
+        String localhost = "localhost";
+        try {
+            localhost = InetAddress.getByName(localhost)
+                    .getCanonicalHostName();
+        } catch (UnknownHostException uhe) {
+            ;   // Ignore, localhost is still "localhost"
+        }
+        KDC kdc = create("RABBIT.HOLE", 8888, false);
+        kdc.addPrincipal("dummy", "bogus".toCharArray());
+        kdc.addPrincipal("foo", "bar".toCharArray());
+        kdc.addPrincipalRandKey("krbtgt/" + kdc.realm);
+        kdc.addPrincipalRandKey("server/" + localhost);
+        kdc.addPrincipalRandKey("backend/" + localhost);
+    }
+
+    /**
+     * Creates and starts a KDC running as a daemon on a random port.
+     * @param realm the realm name
+     * @return the running KDC instance
+     * @throws java.io.IOException for any socket creation error
+     */
+    public static KDC create(String realm) throws IOException {
+        return create(realm, 0, true);
+    }
+
+    /**
+     * Creates and starts a KDC server.
+     * @param realm the realm name
+     * @param port the TCP and UDP port to listen to. A random port will be
+     *        chosen if zero.
+     * @param asDaemon if true, KDC threads will be daemons. Otherwise, not.
+     * @return the running KDC instance
+     * @throws java.io.IOException for any socket creation error
+     */
+    public static KDC create(String realm, int port, boolean asDaemon) throws IOException {
+        return new KDC(realm, port, asDaemon);
+    }
+
+    /**
+     * Sets an option
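+     * For example, to turn off the pre-authentication requirement (an
+     * illustrative sketch):
+     * <pre>
+     * kdc.setOption(KDC.Option.PREAUTH_REQUIRED, Boolean.FALSE);
+     * </pre>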
+     * @param key the option name
+     * @param value the value
+     */
+    public void setOption(Option key, Object value) {
+        options.put(key, value);
+    }
+
+    /**
+     * Writes all principals' keys into a keytab file. Note that the keys for
+     * the krbtgt principal for this realm will not be written.
+     * <p>
+     * Attention: This method references krb5.conf settings. If you need to
+     * set up krb5.conf later, please call <code>Config.refresh()</code> after
+     * applying the new settings. For example:
+     * <pre>
+     * kdc.writeKtab("/etc/kdc/ktab");  // Config is initialized,
+     * System.setProperty("java.security.krb5.conf", "/home/mykrb5.conf");
+     * Config.refresh();
+     * </pre>
+     *
+     * Inside this method, krb5.conf is used in two places:
+     * <ol>
+     * <li> (Fatal) Generating keys: EncryptionKey.acquireSecretKeys
+     * <li> (Has workaround) Creating PrincipalName
+     * </ol>
+     * @param tab The keytab filename to write to.
+     * @throws java.io.IOException for any file output error
+     * @throws sun.security.krb5.KrbException for any realm and/or principal
+     *         name error.
+     */
+    public void writeKtab(String tab) throws IOException, KrbException {
+        KeyTab ktab = KeyTab.create(tab);
+        for (String name : passwords.keySet()) {
+            if (name.equals("krbtgt/" + realm)) {
+                continue;
+            }
+            ktab.addEntry(new PrincipalName(name + "@" + realm,
+                    name.indexOf('/') < 0 ?
+                        PrincipalName.KRB_NT_UNKNOWN :
+                        PrincipalName.KRB_NT_SRV_HST), passwords.get(name));
+        }
+        ktab.save();
+    }
+
+    /**
+     * Adds a new principal to this realm with a given password.
+     * @param user the principal's name. For a service principal, use the
+     *        form of host/f.q.d.n
+     * @param pass the password for the principal
+     */
+    public void addPrincipal(String user, char[] pass) {
+        passwords.put(user, pass);
+    }
+
+    /**
+     * Adds a new principal to this realm with a random password
+     * @param user the principal's name. For a service principal, use the
+     *        form of host/f.q.d.n
+     */
+    public void addPrincipalRandKey(String user) {
+        passwords.put(user, randomPassword());
+    }
+
+    /**
+     * Returns the name of this realm
+     * @return the name of this realm
+     */
+    public String getRealm() {
+        return realm;
+    }
+
+    /**
+     * Writes a krb5.conf file for one or more KDCs that includes KDC locations for
+     * each realm and the default realm name. You can also add extra strings
+     * into the file. The method should be called like:
+     * <pre>
+     *   KDC.saveConfig("krb5.conf", kdc1, kdc2, ..., line1, line2, ...);
+     * </pre>
+     * Here you can provide one or more kdc# and zero or more line# arguments.
+     * The line# will be put after [libdefaults] and before [realms]. Therefore
+     * you can append new lines into [libdefaults] and/or create your new
+     * stanzas as well. Note that a newline character will be appended to
+     * each line# argument.
+     * <p>
+     * For example:
+     * <pre>
+     * KDC.saveConfig("krb5.conf", this);
+     * </pre>
+     * generates:
+     * <pre>
+     * [libdefaults]
+     * default_realm = REALM.NAME
+     *
+     * [realms]
+     *   REALM.NAME = {
+     *     kdc = localhost:port_number
+     *   }
+     * </pre>
+     *
+     * Another example:
+     * <pre>
+     * KDC.saveConfig("krb5.conf", kdc1, kdc2, "forwardable = true", "",
+     *         "[domain_realm]",
+     *         ".kdc1.com = KDC1.NAME");
+     * </pre>
+     * generates:
+     * <pre>
+     * [libdefaults]
+     * default_realm = KDC1.NAME
+     * forwardable = true
+     *
+     * [domain_realm]
+     * .kdc1.com = KDC1.NAME
+     *
+     * [realms]
+     *   KDC1.NAME = {
+     *     kdc = localhost:port1
+     *   }
+     *   KDC2.NAME = {
+     *     kdc = localhost:port2
+     *   }
+     * </pre>
+     * @param file the name of the file to write into
+     * @param kdc the first (and default) KDC
+     * @param more more KDCs or extra lines (in the order they appear) to
+     * insert into the krb5.conf file. This method checks each argument's type
+     * to determine what it is for. This argument can be empty.
+     * @throws java.io.IOException for any file output error
+     */
+    public static void saveConfig(String file, KDC kdc, Object... more)
+            throws IOException {
+        File f = new File(file);
+        StringBuffer sb = new StringBuffer();
+        sb.append("[libdefaults]\ndefault_realm = ");
+        sb.append(kdc.realm);
+        sb.append("\n");
+        for (Object o: more) {
+            if (o instanceof String) {
+                sb.append(o);
+                sb.append("\n");
+            }
+        }
+        sb.append("\n[realms]\n");
+        sb.append(realmLineForKDC(kdc));
+        for (Object o: more) {
+            if (o instanceof KDC) {
+                sb.append(realmLineForKDC((KDC)o));
+            }
+        }
+        FileOutputStream fos = new FileOutputStream(f);
+        fos.write(sb.toString().getBytes());
+        fos.close();
+    }
+
+    /**
+     * Returns the service port of the KDC server.
+     * @return the KDC service port
+     */
+    public int getPort() {
+        return port;
+    }
+
+    // Private helper methods
+
+    /**
+     * Private constructor, cannot be called from outside this class.
+     * @param realm the realm name
+     */
+    private KDC(String realm) {
+        this.realm = realm;
+    }
+
+    /**
+     * A constructor that also starts the KDC service.
+     */
+    protected KDC(String realm, int port, boolean asDaemon)
+            throws IOException {
+        this(realm);
+        startServer(port, asDaemon);
+    }
+
+    /**
+     * Generates a 32-char random password
+     * @return the password
+     */
+    private static char[] randomPassword() {
+        char[] pass = new char[32];
+        for (int i=0; i<32; i++)
+            pass[i] = (char)secureRandom.nextInt();
+        return pass;
+    }
+
+    /**
+     * Generates a random key for the given encryption type.
+     * @param eType the encryption type
+     * @return the generated key
+     * @throws sun.security.krb5.KrbException for unknown/unsupported etype
+     */
+    private static EncryptionKey generateRandomKey(int eType)
+            throws KrbException  {
+        // Is 32 chars enough for AES256? The keys should really be generated
+        // directly, but different crypto algorithms have different rules on
+        // which keys are valid.
+        char[] pass = randomPassword();
+        String algo;
+        switch (eType) {
+            case EncryptedData.ETYPE_DES_CBC_MD5: algo = "DES"; break;
+            case EncryptedData.ETYPE_DES3_CBC_HMAC_SHA1_KD: algo = "DESede"; break;
+            case EncryptedData.ETYPE_AES128_CTS_HMAC_SHA1_96: algo = "AES128"; break;
+            case EncryptedData.ETYPE_ARCFOUR_HMAC: algo = "ArcFourHMAC"; break;
+            case EncryptedData.ETYPE_AES256_CTS_HMAC_SHA1_96: algo = "AES256"; break;
+            default: algo = "DES"; break;
+        }
+        return new EncryptionKey(pass, "NOTHING", algo);    // Silly
+    }
+
+    /**
+     * Returns the password for a given principal
+     * @param p principal
+     * @return the password
+     * @throws sun.security.krb5.KrbException when the principal is not inside
+     *         the database.
+     */
+    private char[] getPassword(PrincipalName p) throws KrbException {
+        char[] pass = passwords.get(p.getNameString());
+        if (pass == null) {
+            throw new KrbException(Krb5.KDC_ERR_C_PRINCIPAL_UNKNOWN);
+        }
+        return pass;
+    }
+
+    /**
+     * Returns the salt string for the principal. For normal users, it is the
+     * concatenation of the realm name and the name components of the
+     * principal; for krbtgt/A@B and krbtgt/B@A, it is always AB (so that the
+     * two inter-realm principals have the same key).
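+     * <p>
+     * For example (illustrative names): principal <code>alice/admin</code>
+     * in realm <code>A.COM</code> gets the salt <code>A.COMaliceadmin</code>;
+     * <code>krbtgt/B.COM</code> seen by the A.COM KDC and
+     * <code>krbtgt/A.COM</code> seen by the B.COM KDC both get the salt
+     * <code>A.COMB.COM</code>.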
+     * @param p principal
+     * @return the salt
+     */
+    private String getSalt(PrincipalName p) {
+        String[] ns = p.getNameStrings();
+        if (ns[0].equals("krbtgt") && ns.length > 1) {
+            // Shared cross-realm keys must be the same
+            if (ns[1].compareTo(realm) < 0) {
+                return ns[1] + realm;
+            } else {
+                return realm + ns[1];
+            }
+        } else {
+            String s = getRealm();
+            for (String n: p.getNameStrings()) {
+                s += n;
+            }
+            return s;
+        }
+    }
+
+    /**
+     * Returns the key for a given principal of the given encryption type
+     * @param p the principal
+     * @param etype the encryption type
+     * @return the key
+     * @throws sun.security.krb5.KrbException for unknown/unsupported etype
+     */
+    private EncryptionKey keyForUser(PrincipalName p, int etype) throws KrbException {
+        try {
+            // Do not call EncryptionKey.acquireSecretKeys(), otherwise
+            // the krb5.conf config file would be loaded.
+            Method stringToKey = EncryptionKey.class.getDeclaredMethod("stringToKey", char[].class, String.class, byte[].class, Integer.TYPE);
+            stringToKey.setAccessible(true);
+            return new EncryptionKey((byte[]) stringToKey.invoke(null, getPassword(p), getSalt(p), null, etype), etype, null);
+        } catch (InvocationTargetException ex) {
+            KrbException ke = (KrbException)ex.getCause();
+            throw ke;
+        } catch (Exception e) {
+            throw new RuntimeException(e);  // should not happen
+        }
+    }
+
+    /**
+     * Processes an incoming request and generates a response.
+     * @param in the request
+     * @return the response
+     * @throws java.lang.Exception for various errors
+     */
+    private byte[] processMessage(byte[] in) throws Exception {
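+        // The first byte of the DER encoding is an APPLICATION tag whose low
+        // 5 bits carry the message type (AS-REQ here, otherwise TGS-REQ).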
+        if ((in[0] & 0x1f) == Krb5.KRB_AS_REQ)
+            return processAsReq(in);
+        else
+            return processTgsReq(in);
+    }
+
+    /**
+     * Processes a TGS_REQ and generates a TGS_REP (or KRB_ERROR)
+     * @param in the request
+     * @return the response
+     * @throws java.lang.Exception for various errors
+     */
+    private byte[] processTgsReq(byte[] in) throws Exception {
+        TGSReq tgsReq = new TGSReq(in);
+        try {
+            System.out.println(realm + "> " + tgsReq.reqBody.cname +
+                    " sends TGS-REQ for " +
+                    tgsReq.reqBody.sname);
+            KDCReqBody body = tgsReq.reqBody;
+            int etype = 0;
+
+            // Reflection: PAData[] pas = tgsReq.pAData;
+            Field f = KDCReq.class.getDeclaredField("pAData");
+            f.setAccessible(true);
+            PAData[] pas = (PAData[])f.get(tgsReq);
+
+            Ticket tkt = null;
+            EncTicketPart etp = null;
+            if (pas == null || pas.length == 0) {
+                throw new KrbException(Krb5.KDC_ERR_PADATA_TYPE_NOSUPP);
+            } else {
+                for (PAData pa: pas) {
+                    if (pa.getType() == Krb5.PA_TGS_REQ) {
+                        APReq apReq = new APReq(pa.getValue());
+                        EncryptedData ed = apReq.authenticator;
+                        tkt = apReq.ticket;
+                        etype = tkt.encPart.getEType();
+                        EncryptionKey kkey = null;
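+                        // If the ticket was issued by another realm (a
+                        // cross-realm TGT for this realm's TGS), decrypt it
+                        // with the shared inter-realm krbtgt key; otherwise
+                        // use the ticket's own service principal key.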
+                        if (!tkt.realm.toString().equals(realm)) {
+                            if (tkt.sname.getNameString().equals("krbtgt/" + realm)) {
+                                kkey = keyForUser(new PrincipalName("krbtgt/" + tkt.realm.toString(), realm), etype);
+                            }
+                        } else {
+                            kkey = keyForUser(tkt.sname, etype);
+                        }
+                        byte[] bb = tkt.encPart.decrypt(kkey, KeyUsage.KU_TICKET);
+                        DerInputStream derIn = new DerInputStream(bb);
+                        DerValue der = derIn.getDerValue();
+                        etp = new EncTicketPart(der.toByteArray());
+                    }
+                }
+                if (tkt == null) {
+                    throw new KrbException(Krb5.KDC_ERR_PADATA_TYPE_NOSUPP);
+                }
+            }
+            EncryptionKey skey = keyForUser(body.sname, etype);
+            if (skey == null) {
+                throw new KrbException(Krb5.KDC_ERR_SUMTYPE_NOSUPP); // TODO
+            }
+
+            // Session key for original ticket, TGT
+            EncryptionKey ckey = etp.key;
+
+            // Session key for session with the service
+            EncryptionKey key = generateRandomKey(etype);
+
+            // Check time, TODO
+            KerberosTime till = body.till;
+            if (till == null) {
+                throw new KrbException(Krb5.KDC_ERR_NEVER_VALID); // TODO
+            } else if (till.isZero()) {
+                till = new KerberosTime(new Date().getTime() + 1000 * 3600 * 11);
+            }
+
+            boolean[] bFlags = new boolean[Krb5.TKT_OPTS_MAX+1];
+            if (body.kdcOptions.get(KDCOptions.FORWARDABLE)) {
+                bFlags[Krb5.TKT_OPTS_FORWARDABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.FORWARDED) ||
+                    etp.flags.get(Krb5.TKT_OPTS_FORWARDED)) {
+                bFlags[Krb5.TKT_OPTS_FORWARDED] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.RENEWABLE)) {
+                bFlags[Krb5.TKT_OPTS_RENEWABLE] = true;
+                //renew = new KerberosTime(new Date().getTime() + 1000 * 3600 * 24 * 7);
+            }
+            if (body.kdcOptions.get(KDCOptions.PROXIABLE)) {
+                bFlags[Krb5.TKT_OPTS_PROXIABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.POSTDATED)) {
+                bFlags[Krb5.TKT_OPTS_POSTDATED] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.ALLOW_POSTDATE)) {
+                bFlags[Krb5.TKT_OPTS_MAY_POSTDATE] = true;
+            }
+            bFlags[Krb5.TKT_OPTS_INITIAL] = true;
+
+            TicketFlags tFlags = new TicketFlags(bFlags);
+            EncTicketPart enc = new EncTicketPart(
+                    tFlags,
+                    key,
+                    etp.crealm,
+                    etp.cname,
+                    new TransitedEncoding(1, new byte[0]),  // TODO
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.addresses,
+                    null);
+            Ticket t = new Ticket(
+                    body.crealm,
+                    body.sname,
+                    new EncryptedData(skey, enc.asn1Encode(), KeyUsage.KU_TICKET)
+            );
+            EncTGSRepPart enc_part = new EncTGSRepPart(
+                    key,
+                    new LastReq(new LastReqEntry[]{
+                        new LastReqEntry(0, new KerberosTime(new Date().getTime() - 10000))
+                    }),
+                    body.getNonce(),    // TODO: detect replay
+                    new KerberosTime(new Date().getTime() + 1000 * 3600 * 24),
+                    // Next 5 fields and the last one MUST be the same as in the ticket
+                    tFlags,
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.crealm,
+                    body.sname,
+                    body.addresses
+                    );
+            EncryptedData edata = new EncryptedData(ckey, enc_part.asn1Encode(), KeyUsage.KU_ENC_TGS_REP_PART_SESSKEY);
+            TGSRep tgsRep = new TGSRep(null,
+                    etp.crealm,
+                    etp.cname,
+                    t,
+                    edata);
+            System.out.println("     Return " + tgsRep.cname
+                    + " ticket for " + tgsRep.ticket.sname);
+
+            DerOutputStream out = new DerOutputStream();
+            out.write(DerValue.createTag(DerValue.TAG_APPLICATION,
+                    true, (byte)Krb5.KRB_TGS_REP), tgsRep.asn1Encode());
+            return out.toByteArray();
+        } catch (KrbException ke) {
+            ke.printStackTrace(System.out);
+            KRBError kerr = ke.getError();
+            KDCReqBody body = tgsReq.reqBody;
+            System.out.println("     Error " + ke.returnCode()
+                    + " " +ke.returnCodeMessage());
+            if (kerr == null) {
+                kerr = new KRBError(null, null, null,
+                        new KerberosTime(new Date()),
+                        0,
+                        ke.returnCode(),
+                        body.crealm, body.cname,
+                        new Realm(getRealm()), body.sname,
+                        KrbException.errorMessage(ke.returnCode()),
+                        null);
+            }
+            return kerr.asn1Encode();
+        }
+    }
+
+    /**
+     * Processes an AS_REQ and generates an AS_REP (or KRB_ERROR)
+     * @param in the request
+     * @return the response
+     * @throws java.lang.Exception for various errors
+     */
+    private byte[] processAsReq(byte[] in) throws Exception {
+        ASReq asReq = new ASReq(in);
+        int[] eTypes = null;
+        try {
+            System.out.println(realm + "> " + asReq.reqBody.cname +
+                    " sends AS-REQ for " +
+                    asReq.reqBody.sname);
+
+            KDCReqBody body = asReq.reqBody;
+
+            // Reflection: int[] eType = body.eType;
+            Field f = KDCReqBody.class.getDeclaredField("eType");
+            f.setAccessible(true);
+            eTypes = (int[])f.get(body);
+            int eType = eTypes[0];
+
+            EncryptionKey ckey = keyForUser(body.cname, eType);
+            EncryptionKey skey = keyForUser(body.sname, eType);
+            if (ckey == null) {
+                throw new KrbException(Krb5.KDC_ERR_ETYPE_NOSUPP);
+            }
+            if (skey == null) {
+                throw new KrbException(Krb5.KDC_ERR_SUMTYPE_NOSUPP); // TODO
+            }
+
+            // Session key
+            EncryptionKey key = generateRandomKey(eType);
+            // Check time, TODO
+            KerberosTime till = body.till;
+            if (till == null) {
+                throw new KrbException(Krb5.KDC_ERR_NEVER_VALID); // TODO
+            } else if (till.isZero()) {
+                till = new KerberosTime(new Date().getTime() + 1000 * 3600 * 11);
+            }
+            //body.from
+            boolean[] bFlags = new boolean[Krb5.TKT_OPTS_MAX+1];
+            if (body.kdcOptions.get(KDCOptions.FORWARDABLE)) {
+                bFlags[Krb5.TKT_OPTS_FORWARDABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.RENEWABLE)) {
+                bFlags[Krb5.TKT_OPTS_RENEWABLE] = true;
+                //renew = new KerberosTime(new Date().getTime() + 1000 * 3600 * 24 * 7);
+            }
+            if (body.kdcOptions.get(KDCOptions.PROXIABLE)) {
+                bFlags[Krb5.TKT_OPTS_PROXIABLE] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.POSTDATED)) {
+                bFlags[Krb5.TKT_OPTS_POSTDATED] = true;
+            }
+            if (body.kdcOptions.get(KDCOptions.ALLOW_POSTDATE)) {
+                bFlags[Krb5.TKT_OPTS_MAY_POSTDATE] = true;
+            }
+            bFlags[Krb5.TKT_OPTS_INITIAL] = true;
+
+            f = KDCReq.class.getDeclaredField("pAData");
+            f.setAccessible(true);
+            PAData[] pas = (PAData[])f.get(asReq);
+            if (pas == null || pas.length == 0) {
+                Object preauth = options.get(Option.PREAUTH_REQUIRED);
+                if (preauth == null || preauth.equals(Boolean.TRUE)) {
+                    throw new KrbException(Krb5.KDC_ERR_PREAUTH_REQUIRED);
+                }
+            } else {
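+                // Preauth data present: treat the first PAData value as a
+                // PA-ENC-TIMESTAMP, i.e. an EncryptedData under the client key.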
+                try {
+                    Constructor<EncryptedData> ctor = EncryptedData.class.getDeclaredConstructor(DerValue.class);
+                    ctor.setAccessible(true);
+                    EncryptedData data = ctor.newInstance(new DerValue(pas[0].getValue()));
+                    data.decrypt(ckey, KeyUsage.KU_PA_ENC_TS);
+                } catch (Exception e) {
+                    throw new KrbException(Krb5.KDC_ERR_PREAUTH_FAILED);
+                }
+                bFlags[Krb5.TKT_OPTS_PRE_AUTHENT] = true;
+            }
+
+            TicketFlags tFlags = new TicketFlags(bFlags);
+            EncTicketPart enc = new EncTicketPart(
+                    tFlags,
+                    key,
+                    body.crealm,
+                    body.cname,
+                    new TransitedEncoding(1, new byte[0]),
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.addresses,
+                    null);
+            Ticket t = new Ticket(
+                    body.crealm,
+                    body.sname,
+                    new EncryptedData(skey, enc.asn1Encode(), KeyUsage.KU_TICKET)
+            );
+            EncASRepPart enc_part = new EncASRepPart(
+                    key,
+                    new LastReq(new LastReqEntry[]{
+                        new LastReqEntry(0, new KerberosTime(new Date().getTime() - 10000))
+                    }),
+                    body.getNonce(),    // TODO: detect replay?
+                    new KerberosTime(new Date().getTime() + 1000 * 3600 * 24),
+                    // Next 5 fields and the last one MUST be the same as in the ticket
+                    tFlags,
+                    new KerberosTime(new Date()),
+                    body.from,
+                    till, body.rtime,
+                    body.crealm,
+                    body.sname,
+                    body.addresses
+                    );
+            EncryptedData edata = new EncryptedData(ckey, enc_part.asn1Encode(), KeyUsage.KU_ENC_AS_REP_PART);
+            ASRep asRep = new ASRep(null,
+                    body.crealm,
+                    body.cname,
+                    t,
+                    edata);
+
+            System.out.println("     Return " + asRep.cname
+                    + " ticket for " + asRep.ticket.sname);
+
+            DerOutputStream out = new DerOutputStream();
+            out.write(DerValue.createTag(DerValue.TAG_APPLICATION,
+                    true, (byte)Krb5.KRB_AS_REP), asRep.asn1Encode());
+            return out.toByteArray();
+        } catch (KrbException ke) {
+            ke.printStackTrace(System.out);
+            KRBError kerr = ke.getError();
+            KDCReqBody body = asReq.reqBody;
+            System.out.println("     Error " + ke.returnCode()
+                    + " " +ke.returnCodeMessage());
+            byte[] eData = null;
+            if (kerr == null) {
+                if (ke.returnCode() == Krb5.KDC_ERR_PREAUTH_REQUIRED ||
+                        ke.returnCode() == Krb5.KDC_ERR_PREAUTH_FAILED) {
+                    PAData pa;
+
+                    ETypeInfo2 ei2 = new ETypeInfo2(eTypes[0], null, null);
+                    DerOutputStream eid = new DerOutputStream();
+                    eid.write(DerValue.tag_Sequence, ei2.asn1Encode());
+
+                    pa = new PAData(Krb5.PA_ETYPE_INFO2, eid.toByteArray());
+
+                    DerOutputStream bytes = new DerOutputStream();
+                    bytes.write(new PAData(Krb5.PA_ENC_TIMESTAMP, new byte[0]).asn1Encode());
+                    bytes.write(pa.asn1Encode());
+
+                    boolean allOld = true;
+                    for (int i: eTypes) {
+                        if (i == EncryptedData.ETYPE_AES128_CTS_HMAC_SHA1_96 ||
+                                i == EncryptedData.ETYPE_AES256_CTS_HMAC_SHA1_96) {
+                            allOld = false;
+                            break;
+                        }
+                    }
+                    if (allOld) {
+                        ETypeInfo ei = new ETypeInfo(eTypes[0], null);
+                        eid = new DerOutputStream();
+                        eid.write(DerValue.tag_Sequence, ei.asn1Encode());
+                        pa = new PAData(Krb5.PA_ETYPE_INFO, eid.toByteArray());
+                        bytes.write(pa.asn1Encode());
+                    }
+                    DerOutputStream temp = new DerOutputStream();
+                    temp.write(DerValue.tag_Sequence, bytes);
+                    eData = temp.toByteArray();
+                }
+                kerr = new KRBError(null, null, null,
+                        new KerberosTime(new Date()),
+                        0,
+                        ke.returnCode(),
+                        body.crealm, body.cname,
+                        new Realm(getRealm()), body.sname,
+                        KrbException.errorMessage(ke.returnCode()),
+                        eData);
+            }
+            return kerr.asn1Encode();
+        }
+    }
+
+    /**
+     * Generates a line for a KDC to put inside [realms] of krb5.conf
+     * @param kdc the KDC
+     * @return REALM.NAME = { kdc = localhost:port }
+     */
+    private static String realmLineForKDC(KDC kdc) {
+        return String.format("  %s = {\n    kdc = localhost:%d\n  }\n", kdc.realm, kdc.port);
+    }
+
+    /**
+     * Starts the KDC service. The server listens on both UDP and TCP using
+     * the same port number. It uses three threads to deal with requests.
+     * They can be set to daemon threads if requested.
+     * @param port the port number to listen to. If zero, a random available
+     *  port no less than 8000 will be chosen and used.
+     * @param asDaemon true if the KDC threads should be daemons
+     * @throws java.io.IOException for any communication error
+     */
+    protected void startServer(int port, boolean asDaemon) throws IOException {
+        DatagramSocket u1 = null;
+        ServerSocket t1 = null;
+        if (port > 0) {
+            u1 = new DatagramSocket(port, InetAddress.getByName("127.0.0.1"));
+            t1 = new ServerSocket(port);
+        } else {
+            while (true) {
+                // Try to find a port number that's both TCP and UDP free
+                try {
+                    port = 8000 + new java.util.Random().nextInt(10000);
+                    u1 = null;
+                    u1 = new DatagramSocket(port, InetAddress.getByName("127.0.0.1"));
+                    t1 = new ServerSocket(port);
+                    break;
+                } catch (Exception e) {
+                    if (u1 != null) u1.close();
+                }
+            }
+        }
+        final DatagramSocket udp = u1;
+        final ServerSocket tcp = t1;
+        System.out.println("Start KDC on " + port);
+
+        this.port = port;
+
+        // The UDP consumer
+        Thread thread = new Thread() {
+            public void run() {
+                while (true) {
+                    try {
+                        byte[] inbuf = new byte[8192];
+                        DatagramPacket p = new DatagramPacket(inbuf, inbuf.length);
+                        udp.receive(p);
+                        System.out.println("-----------------------------------------------");
+                        System.out.println(">>>>> UDP packet received");
+                        q.put(new Job(processMessage(Arrays.copyOf(inbuf, p.getLength())), udp, p));
+                    } catch (Exception e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        };
+        thread.setDaemon(asDaemon);
+        thread.start();
+
+        // The TCP consumer
+        thread = new Thread() {
+            public void run() {
+                while (true) {
+                    try {
+                        Socket socket = tcp.accept();
+                        System.out.println("-----------------------------------------------");
+                        System.out.println(">>>>> TCP connection established");
+                        DataInputStream in = new DataInputStream(socket.getInputStream());
+                        DataOutputStream out = new DataOutputStream(socket.getOutputStream());
+                        byte[] token = new byte[in.readInt()];
+                        in.readFully(token);
+                        q.put(new Job(processMessage(token), socket, out));
+                    } catch (Exception e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        };
+        thread.setDaemon(asDaemon);
+        thread.start();
+
+        // The dispatcher
+        thread = new Thread() {
+            public void run() {
+                while (true) {
+                    try {
+                        q.take().send();
+                    } catch (Exception e) {
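+                        // Ignore and keep dispatching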
+                    }
+                }
+            }
+        };
+        thread.setDaemon(true);
+        thread.start();
+    }
+
+    /**
+     * Helper class to encapsulate a job in a KDC.
+     */
+    private static class Job {
+        byte[] token;           // The received request at creation time and
+                                // the response at send time
+        Socket s;               // The TCP socket from where the request comes
+        DataOutputStream out;   // The OutputStream of the TCP socket
+        DatagramSocket s2;      // The UDP socket from where the request comes
+        DatagramPacket dp;      // The incoming UDP datagram packet
+        boolean useTCP;         // Whether TCP or UDP is used
+
+        // Creates a job object for TCP
+        Job(byte[] token, Socket s, DataOutputStream out) {
+            useTCP = true;
+            this.token = token;
+            this.s = s;
+            this.out = out;
+        }
+
+        // Creates a job object for UDP
+        Job(byte[] token, DatagramSocket s2, DatagramPacket dp) {
+            useTCP = false;
+            this.token = token;
+            this.s2 = s2;
+            this.dp = dp;
+        }
+
+        // Sends the output back to the client
+        void send() {
+            try {
+                if (useTCP) {
+                    System.out.println(">>>>> TCP request honored");
+                    out.writeInt(token.length);
+                    out.write(token);
+                    s.close();
+                } else {
+                    System.out.println(">>>>> UDP request honored");
+                    s2.send(new DatagramPacket(token, token.length, dp.getAddress(), dp.getPort()));
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/KerberosHashEqualsTest.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 4641821
+ * @summary hashCode() and equals() for KerberosKey and KerberosTicket
+ */
+
+import java.net.InetAddress;
+import java.util.Date;
+import javax.security.auth.kerberos.KerberosKey;
+import javax.security.auth.kerberos.KerberosPrincipal;
+import javax.security.auth.kerberos.KerberosTicket;
+
+public class KerberosHashEqualsTest {
+    public static void main(String[] args) throws Exception {
+        new OneKDC(null);
+        new KerberosHashEqualsTest().check();
+    }
+
+    void checkSame(Object o1, Object o2) {
+        if(!o1.equals(o2)) {
+            throw new RuntimeException("equals() fails");
+        }
+        if(o1.hashCode() != o2.hashCode()) {
+            throw new RuntimeException("hashCode() not same");
+        }
+    }
+
+    void checkNotSame(Object o1, Object o2) {
+        if(o1.equals(o2)) {
+            throw new RuntimeException("equals() succeeds");
+        }
+    }
+
+    void check() throws Exception {
+
+        // The key part:
+        // new KerberosKey(principal, bytes, keyType, version)
+
+        KerberosKey k1, k2;
+        KerberosPrincipal CLIENT = new KerberosPrincipal("client");
+        KerberosPrincipal SERVER = new KerberosPrincipal("server");
+        byte[] PASS = "pass".getBytes();
+
+        k1 = new KerberosKey(CLIENT, PASS, 1, 1);
+        k2 = new KerberosKey(CLIENT, PASS, 1, 1);
+        checkSame(k1, k1);  // me is me
+        checkSame(k1, k2);  // same
+
+        // A destroyed key is not equal to any key
+        k2.destroy();
+        checkNotSame(k1, k2);
+        checkNotSame(k2, k1);
+        k1.destroy();
+        checkNotSame(k1, k2);   // even if they are both destroyed
+        checkNotSame(k2, k1);
+        checkSame(k2, k2);
+
+        // a little difference means not equal
+        k1 = new KerberosKey(CLIENT, PASS, 1, 1);
+        k2 = new KerberosKey(SERVER, PASS, 1, 1);
+        checkNotSame(k1, k2);   // Different principal name
+
+        k2 = new KerberosKey(CLIENT, "ssap".getBytes(), 1, 1);
+        checkNotSame(k1, k2);   // Different password
+
+        k2 = new KerberosKey(CLIENT, PASS, 2, 1);
+        checkNotSame(k1, k2);   // Different keytype
+
+        k2 = new KerberosKey(CLIENT, PASS, 1, 2);
+        checkNotSame(k1, k2);   // Different version
+
+        k2 = new KerberosKey(null, PASS, 1, 2);
+        checkNotSame(k1, k2);   // null is not non-null
+
+        k1 = new KerberosKey(null, PASS, 1, 2);
+        checkSame(k1, k2);      // null is null
+
+        checkNotSame(k1, "Another Object");
+
+        // The ticket part:
+        // new KerberosTicket(asn1 bytes, client, server, session key, type, flags,
+        //      auth, start, end, renewUntil times, address)
+
+        KerberosTicket t1, t2;
+
+        byte[] ASN1 = "asn1".getBytes();
+        boolean[] FORWARDABLE = new boolean[] {true, true};
+        boolean[] ALLTRUE = new boolean[] {true, true, true, true, true, true, true, true, true, true};
+        Date D0 = new Date(0);
+
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkSame(t1, t1);
+        checkSame(t1, t2);
+
+        // destroyed tickets are not equal to each other
+        t1.destroy();
+        checkNotSame(t1, t2);
+        checkNotSame(t2, t1);
+
+        t2.destroy();
+        checkNotSame(t1, t2);   // even if they are both destroyed
+        checkNotSame(t2, t1);
+
+        checkSame(t2, t2);  // unless they are the same object
+
+        // a little difference means not equal
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        t2 = new KerberosTicket("asn11".getBytes(), CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different ASN1 encoding
+
+        t2 = new KerberosTicket(ASN1, new KerberosPrincipal("client1"), SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different client
+
+        t2 = new KerberosTicket(ASN1, CLIENT, new KerberosPrincipal("server1"), PASS, 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different server
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, "pass1".getBytes(), 1, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different session key
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 2, FORWARDABLE, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different key type
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, new boolean[] {true, false}, D0, D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different flags, not FORWARDABLE
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, new Date(1), D0, D0, D0, null);
+        checkNotSame(t1, t2);   // Different authtime
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, new Date(1), D0, D0, null);
+        checkNotSame(t1, t2);   // Different starttime
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, new Date(1), D0, null);
+        checkNotSame(t1, t2);   // Different endtime
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, new InetAddress[2]);
+        checkNotSame(t1, t2);   // Different client addresses
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, new Date(1), null);
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, new Date(2), null);
+        checkSame(t1, t2);      // renewtill is ignored when RENEWABLE ticket flag is not set.
+
+        t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, ALLTRUE, D0, D0, D0, new Date(1), null);
+        t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, ALLTRUE, D0, D0, D0, new Date(2), null);
+        checkNotSame(t1, t2);   // renewtill is used when RENEWABLE is set.
+
+        checkNotSame(t1, "Another Object");
+        System.out.println("Good!");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/OneKDC.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.security.Security;
+import javax.security.auth.callback.Callback;
+import javax.security.auth.callback.CallbackHandler;
+import javax.security.auth.callback.NameCallback;
+import javax.security.auth.callback.PasswordCallback;
+import sun.security.krb5.Config;
+
+/**
+ * This class starts a simple KDC with one realm and several typical principal
+ * names, generates delete-on-exit krb5.conf and keytab files, and sets up
+ * system properties for them. There's also a helper method to generate a
+ * JAAS login config file that can be used for JAAS or JGSS apps.
+ * <p>
+ * Just call this line to start everything:
+ * <pre>
+ * new OneKDC(null).writeJAASConf();
+ * </pre>
+ */
+public class OneKDC extends KDC {
+
+    // The krb5 code tries to canonicalize hostnames before creating
+    // a service principal name, so let's find out the canonicalized form
+    // of localhost first. The following code mimics the process inside
+    // PrincipalName.java.
+    static String localhost = "localhost";
+    static {
+        try {
+            localhost = InetAddress.getByName(localhost)
+                    .getCanonicalHostName();
+        } catch (UnknownHostException uhe) {
+            ;   // Ignore, localhost is still "localhost"
+        }
+    }
+    public static final String USER = "dummy";
+    public static final char[] PASS = "bogus".toCharArray();
+    public static String SERVER = "server/" + localhost;
+    public static String BACKEND = "backend/" + localhost;
+    public static final String KRB5_CONF = "localkdc-krb5.conf";
+    public static final String KTAB = "localkdc.ktab";
+    public static final String JAAS_CONF = "localkdc-jaas.conf";
+    public static final String REALM = "RABBIT.HOLE";
+
+    /**
+     * Creates the KDC and starts it.
+     * @param etype Encryption type, null if not specified
+     * @throws java.lang.Exception if there's anything wrong
+     */
+    public OneKDC(String etype) throws Exception {
+        super(REALM, 0, true);
+        addPrincipal(USER, PASS);
+        addPrincipalRandKey("krbtgt/" + REALM);
+        addPrincipalRandKey(SERVER);
+        addPrincipalRandKey(BACKEND);
+        KDC.saveConfig(KRB5_CONF, this,
+                "forwardable = true",
+                "default_keytab_name = " + KTAB,
+                etype == null ? "" : "default_tkt_enctypes=" + etype + "\ndefault_tgs_enctypes=" + etype);
+        System.setProperty("java.security.krb5.conf", KRB5_CONF);
+        // Whatever krb5.conf had been loaded before, we reload ours now.
+        Config.refresh();
+
+        writeKtab(KTAB);
+        new File(KRB5_CONF).deleteOnExit();
+        new File(KTAB).deleteOnExit();
+    }
+
+    /**
+     * Writes a JAAS login config file, which contains several useful
+     * entries, including JGSS-style initiator/acceptor entries and normal
+     * JAAS entries, with names using existing OneKDC principals.
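+     * <p>
+     * For example (an illustrative sketch using the "client" entry and the
+     * default callback handler installed by this method):
+     * <pre>
+     * new OneKDC(null).writeJAASConf();
+     * new LoginContext("client").login();
+     * </pre>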
+     * @throws java.io.IOException if the config file cannot be written
+     */
+    public void writeJAASConf() throws IOException {
+        System.setProperty("java.security.auth.login.config", JAAS_CONF);
+        File f = new File(JAAS_CONF);
+        FileOutputStream fos = new FileOutputStream(f);
+        fos.write((
+                "com.sun.security.jgss.krb5.initiate {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required;\n};\n" +
+                "com.sun.security.jgss.krb5.accept {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required\n" +
+                "    principal=\"" + SERVER + "\"\n" +
+                "    useKeyTab=true\n" +
+                "    isInitiator=false\n" +
+                "    storeKey=true;\n};\n" +
+                "client {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required;\n};\n" +
+                "server {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required\n" +
+                "    principal=\"" + SERVER + "\"\n" +
+                "    useKeyTab=true\n" +
+                "    storeKey=true;\n};\n" +
+                "backend {\n" +
+                "    com.sun.security.auth.module.Krb5LoginModule required\n" +
+                "    principal=\"" + BACKEND + "\"\n" +
+                "    useKeyTab=true\n" +
+                "    storeKey=true\n" +
+                "    isInitiator=false;\n};\n"
+                ).getBytes());
+        fos.close();
+        f.deleteOnExit();
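+        // Install CallbackForClient as the JVM-wide default callback handler
+        // so that name/password callbacks are answered without prompting.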
+        Security.setProperty("auth.login.defaultCallbackHandler", "OneKDC$CallbackForClient");
+    }
+
+    /**
+     * The default callback handler for JAAS login. Note that this handler is
+     * hard-coded to provide information only for USER. If you need to provide
+     * info for another principal, please use Context.fromUserPass() instead.
+     */
+    public static class CallbackForClient implements CallbackHandler {
+        public void handle(Callback[] callbacks) {
+            String user = OneKDC.USER;
+            char[] pass = OneKDC.PASS;
+            for (Callback callback : callbacks) {
+                if (callback instanceof NameCallback) {
+                    System.out.println("Callback for name: " + user);
+                    ((NameCallback) callback).setName(user);
+                }
+                if (callback instanceof PasswordCallback) {
+                    System.out.println("Callback for pass: "
+                            + new String(pass));
+                    ((PasswordCallback) callback).setPassword(pass);
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/security/krb5/auto/basic.sh	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,65 @@
+#
+# Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+#
+
+# @test
+# @bug 6706974
+# @summary Add krb5 test infrastructure
+# @run shell/timeout=300 basic.sh
+#
+
+if [ "${TESTSRC}" = "" ] ; then
+  TESTSRC="."
+fi
+if [ "${TESTJAVA}" = "" ] ; then
+  echo "TESTJAVA not set.  Test cannot execute."
+  echo "FAILED!!!"
+  exit 1
+fi
+
+# set platform-dependent variables
+OS=`uname -s`
+case "$OS" in
+  Windows_* )
+    FS="\\"
+    ;;
+  * )
+    FS="/"
+    ;;
+esac
+
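+# Compile the test sources, then run BasicKrb5Test once with the default
+# enctype and once for each specific enctype; distinct exit codes make it
+# easy to see which run failed.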
+${TESTJAVA}${FS}bin${FS}javac -d . \
+    ${TESTSRC}${FS}BasicKrb5Test.java \
+    ${TESTSRC}${FS}KDC.java \
+    ${TESTSRC}${FS}OneKDC.java \
+    ${TESTSRC}${FS}Action.java \
+    ${TESTSRC}${FS}Context.java \
+    || exit 10
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test || exit 100
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des-cbc-crc || exit 1
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des-cbc-md5 || exit 3
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des3-cbc-sha1 || exit 16
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test aes128-cts || exit 17
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test aes256-cts || exit 18
+${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test rc4-hmac || exit 23
+
+exit 0
--- a/langtools/.hgtags	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/.hgtags	Wed Jul 05 16:43:17 2017 +0200
@@ -12,3 +12,4 @@
 81f66dd906eb28e43bcaa66c092e794f59a4e52d jdk7-b35
 258af9b67b7cb4262ab1b5424160c9ad22d52e8f jdk7-b36
 24a47c3062fe8869fcfb533ce0ff770c8ceb550d jdk7-b37
+3fd42dfa6f27f2767a241fb82bc01a613f0c2096 jdk7-b38
--- a/langtools/src/share/classes/com/sun/tools/apt/util/Bark.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/apt/util/Bark.java	Wed Jul 05 16:43:17 2017 +0200
@@ -29,7 +29,7 @@
 import com.sun.tools.javac.util.JCDiagnostic;
 import com.sun.tools.javac.util.JCDiagnostic.SimpleDiagnosticPosition;
 import com.sun.tools.javac.util.Log;
-import com.sun.tools.javac.util.Messages;
+import com.sun.tools.javac.util.JavacMessages;
 import com.sun.tools.javac.util.Position;
 
 /** A subtype of Log for use in APT.
@@ -87,7 +87,7 @@
         context.put(barkKey, this);
 
         // register additional resource bundle for APT messages.
-        Messages aptMessages = Messages.instance(context);
+        JavacMessages aptMessages = JavacMessages.instance(context);
         aptMessages.add("com.sun.tools.apt.resources.apt");
         aptDiags = new JCDiagnostic.Factory(aptMessages, "apt");
 
--- a/langtools/src/share/classes/com/sun/tools/doclets/formats/html/ConfigurationImpl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/doclets/formats/html/ConfigurationImpl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -51,7 +51,7 @@
  */
 public class ConfigurationImpl extends Configuration {
 
-    private static final ConfigurationImpl instance = new ConfigurationImpl();
+    private static ConfigurationImpl instance = new ConfigurationImpl();
 
     /**
      * The build date.  Note: For now, we will use
@@ -189,6 +189,15 @@
             "com.sun.tools.doclets.formats.html.resources.standard");
     }
 
+    /**
+     * Reset to a fresh ConfigurationImpl, to allow multiple invocations
+     * of javadoc within a single VM. It would be better not to use
+     * static fields at all, but .... (sigh).
+     */
+    public static void reset() {
+        instance = new ConfigurationImpl();
+    }
+
     public static ConfigurationImpl getInstance() {
         return instance;
     }
@@ -475,7 +484,7 @@
      * {@inheritDoc}
      */
     public WriterFactory getWriterFactory() {
-        return WriterFactoryImpl.getInstance();
+        return new WriterFactoryImpl(this);
     }
 
     /**
--- a/langtools/src/share/classes/com/sun/tools/doclets/formats/html/HtmlDoclet.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/doclets/formats/html/HtmlDoclet.java	Wed Jul 05 16:43:17 2017 +0200
@@ -41,12 +41,14 @@
  *
  */
 public class HtmlDoclet extends AbstractDoclet {
+    public HtmlDoclet() {
+        configuration = (ConfigurationImpl) configuration();
+    }
 
     /**
      * The global configuration information for this run.
      */
-    public ConfigurationImpl configuration =
-        (ConfigurationImpl) configuration();
+    public ConfigurationImpl configuration;
 
     /**
      * The "start" method as required by Javadoc.
@@ -56,8 +58,12 @@
      * @return true if the doclet ran without encountering any errors.
      */
     public static boolean start(RootDoc root) {
-        HtmlDoclet doclet = new HtmlDoclet();
-        return doclet.start(doclet, root);
+        try {
+            HtmlDoclet doclet = new HtmlDoclet();
+            return doclet.start(doclet, root);
+        } finally {
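+            // always reset the shared ConfigurationImpl so that javadoc can
+            // be invoked again in the same VM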
+            ConfigurationImpl.reset();
+        }
     }
 
     /**
--- a/langtools/src/share/classes/com/sun/tools/doclets/formats/html/WriterFactoryImpl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/doclets/formats/html/WriterFactoryImpl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -37,27 +37,13 @@
  */
 public class WriterFactoryImpl implements WriterFactory {
 
-    private static WriterFactoryImpl instance;
-
     private ConfigurationImpl configuration;
 
-    private WriterFactoryImpl(ConfigurationImpl configuration) {
+    public WriterFactoryImpl(ConfigurationImpl configuration) {
         this.configuration = configuration;
     }
 
     /**
-     * Return an instance of this factory.
-     *
-     * @return an instance of this factory.
-     */
-    public static WriterFactoryImpl getInstance() {
-        if (instance == null) {
-            instance = new WriterFactoryImpl(ConfigurationImpl.getInstance());
-        }
-        return instance;
-    }
-
-    /**
      * {@inheritDoc}
      */
     public ConstantsSummaryWriter getConstantsSummaryWriter() throws Exception {
--- a/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/AbstractDoclet.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/AbstractDoclet.java	Wed Jul 05 16:43:17 2017 +0200
@@ -45,7 +45,7 @@
     /**
      * The global configuration information for this run.
      */
-    public Configuration configuration = configuration();
+    public Configuration configuration;
 
     /**
      * The only doclet that may use this toolkit is {@value}
@@ -74,6 +74,7 @@
      * @return true if the doclet executed without error.  False otherwise.
      */
     public boolean start(AbstractDoclet doclet, RootDoc root) {
+        configuration = configuration();
         configuration.root = root;
         if (! isValidDoclet(doclet)) {
             return false;
--- a/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/Configuration.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/Configuration.java	Wed Jul 05 16:43:17 2017 +0200
@@ -113,9 +113,9 @@
     public boolean keywords = false;
 
     /**
-     * The meta tag keywords sole-instance.
+     * The meta tag keywords instance.
      */
-    public final MetaKeywords metakeywords = MetaKeywords.getInstance(this);
+    public final MetaKeywords metakeywords = new MetaKeywords(this);
 
     /**
      * The list of doc-file subdirectories to exclude
@@ -211,12 +211,12 @@
     public boolean notimestamp= false;
 
     /**
-     * The package grouping sole-instance.
+     * The package grouping instance.
      */
-    public final Group group = Group.getInstance(this);
+    public final Group group = new Group(this);
 
     /**
-     * The tracker of external package links (sole-instance).
+     * The tracker of external package links.
      */
     public final Extern extern = new Extern(this);
 
--- a/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/Group.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/Group.java	Wed Jul 05 16:43:17 2017 +0200
@@ -56,8 +56,6 @@
  */
 public class Group {
 
-    private static Group instance;
-
     /**
      * Map of regular expressions with the corresponding group name.
      */
@@ -96,17 +94,10 @@
         }
     }
 
-    private Group(Configuration configuration) {
+    public Group(Configuration configuration) {
         this.configuration = configuration;
     }
 
-    public static Group getInstance(Configuration configuration) {
-        if (instance == null) {
-            instance = new Group(configuration);
-        }
-        return instance;
-    }
-
     /**
      * Depending upon the format of the package name provided in the "-group"
      * option, generate two separate maps. There will be a map for mapping
--- a/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/MetaKeywords.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/MetaKeywords.java	Wed Jul 05 16:43:17 2017 +0200
@@ -43,8 +43,6 @@
  */
 public class MetaKeywords {
 
-    private static MetaKeywords instance = null;
-
     /**
      * The global configuration information for this run.
      */
@@ -53,23 +51,11 @@
     /**
      * Constructor
      */
-    private MetaKeywords(Configuration configuration) {
+    public MetaKeywords(Configuration configuration) {
         this.configuration = configuration;
     }
 
     /**
-     * Return an instance of MetaKeywords.  This class is a singleton.
-     *
-     * @param configuration the current configuration of the doclet.
-     */
-    public static MetaKeywords getInstance(Configuration configuration) {
-        if (instance == null) {
-            instance = new MetaKeywords(configuration);
-        }
-        return instance;
-    }
-
-    /**
      * Returns an array of strings where each element
      * is a class, method or field name.  This array is
      * used to create one meta keyword tag for each element.
--- a/langtools/src/share/classes/com/sun/tools/javac/api/Formattable.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/api/Formattable.java	Wed Jul 05 16:43:17 2017 +0200
@@ -25,7 +25,7 @@
 
 package com.sun.tools.javac.api;
 
-import java.util.ResourceBundle;
+import java.util.Locale;
 
 /**
  * This interface must be implemented by any javac class that has non-trivial
@@ -39,10 +39,11 @@
      * Used to obtain a localized String representing the object accordingly
      * to a given locale
      *
-     * @param bundle resource bundle class used for localization
+     * @param locale locale in which the object's representation is to be rendered
+     * @param messages messages object used for localization
      * @return a locale-dependent string representing the object
      */
-    public String toString(ResourceBundle bundle);
+    public String toString(Locale locale, Messages messages);
     /**
      * Retrieve a pretty name of this object's kind
      * @return a string representing the object's kind
--- a/langtools/src/share/classes/com/sun/tools/javac/api/JavacTaskImpl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/api/JavacTaskImpl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -68,6 +68,7 @@
     private JavacTool tool;
     private Main compilerMain;
     private JavaCompiler compiler;
+    private Locale locale;
     private String[] args;
     private Context context;
     private List<JavaFileObject> fileObjects;
@@ -89,6 +90,7 @@
         this.args = args;
         this.context = context;
         this.fileObjects = fileObjects;
+        setLocale(Locale.getDefault());
         // null checks
         compilerMain.getClass();
         args.getClass();
@@ -156,9 +158,9 @@
     }
 
     public void setLocale(Locale locale) {
-        // locale argument is ignored, see RFE 6443132
         if (used.get())
             throw new IllegalStateException();
+        this.locale = locale;
     }
 
     private void prepareCompiler() throws IOException {
@@ -191,6 +193,8 @@
         if (taskListener != null)
             context.put(TaskListener.class, wrap(taskListener));
         tool.beginContext(context);
+        //initialize compiler's default locale
+        JavacMessages.instance(context).setCurrentLocale(locale);
     }
     // where
     private TaskListener wrap(final TaskListener tl) {
--- a/langtools/src/share/classes/com/sun/tools/javac/api/JavacTool.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/api/JavacTool.java	Wed Jul 05 16:43:17 2017 +0200
@@ -49,6 +49,7 @@
 import com.sun.tools.javac.main.RecognizedOptions;
 import com.sun.tools.javac.util.Context;
 import com.sun.tools.javac.util.Log;
+import com.sun.tools.javac.util.JavacMessages;
 import com.sun.tools.javac.util.Options;
 import com.sun.tools.javac.util.Pair;
 import java.nio.charset.Charset;
@@ -144,6 +145,7 @@
         Locale locale,
         Charset charset) {
         Context context = new Context();
+        JavacMessages.instance(context).setCurrentLocale(locale);
         if (diagnosticListener != null)
             context.put(DiagnosticListener.class, diagnosticListener);
         context.put(Log.outKey, new PrintWriter(System.err, true)); // FIXME
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/src/share/classes/com/sun/tools/javac/api/Messages.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package com.sun.tools.javac.api;
+
+import java.util.Locale;
+import java.util.MissingResourceException;
+
+/**
+ * This interface defines the minimum requirements in order to provide support
+ * for localized formatted strings.
+ *
+ * @author Maurizio Cimadamore
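+ *
+ * @see com.sun.tools.javac.util.JavacMessages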
+ */
+public interface Messages {
+
+    /**
+     * Add a new resource bundle to the list that is searched for localized messages.
+     * @param bundleName the name to identify the resource bundle of localized messages.
+     * @throws MissingResourceException if the given resource is not found
+     */
+    void add(String bundleName) throws MissingResourceException;
+
+    /**
+     * Get a localized formatted string
+     * @param l locale in which the text is to be localized
+     * @param key locale-independent message key
+     * @param args misc message arguments
+     * @return a localized formatted string
+     */
+    String getLocalizedString(Locale l, String key, Object... args);
+}
--- a/langtools/src/share/classes/com/sun/tools/javac/code/Kinds.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/code/Kinds.java	Wed Jul 05 16:43:17 2017 +0200
@@ -26,9 +26,10 @@
 package com.sun.tools.javac.code;
 
 import java.util.EnumSet;
-import java.util.ResourceBundle;
+import java.util.Locale;
 
 import com.sun.tools.javac.api.Formattable;
+import com.sun.tools.javac.api.Messages;
 
 import static com.sun.tools.javac.code.TypeTags.*;
 import static com.sun.tools.javac.code.Flags.*;
@@ -117,9 +118,9 @@
             return "Kindname";
         }
 
-        public String toString(ResourceBundle bundle) {
+        public String toString(Locale locale, Messages messages) {
             String s = toString();
-            return bundle.getString("compiler.misc." + s);
+            return messages.getLocalizedString(locale, "compiler.misc." + s);
         }
     }
 
--- a/langtools/src/share/classes/com/sun/tools/javac/code/Symtab.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/code/Symtab.java	Wed Jul 05 16:43:17 2017 +0200
@@ -336,7 +336,7 @@
 
         // create the basic builtin symbols
         rootPackage = new PackageSymbol(names.empty, null);
-        final Messages messages = Messages.instance(context);
+        final JavacMessages messages = JavacMessages.instance(context);
         unnamedPackage = new PackageSymbol(names.empty, rootPackage) {
                 public String toString() {
                     return messages.getLocalizedString("compiler.misc.unnamed.package");
--- a/langtools/src/share/classes/com/sun/tools/javac/code/Types.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/code/Types.java	Wed Jul 05 16:43:17 2017 +0200
@@ -67,7 +67,7 @@
         new Context.Key<Types>();
 
     final Symtab syms;
-    final Messages messages;
+    final JavacMessages messages;
     final Names names;
     final boolean allowBoxing;
     final ClassReader reader;
@@ -93,7 +93,7 @@
         source = Source.instance(context);
         chk = Check.instance(context);
         capturedName = names.fromString("<captured wildcard>");
-        messages = Messages.instance(context);
+        messages = JavacMessages.instance(context);
     }
     // </editor-fold>
 
@@ -1430,6 +1430,10 @@
                 long flags = sym.flags();
                 if (((flags & STATIC) == 0) && owner.type.isParameterized()) {
                     Type base = asOuterSuper(t, owner);
+                    //if t is an intersection type T = CT & I1 & I2 ... & In
+                    //its supertypes CT, I1, ... In might contain wildcards
+                    //so we need to go through capture conversion
+                    base = t.isCompound() ? capture(base) : base;
                     if (base != null) {
                         List<Type> ownerParams = owner.type.allparams();
                         List<Type> baseParams = base.allparams();
@@ -3209,6 +3213,7 @@
             containsType(t, s) && containsType(s, t);
     }
 
+    // <editor-fold defaultstate="collapsed" desc="adapt">
     /**
      * Adapt a type by computing a substitution which maps a source
      * type to a target type.
@@ -3222,94 +3227,115 @@
                        Type target,
                        ListBuffer<Type> from,
                        ListBuffer<Type> to) throws AdaptFailure {
-        Map<Symbol,Type> mapping = new HashMap<Symbol,Type>();
-        adaptRecursive(source, target, from, to, mapping);
-        List<Type> fromList = from.toList();
-        List<Type> toList = to.toList();
-        while (!fromList.isEmpty()) {
-            Type val = mapping.get(fromList.head.tsym);
-            if (toList.head != val)
-                toList.head = val;
-            fromList = fromList.tail;
-            toList = toList.tail;
-        }
+        new Adapter(from, to).adapt(source, target);
     }
-    // where
-        private void adaptRecursive(Type source,
-                                    Type target,
-                                    ListBuffer<Type> from,
-                                    ListBuffer<Type> to,
-                                    Map<Symbol,Type> mapping) throws AdaptFailure {
-            if (source.tag == TYPEVAR) {
-                // Check to see if there is
-                // already a mapping for $source$, in which case
-                // the old mapping will be merged with the new
-                Type val = mapping.get(source.tsym);
-                if (val != null) {
-                    if (val.isSuperBound() && target.isSuperBound()) {
-                        val = isSubtype(lowerBound(val), lowerBound(target))
-                            ? target : val;
-                    } else if (val.isExtendsBound() && target.isExtendsBound()) {
-                        val = isSubtype(upperBound(val), upperBound(target))
-                            ? val : target;
-                    } else if (!isSameType(val, target)) {
-                        throw new AdaptFailure();
-                    }
-                } else {
-                    val = target;
-                    from.append(source);
-                    to.append(target);
+
+    class Adapter extends SimpleVisitor<Void, Type> {
+
+        ListBuffer<Type> from;
+        ListBuffer<Type> to;
+        Map<Symbol,Type> mapping;
+
+        Adapter(ListBuffer<Type> from, ListBuffer<Type> to) {
+            this.from = from;
+            this.to = to;
+            mapping = new HashMap<Symbol,Type>();
+        }
+
+        public void adapt(Type source, Type target) throws AdaptFailure {
+            visit(source, target);
+            List<Type> fromList = from.toList();
+            List<Type> toList = to.toList();
+            while (!fromList.isEmpty()) {
+                Type val = mapping.get(fromList.head.tsym);
+                if (toList.head != val)
+                    toList.head = val;
+                fromList = fromList.tail;
+                toList = toList.tail;
+            }
+        }
+
+        @Override
+        public Void visitClassType(ClassType source, Type target) throws AdaptFailure {
+            if (target.tag == CLASS)
+                adaptRecursive(source.allparams(), target.allparams());
+            return null;
+        }
+
+        @Override
+        public Void visitArrayType(ArrayType source, Type target) throws AdaptFailure {
+            if (target.tag == ARRAY)
+                adaptRecursive(elemtype(source), elemtype(target));
+            return null;
+        }
+
+        @Override
+        public Void visitWildcardType(WildcardType source, Type target) throws AdaptFailure {
+            if (source.isExtendsBound())
+                adaptRecursive(upperBound(source), upperBound(target));
+            else if (source.isSuperBound())
+                adaptRecursive(lowerBound(source), lowerBound(target));
+            return null;
+        }
+
+        @Override
+        public Void visitTypeVar(TypeVar source, Type target) throws AdaptFailure {
+            // Check to see if there is
+            // already a mapping for $source$, in which case
+            // the old mapping will be merged with the new
+            Type val = mapping.get(source.tsym);
+            if (val != null) {
+                if (val.isSuperBound() && target.isSuperBound()) {
+                    val = isSubtype(lowerBound(val), lowerBound(target))
+                        ? target : val;
+                } else if (val.isExtendsBound() && target.isExtendsBound()) {
+                    val = isSubtype(upperBound(val), upperBound(target))
+                        ? val : target;
+                } else if (!isSameType(val, target)) {
+                    throw new AdaptFailure();
                 }
-                mapping.put(source.tsym, val);
-            } else if (source.tag == target.tag) {
-                switch (source.tag) {
-                    case CLASS:
-                        adapt(source.allparams(), target.allparams(),
-                              from, to, mapping);
-                        break;
-                    case ARRAY:
-                        adaptRecursive(elemtype(source), elemtype(target),
-                                       from, to, mapping);
-                        break;
-                    case WILDCARD:
-                        if (source.isExtendsBound()) {
-                            adaptRecursive(upperBound(source), upperBound(target),
-                                           from, to, mapping);
-                        } else if (source.isSuperBound()) {
-                            adaptRecursive(lowerBound(source), lowerBound(target),
-                                           from, to, mapping);
-                        }
-                        break;
+            } else {
+                val = target;
+                from.append(source);
+                to.append(target);
+            }
+            mapping.put(source.tsym, val);
+            return null;
+        }
+
+        @Override
+        public Void visitType(Type source, Type target) {
+            return null;
+        }
+
+        private Set<TypePair> cache = new HashSet<TypePair>();
+
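+        // Guards against infinite recursion when adapting recursive type
+        // structures: a (source, target) pair is not visited again while it
+        // is still being processed.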
+        private void adaptRecursive(Type source, Type target) {
+            TypePair pair = new TypePair(source, target);
+            if (cache.add(pair)) {
+                try {
+                    visit(source, target);
+                } finally {
+                    cache.remove(pair);
                 }
             }
         }
-        public static class AdaptFailure extends Exception {
-            static final long serialVersionUID = -7490231548272701566L;
-        }
-
-    /**
-     * Adapt a type by computing a substitution which maps a list of
-     * source types to a list of target types.
-     *
-     * @param source    the source type
-     * @param target    the target type
-     * @param from      the type variables of the computed substitution
-     * @param to        the types of the computed substitution.
-     */
-    private void adapt(List<Type> source,
-                       List<Type> target,
-                       ListBuffer<Type> from,
-                       ListBuffer<Type> to,
-                       Map<Symbol,Type> mapping) throws AdaptFailure {
-        if (source.length() == target.length()) {
-            while (source.nonEmpty()) {
-                adaptRecursive(source.head, target.head, from, to, mapping);
-                source = source.tail;
-                target = target.tail;
+
+        private void adaptRecursive(List<Type> source, List<Type> target) {
+            if (source.length() == target.length()) {
+                while (source.nonEmpty()) {
+                    adaptRecursive(source.head, target.head);
+                    source = source.tail;
+                    target = target.tail;
+                }
             }
         }
     }
 
+    public static class AdaptFailure extends RuntimeException {
+        static final long serialVersionUID = -7490231548272701566L;
+    }
+
     private void adaptSelf(Type t,
                            ListBuffer<Type> from,
                            ListBuffer<Type> to) {
@@ -3322,6 +3348,7 @@
             throw new AssertionError(ex);
         }
     }
+    // </editor-fold>
 
     /**
      * Rewrite all type variables (universal quantifiers) in the given
--- a/langtools/src/share/classes/com/sun/tools/javac/comp/Lower.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/comp/Lower.java	Wed Jul 05 16:43:17 2017 +0200
@@ -1884,6 +1884,9 @@
                     }
                 });
         }
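+        // An lvalue of the form (T)x: strip the cast and abstract the
+        // underlying lvalue x.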
+        case JCTree.TYPECAST: {
+            return abstractLval(((JCTypeCast)lval).expr, builder);
+        }
         }
         throw new AssertionError(lval);
     }
@@ -2713,10 +2716,7 @@
             // boxing required; need to rewrite as x = (unbox typeof x)(x op y);
             // or if x == (typeof x)z then z = (unbox typeof x)((typeof x)z op y)
             // (but without recomputing x)
-            JCTree arg = (tree.lhs.getTag() == JCTree.TYPECAST)
-                ? ((JCTypeCast)tree.lhs).expr
-                : tree.lhs;
-            JCTree newTree = abstractLval(arg, new TreeBuilder() {
+            JCTree newTree = abstractLval(tree.lhs, new TreeBuilder() {
                     public JCTree build(final JCTree lhs) {
                         int newTag = tree.getTag() - JCTree.ASGOffset;
                         // Erasure (TransTypes) can change the type of
@@ -2768,9 +2768,8 @@
         // or
         // translate to tmp1=lval(e); tmp2=tmp1; (typeof tree)tmp1 OP 1; tmp2
         // where OP is += or -=
-        final boolean cast = tree.arg.getTag() == JCTree.TYPECAST;
-        final JCExpression arg = cast ? ((JCTypeCast)tree.arg).expr : tree.arg;
-        return abstractLval(arg, new TreeBuilder() {
+        final boolean cast = TreeInfo.skipParens(tree.arg).getTag() == JCTree.TYPECAST;
+        return abstractLval(tree.arg, new TreeBuilder() {
                 public JCTree build(final JCTree tmp1) {
                     return abstractRval(tmp1, tree.arg.type, new TreeBuilder() {
                             public JCTree build(final JCTree tmp2) {
--- a/langtools/src/share/classes/com/sun/tools/javac/comp/Resolve.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/comp/Resolve.java	Wed Jul 05 16:43:17 2017 +0200
@@ -741,7 +741,7 @@
             while (ct.tag == TYPEVAR)
                 ct = ct.getUpperBound();
             ClassSymbol c = (ClassSymbol)ct.tsym;
-            if ((c.flags() & (ABSTRACT | INTERFACE)) == 0)
+            if ((c.flags() & (ABSTRACT | INTERFACE | ENUM)) == 0)
                 abstractok = false;
             for (Scope.Entry e = c.members().lookup(name);
                  e.scope != null;
--- a/langtools/src/share/classes/com/sun/tools/javac/comp/TransTypes.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/comp/TransTypes.java	Wed Jul 05 16:43:17 2017 +0200
@@ -623,8 +623,8 @@
     }
 
     public void visitAssignop(JCAssignOp tree) {
-        tree.lhs = translate(tree.lhs, null);
-        tree.rhs = translate(tree.rhs, erasure(tree.rhs.type));
+        tree.lhs = translate(tree.lhs, tree.operator.type.getParameterTypes().head);
+        tree.rhs = translate(tree.rhs, tree.operator.type.getParameterTypes().tail.head);
         tree.type = erasure(tree.type);
         result = tree;
     }
--- a/langtools/src/share/classes/com/sun/tools/javac/file/Paths.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/file/Paths.java	Wed Jul 05 16:43:17 2017 +0200
@@ -34,12 +34,12 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.LinkedHashSet;
-import java.util.Iterator;
 import java.util.zip.ZipFile;
 import javax.tools.JavaFileManager.Location;
 
 import com.sun.tools.javac.code.Lint;
 import com.sun.tools.javac.util.Context;
+import com.sun.tools.javac.util.ListBuffer;
 import com.sun.tools.javac.util.Log;
 import com.sun.tools.javac.util.Options;
 
@@ -174,38 +174,38 @@
         return file.equals(bootClassPathRtJar);
     }
 
-    private static class PathIterator implements Iterable<String> {
-        private int pos = 0;
-        private final String path;
-        private final String emptyPathDefault;
+    /**
+     * Split a path into its elements. Empty path elements will be ignored.
+     * @param path The path to be split
+     * @return The elements of the path
+     */
+    private static Iterable<File> getPathEntries(String path) {
+        return getPathEntries(path, null);
+    }
 
-        public PathIterator(String path, String emptyPathDefault) {
-            this.path = path;
-            this.emptyPathDefault = emptyPathDefault;
+    /**
+     * Split a path into its elements. If emptyPathDefault is not null, all
+     * empty elements in the path, including empty elements at either end of
+     * the path, will be replaced with the value of emptyPathDefault.
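+     * For example, on Unix (where the path separator is ':') the path
+     * "a::b:" with an emptyPathDefault of "." yields the entries
+     * a, ., b and . in that order.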
+     * @param path The path to be split
+     * @param emptyPathDefault The value to substitute for empty path elements,
+     *  or null, to ignore empty path elements
+     * @return The elements of the path
+     */
+    private static Iterable<File> getPathEntries(String path, File emptyPathDefault) {
+        ListBuffer<File> entries = new ListBuffer<File>();
+        int start = 0;
+        while (start <= path.length()) {
+            int sep = path.indexOf(File.pathSeparatorChar, start);
+            if (sep == -1)
+                sep = path.length();
+            if (start < sep)
+                entries.add(new File(path.substring(start, sep)));
+            else if (emptyPathDefault != null)
+                entries.add(emptyPathDefault);
+            start = sep + 1;
         }
-        public PathIterator(String path) { this(path, null); }
-        public Iterator<String> iterator() {
-            return new Iterator<String>() {
-                public boolean hasNext() {
-                    return pos <= path.length();
-                }
-                public String next() {
-                    int beg = pos;
-                    int end = path.indexOf(File.pathSeparator, beg);
-                    if (end == -1)
-                        end = path.length();
-                    pos = end + 1;
-
-                    if (beg == end && emptyPathDefault != null)
-                        return emptyPathDefault;
-                    else
-                        return path.substring(beg, end);
-                }
-                public void remove() {
-                    throw new UnsupportedOperationException();
-                }
-            };
-        }
+        return entries;
     }
 
     private class Path extends LinkedHashSet<File> {
@@ -220,9 +220,9 @@
         }
 
         /** What to use when path element is the empty string */
-        private String emptyPathDefault = null;
+        private File emptyPathDefault = null;
 
-        public Path emptyPathDefault(String x) {
+        public Path emptyPathDefault(File x) {
             emptyPathDefault = x;
             return this;
         }
@@ -231,7 +231,7 @@
 
         public Path addDirectories(String dirs, boolean warn) {
             if (dirs != null)
-                for (String dir : new PathIterator(dirs))
+                for (File dir : getPathEntries(dirs))
                     addDirectory(dir, warn);
             return this;
         }
@@ -240,14 +240,14 @@
             return addDirectories(dirs, warn);
         }
 
-        private void addDirectory(String dir, boolean warn) {
-            if (! new File(dir).isDirectory()) {
+        private void addDirectory(File dir, boolean warn) {
+            if (!dir.isDirectory()) {
                 if (warn)
                     log.warning("dir.path.element.not.found", dir);
                 return;
             }
 
-            File[] files = new File(dir).listFiles();
+            File[] files = dir.listFiles();
             if (files == null)
                 return;
 
@@ -259,7 +259,7 @@
 
         public Path addFiles(String files, boolean warn) {
             if (files != null)
-                for (String file : new PathIterator(files, emptyPathDefault))
+                for (File file : getPathEntries(files, emptyPathDefault))
                     addFile(file, warn);
             return this;
         }
@@ -268,11 +268,6 @@
             return addFiles(files, warn);
         }
 
-        public Path addFile(String file, boolean warn) {
-            addFile(new File(file), warn);
-            return this;
-        }
-
         public void addFile(File file, boolean warn) {
             File canonFile = fsInfo.getCanonicalFile(file);
             if (contains(file) || canonicalValues.contains(canonFile)) {
@@ -346,10 +341,9 @@
             String files = System.getProperty("sun.boot.class.path");
             path.addFiles(files, false);
             File rt_jar = new File("rt.jar");
-            for (String file : new PathIterator(files, null)) {
-                File f = new File(file);
-                if (new File(f.getName()).equals(rt_jar))
-                    bootClassPathRtJar = f;
+            for (File file : getPathEntries(files)) {
+                if (new File(file.getName()).equals(rt_jar))
+                    bootClassPathRtJar = file;
             }
         }
 
@@ -381,8 +375,8 @@
         if (cp == null) cp = ".";
 
         return new Path()
-            .expandJarClassPaths(true) // Only search user jars for Class-Paths
-            .emptyPathDefault(".")     // Empty path elt ==> current directory
+            .expandJarClassPaths(true)        // Only search user jars for Class-Paths
+            .emptyPathDefault(new File("."))  // Empty path elt ==> current directory
             .addFiles(cp);
     }
 
--- a/langtools/src/share/classes/com/sun/tools/javac/file/RegularFileObject.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/file/RegularFileObject.java	Wed Jul 05 16:43:17 2017 +0200
@@ -89,6 +89,8 @@
         for (File dir: path) {
             //System.err.println("dir: " + dir);
             String dPath = dir.getPath();
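+            // an empty path element stands for the current working directory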
+            if (dPath.length() == 0)
+                dPath = System.getProperty("user.dir");
             if (!dPath.endsWith(File.separator))
                 dPath += File.separator;
             if (fPath.regionMatches(true, 0, dPath, 0, dPath.length())
--- a/langtools/src/share/classes/com/sun/tools/javac/main/Main.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/main/Main.java	Wed Jul 05 16:43:17 2017 +0200
@@ -484,7 +484,7 @@
     public static String getLocalizedString(String key, Object... args) { // FIXME sb private
         try {
             if (messages == null)
-                messages = new Messages(javacBundleName);
+                messages = new JavacMessages(javacBundleName);
             return messages.getLocalizedString("javac." + key, args);
         }
         catch (MissingResourceException e) {
@@ -494,18 +494,18 @@
 
     public static void useRawMessages(boolean enable) {
         if (enable) {
-            messages = new Messages(javacBundleName) {
+            messages = new JavacMessages(javacBundleName) {
                     public String getLocalizedString(String key, Object... args) {
                         return key;
                     }
                 };
         } else {
-            messages = new Messages(javacBundleName);
+            messages = new JavacMessages(javacBundleName);
         }
     }
 
     private static final String javacBundleName =
         "com.sun.tools.javac.resources.javac";
 
-    private static Messages messages;
+    private static JavacMessages messages;
 }
--- a/langtools/src/share/classes/com/sun/tools/javac/parser/Token.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/parser/Token.java	Wed Jul 05 16:43:17 2017 +0200
@@ -25,9 +25,10 @@
 
 package com.sun.tools.javac.parser;
 
-import java.util.ResourceBundle;
+import java.util.Locale;
 
 import com.sun.tools.javac.api.Formattable;
+import com.sun.tools.javac.api.Messages;
 
 /** An interface that defines codes for Java source tokens
  *  returned from lexical analysis.
@@ -191,8 +192,7 @@
         return "Token";
     }
 
-    public String toString(ResourceBundle bundle) {
-        String s = toString();
-        return s.startsWith("token.") ? bundle.getString("compiler.misc." + s) : s;
+    public String toString(Locale locale, Messages messages) {
+        return name != null ? toString() : messages.getLocalizedString(locale, "compiler.misc." + toString());
     }
 }
--- a/langtools/src/share/classes/com/sun/tools/javac/processing/JavacProcessingEnvironment.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/processing/JavacProcessingEnvironment.java	Wed Jul 05 16:43:17 2017 +0200
@@ -69,6 +69,7 @@
 import com.sun.tools.javac.util.List;
 import com.sun.tools.javac.util.ListBuffer;
 import com.sun.tools.javac.util.Log;
+import com.sun.tools.javac.util.JavacMessages;
 import com.sun.tools.javac.util.Name;
 import com.sun.tools.javac.util.Names;
 import com.sun.tools.javac.util.Options;
@@ -133,9 +134,14 @@
      */
     Source source;
 
+    /**
+     * JavacMessages object used for localization
+     */
+    private JavacMessages messages;
+
     private Context context;
 
-   public JavacProcessingEnvironment(Context context, Iterable<? extends Processor> processors) {
+    public JavacProcessingEnvironment(Context context, Iterable<? extends Processor> processors) {
         options = Options.instance(context);
         this.context = context;
         log = Log.instance(context);
@@ -157,6 +163,7 @@
         typeUtils = new JavacTypes(context);
         processorOptions = initProcessorOptions(context);
         unmatchedProcessorOptions = initUnmatchedProcessorOptions();
+        messages = JavacMessages.instance(context);
         initProcessorIterator(context, processors);
     }
 
@@ -1246,7 +1253,7 @@
     }
 
     public Locale getLocale() {
-        return Locale.getDefault();
+        return messages.getCurrentLocale();
     }
 
     public Set<Symbol.PackageSymbol> getSpecifiedPackages() {
--- a/langtools/src/share/classes/com/sun/tools/javac/util/AbstractDiagnosticFormatter.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/util/AbstractDiagnosticFormatter.java	Wed Jul 05 16:43:17 2017 +0200
@@ -27,11 +27,13 @@
 import java.util.Collection;
 import java.util.Locale;
 import javax.tools.JavaFileObject;
+import java.util.ResourceBundle;
 
 import com.sun.tools.javac.api.DiagnosticFormatter;
 import com.sun.tools.javac.api.Formattable;
 import com.sun.tools.javac.api.DiagnosticFormatter.PositionKind;
 import com.sun.tools.javac.file.JavacFileManager;
+import static com.sun.tools.javac.util.JCDiagnostic.DiagnosticType.*;
 
 /**
  * This abstract class provides a basic implementation of the functionalities that should be provided
@@ -48,16 +50,24 @@
 public abstract class AbstractDiagnosticFormatter implements DiagnosticFormatter<JCDiagnostic> {
 
     /**
-     * Messages object used by this formatter for i18n
+     * JavacMessages object used by this formatter for i18n
      */
-    protected Messages messages;
+    protected JavacMessages messages;
+    protected boolean showSource;
 
     /**
-     * Initialize an AbstractDiagnosticFormatter by setting its Messages object
+     * Initialize an AbstractDiagnosticFormatter by setting its JavacMessages object
      * @param messages
      */
-    protected AbstractDiagnosticFormatter(Messages messages) {
+    protected AbstractDiagnosticFormatter(JavacMessages messages, Options options, boolean showSource) {
         this.messages = messages;
+        this.showSource = options.get("showSource") == null ? showSource :
+                          options.get("showSource").equals("true");
+    }
+
+    protected AbstractDiagnosticFormatter(JavacMessages messages, boolean showSource) {
+        this.messages = messages;
+        this.showSource = showSource;
     }
 
     public String formatMessage(JCDiagnostic d, Locale l) {
@@ -131,7 +141,7 @@
         else if (arg instanceof JavaFileObject)
             return JavacFileManager.getJavacBaseFileName((JavaFileObject)arg);
         else if (arg instanceof Formattable)
-            return ((Formattable)arg).toString(Messages.getDefaultBundle());
+            return ((Formattable)arg).toString(l, messages);
         else
             return String.valueOf(arg);
     }
@@ -155,6 +165,27 @@
         return sbuf.toString();
     }
 
+    /** Format the faulty source code line and point to the error.
+     *  @param d The diagnostic for which the error line should be printed
+     */
+    protected String formatSourceLine(JCDiagnostic d) {
+        StringBuilder buf = new StringBuilder();
+        DiagnosticSource source = d.getDiagnosticSource();
+        int pos = d.getIntPosition();
+        if (d.getIntPosition() != Position.NOPOS) {
+            String line = (source == null ? null : source.getLine(pos));
+            if (line == null)
+                return "";
+            buf.append(line+"\n");
+            int col = source.getColumnNumber(pos, false);
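+            // copy tabs from the source line so that the caret lines up with
+            // the error position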
+            for (int i = 0; i < col - 1; i++)  {
+                buf.append((line.charAt(i) == '\t') ? "\t" : " ");
+            }
+            buf.append("^");
+        }
+        return buf.toString();
+    }
+
     /**
      * Converts a String into a locale-dependent representation accordingly to a given locale
      *
@@ -164,6 +195,10 @@
      * @return a locale-dependent string
      */
     protected String localize(Locale l, String key, Object... args) {
-        return messages.getLocalizedString(key, args);
+        return messages.getLocalizedString(l, key, args);
+    }
+
+    public boolean displaySource(JCDiagnostic d) {
+        return showSource && d.getType() != FRAGMENT;
     }
 }
--- a/langtools/src/share/classes/com/sun/tools/javac/util/BasicDiagnosticFormatter.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/util/BasicDiagnosticFormatter.java	Wed Jul 05 16:43:17 2017 +0200
@@ -59,10 +59,11 @@
      * Create a basic formatter based on the supplied options.
      *
      * @param opts list of command-line options
-     * @param msgs Messages object used for i18n
+     * @param msgs JavacMessages object used for i18n
      */
-    BasicDiagnosticFormatter(Options opts, Messages msgs) {
-        this(msgs); //common init
+    BasicDiagnosticFormatter(Options opts, JavacMessages msgs) {
+        super(msgs, opts, true);
+        initAvailableFormats();
         String fmt = opts.get("diags");
         if (fmt != null) {
             String[] formats = fmt.split("\\|");
@@ -80,10 +81,14 @@
     /**
      * Create a standard basic formatter
      *
-     * @param msgs Messages object used for i18n
+     * @param msgs JavacMessages object used for i18n
      */
-    public BasicDiagnosticFormatter(Messages msgs) {
-        super(msgs);
+    public BasicDiagnosticFormatter(JavacMessages msgs) {
+        super(msgs, true);
+        initAvailableFormats();
+    }
+
+    public void initAvailableFormats() {
         availableFormats = new HashMap<BasicFormatKind, String>();
         availableFormats.put(DEFAULT_POS_FORMAT, "%f:%l:%_%t%m");
         availableFormats.put(DEFAULT_NO_POS_FORMAT, "%p%m");
@@ -91,6 +96,8 @@
     }
 
     public String format(JCDiagnostic d, Locale l) {
+        if (l == null)
+            l = messages.getCurrentLocale();
         String format = selectFormat(d);
         StringBuilder buf = new StringBuilder();
         for (int i = 0; i < format.length(); i++) {
@@ -102,6 +109,9 @@
             }
             buf.append(meta ? formatMeta(c, d, l) : String.valueOf(c));
         }
+        if (displaySource(d)) {
+            buf.append("\n" + formatSourceLine(d));
+        }
         return buf.toString();
     }
 
@@ -165,10 +175,6 @@
         return format;
     }
 
-    public boolean displaySource(JCDiagnostic d) {
-        return true;
-    }
-
     /**
      * This enum contains all the kinds of formatting patterns supported
      * by a basic diagnostic formatter.
--- a/langtools/src/share/classes/com/sun/tools/javac/util/JCDiagnostic.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/util/JCDiagnostic.java	Wed Jul 05 16:43:17 2017 +0200
@@ -64,12 +64,12 @@
 
         /** Create a new diagnostic factory. */
         protected Factory(Context context) {
-            this(Messages.instance(context), "compiler");
+            this(JavacMessages.instance(context), "compiler");
             context.put(diagnosticFactoryKey, this);
         }
 
         /** Create a new diagnostic factory. */
-        public Factory(Messages messages, String prefix) {
+        public Factory(JavacMessages messages, String prefix) {
             this.prefix = prefix;
             this.formatter = new BasicDiagnosticFormatter(messages);
         }
@@ -178,7 +178,7 @@
     @Deprecated
     public static DiagnosticFormatter<JCDiagnostic> getFragmentFormatter() {
         if (fragmentFormatter == null) {
-            fragmentFormatter = new BasicDiagnosticFormatter(Messages.getDefaultMessages());
+            fragmentFormatter = new BasicDiagnosticFormatter(JavacMessages.getDefaultMessages());
         }
         return fragmentFormatter;
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/src/share/classes/com/sun/tools/javac/util/JavacMessages.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package com.sun.tools.javac.util;
+
+import com.sun.tools.javac.api.Messages;
+import java.lang.ref.SoftReference;
+import java.util.ResourceBundle;
+import java.util.MissingResourceException;
+import java.text.MessageFormat;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ *  Support for formatted localized messages.
+ *
+ *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
+ *  you write code that depends on this, you do so at your own risk.
+ *  This code and its internal interfaces are subject to change or
+ *  deletion without notice.</b>
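+ *
+ *  <p>A typical (illustrative) use within javac:
+ *  <pre>
+ *      JavacMessages messages = JavacMessages.instance(context);
+ *      String text = messages.getLocalizedString("compiler.misc.unnamed.package");
+ *  </pre>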
+ */
+public class JavacMessages implements Messages {
+    /** The context key for the JavacMessages object. */
+    protected static final Context.Key<JavacMessages> messagesKey =
+        new Context.Key<JavacMessages>();
+
+    /** Get the JavacMessages instance for this context. */
+    public static JavacMessages instance(Context context) {
+        JavacMessages instance = context.get(messagesKey);
+        if (instance == null)
+            instance = new JavacMessages(context);
+        return instance;
+    }
+
+    private Map<Locale, SoftReference<List<ResourceBundle>>> bundleCache;
+
+    private List<String> bundleNames;
+
+    private Locale currentLocale;
+    private List<ResourceBundle> currentBundles;
+
+    public Locale getCurrentLocale() {
+        return currentLocale;
+    }
+
+    public void setCurrentLocale(Locale locale) {
+        if (locale == null) {
+            locale = Locale.getDefault();
+        }
+        this.currentBundles = getBundles(locale);
+        this.currentLocale = locale;
+    }
+
+    /** Creates a JavacMessages object.
+     */
+    public JavacMessages(Context context) {
+        this(defaultBundleName);
+        context.put(messagesKey, this);
+    }
+
+    /** Creates a JavacMessages object.
+     * @param bundleName the name to identify the resource bundle of localized messages.
+     */
+    public JavacMessages(String bundleName) throws MissingResourceException {
+        bundleNames = List.nil();
+        bundleCache = new HashMap<Locale, SoftReference<List<ResourceBundle>>>();
+        add(bundleName);
+        setCurrentLocale(Locale.getDefault());
+    }
+
+    public JavacMessages() throws MissingResourceException {
+        this(defaultBundleName);
+    }
+
+    public void add(String bundleName) throws MissingResourceException {
+        bundleNames = bundleNames.prepend(bundleName);
+        if (!bundleCache.isEmpty())
+            bundleCache.clear();
+        currentBundles = null;
+    }
+
+    public List<ResourceBundle> getBundles(Locale locale) {
+        if (locale == currentLocale && currentBundles != null)
+            return currentBundles;
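+        // Per-locale bundle lists are cached behind SoftReferences so that
+        // they can be reclaimed under memory pressure and recreated on demand.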
+        SoftReference<List<ResourceBundle>> bundles = bundleCache.get(locale);
+        List<ResourceBundle> bundleList = bundles == null ? null : bundles.get();
+        if (bundleList == null) {
+            bundleList = List.nil();
+            for (String bundleName : bundleNames) {
+                try {
+                    ResourceBundle rb = ResourceBundle.getBundle(bundleName, locale);
+                    bundleList = bundleList.prepend(rb);
+                } catch (MissingResourceException e) {
+                    throw new InternalError("Cannot find javac resource bundle for locale " + locale);
+                }
+            }
+            bundleCache.put(locale, new SoftReference<List<ResourceBundle>>(bundleList));
+        }
+        return bundleList;
+    }
+
+    /** Gets the localized string corresponding to a key, formatted with a set of args.
+     */
+    public String getLocalizedString(String key, Object... args) {
+        return getLocalizedString(currentLocale, key, args);
+    }
+
+    public String getLocalizedString(Locale l, String key, Object... args) {
+        if (l == null)
+            l = getCurrentLocale();
+        return getLocalizedString(getBundles(l), key, args);
+    }
+
+    /* Static access:
+     * javac has a firmly entrenched notion of a default message bundle
+     * which it can access from any static context. This is used to get
+     * easy access to simple localized strings.
+     */
+
+    private static final String defaultBundleName =
+        "com.sun.tools.javac.resources.compiler";
+    private static ResourceBundle defaultBundle;
+    private static JavacMessages defaultMessages;
+
+
+    /**
+     * Gets a localized string from the compiler's default bundle.
+     */
+    // used to support legacy Log.getLocalizedString
+    static String getDefaultLocalizedString(String key, Object... args) {
+        return getLocalizedString(List.of(getDefaultBundle()), key, args);
+    }
+
+    // used to support legacy static Diagnostic.fragment
+    @Deprecated
+    static JavacMessages getDefaultMessages() {
+        if (defaultMessages == null)
+            defaultMessages = new JavacMessages(defaultBundleName);
+        return defaultMessages;
+    }
+
+    public static ResourceBundle getDefaultBundle() {
+        try {
+            if (defaultBundle == null)
+                defaultBundle = ResourceBundle.getBundle(defaultBundleName);
+            return defaultBundle;
+        }
+        catch (MissingResourceException e) {
+            throw new Error("Fatal: Resource for compiler is missing", e);
+        }
+    }
+
+    private static String getLocalizedString(List<ResourceBundle> bundles,
+                                             String key,
+                                             Object... args) {
+        String msg = null;
+        for (List<ResourceBundle> l = bundles; l.nonEmpty() && msg == null; l = l.tail) {
+            ResourceBundle rb = l.head;
+            try {
+                msg = rb.getString(key);
+            }
+            catch (MissingResourceException e) {
+                // ignore, try other bundles in list
+            }
+        }
+        if (msg == null) {
+            msg = "compiler message file broken: key=" + key +
+                " arguments={0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}";
+        }
+        return MessageFormat.format(msg, args);
+    }
+}
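
For reference, a minimal standalone sketch of how the JavacMessages class added above can be exercised. This is illustrative only and not part of the changeset; it assumes the langtools classes (tools.jar) are on the class path, and the resource key used below is an assumption that has not been checked against compiler.properties.

    // Illustrative sketch only; not part of this changeset.
    // Assumes com.sun.tools.javac.util.JavacMessages (tools.jar) is on the class path.
    import com.sun.tools.javac.util.JavacMessages;
    import java.util.Locale;

    public class JavacMessagesDemo {
        public static void main(String[] args) {
            // Bundle name taken from defaultBundleName in the class above.
            JavacMessages messages =
                new JavacMessages("com.sun.tools.javac.resources.compiler");
            messages.setCurrentLocale(Locale.JAPAN);
            // "compiler.misc.count.error" is an assumed key; per the code above,
            // unknown keys do not throw, they fall back to the
            // "compiler message file broken" placeholder text.
            System.out.println(
                messages.getLocalizedString("compiler.misc.count.error", 1));
        }
    }
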
--- a/langtools/src/share/classes/com/sun/tools/javac/util/Log.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/util/Log.java	Wed Jul 05 16:43:17 2017 +0200
@@ -29,7 +29,6 @@
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
-import java.util.Locale;
 import javax.tools.DiagnosticListener;
 import javax.tools.JavaFileObject;
 
@@ -97,6 +96,11 @@
      */
     private DiagnosticFormatter<JCDiagnostic> diagFormatter;
 
+    /**
+     * JavacMessages object used for localization
+     */
+    private JavacMessages messages;
+
     /** Construct a log with given I/O redirections.
      */
     @Deprecated
@@ -115,9 +119,9 @@
         this.MaxWarnings = getIntOption(options, "-Xmaxwarns", 100);
 
         boolean rawDiagnostics = options.get("rawDiagnostics") != null;
-        Messages msgs = Messages.instance(context);
-        this.diagFormatter = rawDiagnostics ? new RawDiagnosticFormatter(msgs) :
-                                              new BasicDiagnosticFormatter(options, msgs);
+        messages = JavacMessages.instance(context);
+        this.diagFormatter = rawDiagnostics ? new RawDiagnosticFormatter(options) :
+                                              new BasicDiagnosticFormatter(options, messages);
         @SuppressWarnings("unchecked") // FIXME
         DiagnosticListener<? super JavaFileObject> diagListener =
             context.get(DiagnosticListener.class);
@@ -335,15 +339,7 @@
 
         PrintWriter writer = getWriterForDiagnosticType(diag.getType());
 
-        printLines(writer, diagFormatter.format(diag, Locale.getDefault()));
-        if (diagFormatter.displaySource(diag)) {
-            int pos = diag.getIntPosition();
-            if (pos != Position.NOPOS) {
-                JavaFileObject prev = useSource(diag.getSource());
-                printErrLine(pos, writer);
-                useSource(prev);
-            }
-        }
+        printLines(writer, diagFormatter.format(diag, messages.getCurrentLocale()));
 
         if (promptOnError) {
             switch (diag.getType()) {
@@ -384,7 +380,7 @@
      *  @param args   Fields to substitute into the string.
      */
     public static String getLocalizedString(String key, Object ... args) {
-        return Messages.getDefaultLocalizedString("compiler.misc." + key, args);
+        return JavacMessages.getDefaultLocalizedString("compiler.misc." + key, args);
     }
 
 /***************************************************************************
--- a/langtools/src/share/classes/com/sun/tools/javac/util/Messages.java	Wed Jul 05 16:42:40 2017 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,157 +0,0 @@
-/*
- * Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Sun designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Sun in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- */
-
-package com.sun.tools.javac.util;
-
-import java.util.ResourceBundle;
-import java.util.MissingResourceException;
-import java.text.MessageFormat;
-
-/**
- *  Support for localized messages.
- *
- *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
- *  you write code that depends on this, you do so at your own risk.
- *  This code and its internal interfaces are subject to change or
- *  deletion without notice.</b>
- */
-public class Messages {
-    /** The context key for the Messages object. */
-    protected static final Context.Key<Messages> messagesKey =
-        new Context.Key<Messages>();
-
-    /** Get the Messages instance for this context. */
-    public static Messages instance(Context context) {
-        Messages instance = context.get(messagesKey);
-        if (instance == null)
-            instance = new Messages(context);
-        return instance;
-    }
-
-    private List<ResourceBundle> bundles = List.nil();
-
-    /** Creates a Messages object.
-     */
-    public Messages(Context context) {
-        context.put(messagesKey, this);
-        add(getDefaultBundle());
-    }
-
-    /** Creates a Messages object.
-     * @param bundle the name to identify the resource buundle of localized messages.
-     */
-    public Messages(String bundleName) throws MissingResourceException {
-        add(bundleName);
-    }
-
-    /** Creates a Messages object.
-     * @param bundle the name to identif the resource buundle of localized messages.
-     */
-    public Messages(ResourceBundle bundle) throws MissingResourceException {
-        add(bundle);
-    }
-
-    /** Add a new resource bundle to the list that is searched for localized messages.
-     * @param bundle the name to identify the resource bundle of localized messages.
-     */
-    public void add(String bundleName) throws MissingResourceException {
-        add(ResourceBundle.getBundle(bundleName));
-    }
-
-    /** Add a new resource bundle to the list that is searched for localized messages.
-     * Resource bundles will be searched in reverse order in which they are added.
-     * @param bundle the bundle of localized messages.
-     */
-    public void add(ResourceBundle bundle) {
-        bundles = bundles.prepend(bundle);
-    }
-
-    /** Gets the localized string corresponding to a key, formatted with a set of args.
-     */
-    public String getLocalizedString(String key, Object... args) {
-        return getLocalizedString(bundles, key, args);
-    }
-
-
-    /* Static access:
-     * javac has a firmly entrenched notion of a default message bundle
-     * which it can access from any static context. This is used to get
-     * easy access to simple localized strings.
-     */
-
-    private static final String defaultBundleName =
-        "com.sun.tools.javac.resources.compiler";
-    private static ResourceBundle defaultBundle;
-    private static Messages defaultMessages;
-
-
-    /**
-     * Gets a localized string from the compiler's default bundle.
-     */
-    // used to support legacy Log.getLocalizedString
-    static String getDefaultLocalizedString(String key, Object... args) {
-        return getLocalizedString(List.of(getDefaultBundle()), key, args);
-    }
-
-    // used to support legacy static Diagnostic.fragment
-    static Messages getDefaultMessages() {
-        if (defaultMessages == null)
-            defaultMessages = new Messages(getDefaultBundle());
-        return defaultMessages;
-    }
-
-    public static ResourceBundle getDefaultBundle() {
-        try {
-            if (defaultBundle == null)
-                defaultBundle = ResourceBundle.getBundle(defaultBundleName);
-            return defaultBundle;
-        }
-        catch (MissingResourceException e) {
-            throw new Error("Fatal: Resource for compiler is missing", e);
-        }
-    }
-
-    private static String getLocalizedString(List<ResourceBundle> bundles,
-                                             String key,
-                                             Object... args) {
-       String msg = null;
-        for (List<ResourceBundle> l = bundles; l.nonEmpty() && msg == null; l = l.tail) {
-            ResourceBundle rb = l.head;
-            try {
-                msg = rb.getString(key);
-            }
-            catch (MissingResourceException e) {
-                // ignore, try other bundles in list
-            }
-        }
-        if (msg == null) {
-            msg = "compiler message file broken: key=" + key +
-                " arguments={0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}";
-        }
-        return MessageFormat.format(msg, args);
-    }
-
-
-}
--- a/langtools/src/share/classes/com/sun/tools/javac/util/RawDiagnosticFormatter.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/util/RawDiagnosticFormatter.java	Wed Jul 05 16:43:17 2017 +0200
@@ -41,8 +41,8 @@
      * Create a formatter based on the supplied options.
      * @param msgs
      */
-    public RawDiagnosticFormatter(Messages msgs) {
-        super(null);
+    public RawDiagnosticFormatter(Options opts) {
+        super(null, opts, false);
     }
 
     //provide common default formats
@@ -61,6 +61,8 @@
                 buf.append('-');
             buf.append(' ');
             buf.append(formatMessage(d, null));
+            if (displaySource(d))
+                buf.append("\n" + formatSourceLine(d));
             return buf.toString();
         }
         catch (Exception e) {
@@ -94,8 +96,4 @@
         }
         return buf.toString();
     }
-
-    public boolean displaySource(JCDiagnostic d) {
-        return false;
-    }
 }
--- a/langtools/test/com/sun/javadoc/AuthorDD/AuthorDD.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/com/sun/javadoc/AuthorDD/AuthorDD.java	Wed Jul 05 16:43:17 2017 +0200
@@ -72,7 +72,8 @@
 
     /** Run javadoc */
     public static void runJavadoc(String[] javadocArgs) {
-        if (com.sun.tools.javadoc.Main.execute(javadocArgs) != 0) {
+        if (com.sun.tools.javadoc.Main.execute(AuthorDD.class.getClassLoader(),
+                                               javadocArgs) != 0) {
             throw new Error("Javadoc failed to execute");
         }
     }
--- a/langtools/test/com/sun/javadoc/lib/JavadocTester.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/com/sun/javadoc/lib/JavadocTester.java	Wed Jul 05 16:43:17 2017 +0200
@@ -197,6 +197,7 @@
                 new PrintWriter(warnings, true),
                 new PrintWriter(notices, true),
                 docletClass,
+                getClass().getClassLoader(),
                 args);
         System.setOut(prev);
         standardOut = new StringBuffer(stdout.toString());
--- a/langtools/test/com/sun/javadoc/testSupplementary/TestSupplementary.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/com/sun/javadoc/testSupplementary/TestSupplementary.java	Wed Jul 05 16:43:17 2017 +0200
@@ -33,6 +33,8 @@
  * @run main TestSupplementary
  */
 
+import java.util.Locale;
+
 public class TestSupplementary extends JavadocTester {
 
     private static final String BUG_ID = "4914724";
@@ -56,9 +58,14 @@
      * @param args the array of command line arguments.
      */
     public static void main(String[] args) {
-        TestSupplementary tester = new TestSupplementary();
-        run(tester, ARGS, TEST, NEGATED_TEST);
-        tester.printSummary();
+        Locale saveLocale = Locale.getDefault();
+        try {
+            TestSupplementary tester = new TestSupplementary();
+            run(tester, ARGS, TEST, NEGATED_TEST);
+            tester.printSummary();
+        } finally {
+            Locale.setDefault(saveLocale);
+        }
     }
 
     /**
--- a/langtools/test/tools/apt/Basics/print.sh	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/Basics/print.sh	Wed Jul 05 16:43:17 2017 +0200
@@ -26,7 +26,6 @@
 # @test
 # @bug 5008759 4998341 5018369 5032476 5060121 5096932 5096931
 # @run shell ../verifyVariables.sh
-# @build Aggregate
 # @run shell print.sh
 # @summary test availabilty of print option
 # @author Joseph D. Darcy
@@ -42,6 +41,12 @@
 	;;
 esac
 
+# Compile file directly, without TESTJAVACOPTS
+# Don't use @build or @compile as these implicitly use jtreg -javacoption values
+# and it is important that this file be compiled as expected, for later comparison
+# against a golden file.
+"${TESTJAVA}/bin/javac" ${TESTTOOLVMOPTS} -d ${TESTCLASSES} ${TESTSRC}/Aggregate.java
+
 # Construct path to apt executable
 APT="${TESTJAVA}/bin/apt ${TESTTOOLVMOPTS} \
 -print "
--- a/langtools/test/tools/apt/Compile/compile.sh	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/Compile/compile.sh	Wed Jul 05 16:43:17 2017 +0200
@@ -473,6 +473,9 @@
 cp ${TESTCLASSES}/Round?.class .
 ${JAR} cf0 rounds.jar Round?.class
 
+# cleanup file to prevent accidental discovery in current directory
+rm -Rf META-INF/services/*
+
 printf "%s\n" "-factorypath round1Apf.jar${SEP}round2Apf.jar${SEP}round3Apf.jar${SEP}round4Apf.jar"   > options8
 printf "%s\n" "-classpath rounds.jar"  >> options8
 printf "%s\n" "-s ./src"               >> options8
--- a/langtools/test/tools/apt/Discovery/discovery.sh	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/Discovery/discovery.sh	Wed Jul 05 16:43:17 2017 +0200
@@ -109,7 +109,8 @@
 
 ${JAR} cf0 phantom/phantom.jar PhantomTouch*.class META-INF
 
-
+# cleanup file to prevent accidental discovery in current directory
+rm -f META-INF/services/com.sun.mirror.apt.AnnotationProcessorFactory
 
 # Jar files created; verify right output file is touched
 
--- a/langtools/test/tools/apt/mirror/declaration/AnnoMirror.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/AnnoMirror.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary Tests AnnotationMirror and AnnotationValue methods.
  * @library ../../lib
  * @compile -source 1.5 AnnoMirror.java
- * @run main AnnoMirror
+ * @run main/othervm AnnoMirror
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/AnnoTypeDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/AnnoTypeDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary AnnotationTypeDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 AnnoTypeDecl.java
- * @run main AnnoTypeDecl
+ * @run main/othervm AnnoTypeDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/AnnoTypeElemDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/AnnoTypeElemDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary AnnotationTypeElementDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 AnnoTypeElemDecl.java
- * @run main AnnoTypeElemDecl
+ * @run main/othervm AnnoTypeElemDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/AnnoVal.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/AnnoVal.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary Tests AnnotationValue methods.
  * @library ../../lib
  * @compile -source 1.5 AnnoVal.java
- * @run main AnnoVal
+ * @run main/othervm AnnoVal
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/ClassDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/ClassDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary ClassDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 ClassDecl.java
- * @run main ClassDecl
+ * @run main/othervm ClassDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/ConstExpr.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/ConstExpr.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary Tests FieldDeclaration.getConstantExpression method
  * @library ../../lib
  * @compile -source 1.5 ConstExpr.java
- * @run main ConstExpr
+ * @run main/othervm ConstExpr
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/ConstructorDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/ConstructorDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary ConstructorDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 ConstructorDecl.java
- * @run main ConstructorDecl
+ * @run main/othervm ConstructorDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/EnumDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/EnumDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary EnumDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 EnumDecl.java
- * @run main EnumDecl
+ * @run main/othervm EnumDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/FieldDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/FieldDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary FieldDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 FieldDecl.java
- * @run main FieldDecl
+ * @run main/othervm FieldDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/GetAnno.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/GetAnno.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary Tests Declaration.getAnnotation method
  * @library ../../lib
  * @compile -source 1.5 GetAnno.java
- * @run main GetAnno
+ * @run main/othervm GetAnno
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/InterfaceDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/InterfaceDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary InterfaceDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 InterfaceDecl.java
- * @run main InterfaceDecl
+ * @run main/othervm InterfaceDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/MethodDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/MethodDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary MethodDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 MethodDecl.java
- * @run main MethodDecl
+ * @run main/othervm MethodDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/PackageDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/PackageDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary PackageDeclaration tests
  * @library ../../lib
  * @compile -source 1.5 PackageDecl.java
- * @run main PackageDecl
+ * @run main/othervm PackageDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/declaration/ParameterDecl.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/declaration/ParameterDecl.java	Wed Jul 05 16:43:17 2017 +0200
@@ -27,6 +27,7 @@
  * @bug 4853450 5031171
  * @summary ParameterDeclaration tests
  * @library ../../lib
+ * @run main/othervm ParameterDecl
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/AnnoTyp.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/AnnoTyp.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary AnnotationType tests
  * @library ../../lib
  * @compile -source 1.5 AnnoTyp.java
- * @run main AnnoTyp
+ * @run main/othervm AnnoTyp
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/ArrayTyp.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/ArrayTyp.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary ArrayType tests
  * @library ../../lib
  * @compile -source 1.5 ArrayTyp.java
- * @run main ArrayTyp
+ * @run main/othervm ArrayTyp
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/ClassTyp.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/ClassTyp.java	Wed Jul 05 16:43:17 2017 +0200
@@ -27,6 +27,7 @@
  * @bug 4853450 5009360 5055963
  * @summary ClassType tests
  * @library ../../lib
+ * @run main/othervm ClassTyp
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/EnumTyp.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/EnumTyp.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary EnumType tests
  * @library ../../lib
  * @compile -source 1.5 EnumTyp.java
- * @run main EnumTyp
+ * @run main/othervm EnumTyp
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/InterfaceTyp.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/InterfaceTyp.java	Wed Jul 05 16:43:17 2017 +0200
@@ -27,6 +27,7 @@
  * @bug 4853450 5055963
  * @summary InterfaceType tests
  * @library ../../lib
+ * @run main/othervm InterfaceTyp
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/PrimitiveTyp.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/PrimitiveTyp.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary PrimitiveType tests
  * @library ../../lib
  * @compile -source 1.5 PrimitiveTyp.java
- * @run main PrimitiveTyp
+ * @run main/othervm PrimitiveTyp
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/TypeVar.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/TypeVar.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary TypeVariable tests
  * @library ../../lib
  * @compile -source 1.5 TypeVar.java
- * @run main TypeVar
+ * @run main/othervm TypeVar
  */
 
 
--- a/langtools/test/tools/apt/mirror/type/WildcardTyp.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/type/WildcardTyp.java	Wed Jul 05 16:43:17 2017 +0200
@@ -28,7 +28,7 @@
  * @summary WildcardType tests
  * @library ../../lib
  * @compile -source 1.5 WildcardTyp.java
- * @run main WildcardTyp
+ * @run main/othervm WildcardTyp
  */
 
 
--- a/langtools/test/tools/apt/mirror/util/Overrides.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/util/Overrides.java	Wed Jul 05 16:43:17 2017 +0200
@@ -27,6 +27,7 @@
  * @bug 5037165
  * @summary Test the Declarations.overrides method
  * @library ../../lib
+ * @run main/othervm Overrides
  */
 
 
--- a/langtools/test/tools/apt/mirror/util/TypeCreation.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/apt/mirror/util/TypeCreation.java	Wed Jul 05 16:43:17 2017 +0200
@@ -27,6 +27,7 @@
  * @bug 5033381
  * @summary Test the type creation methods in Types.
  * @library ../../lib
+ * @run main/othervm TypeCreation
  */
 
 
--- a/langtools/test/tools/javac/6457284/T6457284.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javac/6457284/T6457284.java	Wed Jul 05 16:43:17 2017 +0200
@@ -35,7 +35,7 @@
 import com.sun.tools.javac.api.JavacTaskImpl;
 import com.sun.tools.javac.util.Context;
 import com.sun.tools.javac.util.List;
-import com.sun.tools.javac.util.Messages;
+import com.sun.tools.javac.util.JavacMessages;
 
 import javax.tools.*;
 
@@ -63,7 +63,7 @@
         throw new AssertionError("No top-level classes!");
     }
 
-    static class MyMessages extends Messages {
+    static class MyMessages extends JavacMessages {
         static void preRegister(Context context) {
             context.put(messagesKey, new MyMessages());
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/OverrideChecks/6738538/T6738538a.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6738538 6687444
+ * @summary  javac crashes when using a type parameter as a covariant method return type
+ * @author Maurizio Cimadamore
+ *
+ * @compile T6738538a.java
+ */
+
+class T6738538a {
+
+    class C<T> {
+        public T m(){
+            return null;
+        }
+    }
+    interface I<T>{
+        public T m();
+    }
+    class Crash<T extends C<?> & I> {}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/OverrideChecks/6738538/T6738538b.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6738538 6687444
+ * @summary  javac crashes when using a type parameter as a covariant method return type
+ * @author Maurizio Cimadamore
+ *
+ * @compile T6738538b.java
+ */
+
+class T6738538b {
+    interface I1 {
+        Object m();
+    }
+
+    interface I2 {}
+
+    class C1<T> implements I1 {
+        public T m() {
+            return null;
+        }
+    }
+
+    class C2<T extends C1<?> & I2> {}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/T6759996.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6759996
+ * @summary javac should ignore empty entries on paths
+ */
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+
+public class T6759996 {
+    public static void main(String[] args) throws Exception {
+        new T6759996().run();
+    }
+
+    void run() throws IOException, InterruptedException {
+        String PS = File.pathSeparator;
+        write(new File("A.java"), "class A { }");
+        write(new File("B.java"), "class B extends A { }");
+        // In the following line, the presence of the empty element
+        // should not mask the presence of the "." element on the path
+        javac("-verbose", "-sourcepath", "" + PS + ".", "B.java");
+    }
+
+    void javac(String... args) throws IOException, InterruptedException {
+        StringWriter sw = new StringWriter();
+        PrintWriter out = new PrintWriter(sw);
+        int rc = com.sun.tools.javac.Main.compile(args, out);
+        System.out.println(sw.toString());
+        if (rc != 0)
+            throw new Error("javac failed: rc=" + rc);
+
+    }
+
+    void write(File to, String body) throws IOException {
+        System.err.println("write " + to);
+        File toDir = to.getParentFile();
+        if (toDir != null) {
+            boolean ok = toDir.mkdirs();
+            if (!ok) {
+                throw new Error("could not create directory " + toDir);
+            }
+        }
+        Writer out = new FileWriter(to);
+        try {
+            out.write(body);
+            if (!body.endsWith("\n"))
+                out.write("\n");
+        } finally {
+            out.close();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/api/6406133/Erroneous.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,4 @@
+@Deprecated
+class A {
+    class A {}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/api/6406133/T6406133.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug     6443132 6406133 6597678
+ * @summary Compiler API ignores locale settings
+ * @author  Maurizio Cimadamore
+ * @library ../lib
+ */
+
+import javax.tools.*;
+import javax.annotation.processing.*;
+import javax.lang.model.element.*;
+import java.util.*;
+import java.io.*;
+
+public class T6406133 extends ToolTester {
+
+    List<Locale> locales = Arrays.asList(Locale.US, Locale.JAPAN, Locale.CHINA);
+
+    class DiagnosticTester implements DiagnosticListener<JavaFileObject> {
+        Locale locale;
+        String result;
+
+        DiagnosticTester(Locale locale) {
+            this.locale = locale;
+        }
+        public void report(Diagnostic<? extends JavaFileObject> diagnostic) {
+            result = diagnostic.getMessage(locale); //6406133
+        }
+    }
+
+    class ProcessorTester extends AbstractProcessor {
+
+        Locale locale;
+
+        public Set<String> getSupportedAnnotationTypes() {
+            return new HashSet<String>(Arrays.asList("*"));
+        }
+
+        public void init(ProcessingEnvironment env) {
+            locale = env.getLocale();
+        }
+
+        public boolean process(Set<? extends TypeElement> annotations, RoundEnvironment roundEnv) {
+            return true;
+        }
+    }
+
+    void compare(Locale loc1, Locale loc2, boolean useListener) {
+        String res1 = exec(useListener, loc1);
+        String res2 = exec(useListener, loc2);
+        boolean success = (loc1.equals(loc2) && res1.equals(res2)) ||
+                          (!loc1.equals(loc2) && !res1.equals(res2));
+        if (!success)
+            throw new AssertionError("Error in diagnostic localization");
+    }
+
+    String exec(boolean useListener, Locale locale) {
+        final Iterable<? extends JavaFileObject> compilationUnits =
+            fm.getJavaFileObjects(new File(test_src, "Erroneous.java"));
+        StringWriter pw = new StringWriter();
+        DiagnosticTester listener = useListener ? new DiagnosticTester(locale) : null;
+        ProcessorTester processor = new ProcessorTester();
+        task = tool.getTask(pw, fm, listener, null, null, compilationUnits);
+        task.setProcessors(Arrays.asList(processor));
+        task.setLocale(locale); //6443132
+        task.call();
+        if (!processor.locale.equals(locale))
+            throw new AssertionError("Error in diagnostic localization during annotation processing");
+        String res = useListener ? listener.result : pw.toString();
+        System.err.println("[locale:"+ locale + ", listener:" + useListener + "] " +res);
+        return res;
+    }
+
+    void test() {
+        for (Locale l1 : locales) {
+            for (Locale l2 : locales) {
+                compare(l1, l2, true);
+                compare(l1, l2, false);
+            }
+        }
+    }
+
+    public static void main(String... args) throws Exception {
+        new T6406133().test();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/api/6731573/Erroneous.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,4 @@
+class A {
+    boolean b;
+    boolean b;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/api/6731573/T6731573.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug     6731573
+ * @summary diagnostic output should optionally include source line
+ * @author  Maurizio Cimadamore
+ * @library ../lib
+ */
+
+import java.io.*;
+import java.util.*;
+import javax.tools.*;
+
+public class T6731573 extends ToolTester {
+
+    enum DiagnosticType {
+        BASIC(null) {
+            boolean shouldDisplaySource(SourceLine sourceLine) {
+                return sourceLine != SourceLine.DISABLED;
+            }
+        },
+        RAW("-XDrawDiagnostics") {
+            boolean shouldDisplaySource(SourceLine sourceLine) {
+                return sourceLine == SourceLine.ENABLED;
+            }
+        };
+
+        String optValue;
+
+        DiagnosticType(String optValue) {
+            this.optValue = optValue;
+        }
+
+        abstract boolean shouldDisplaySource(SourceLine sourceLine);
+    }
+
+    enum SourceLine {
+        STANDARD(null),
+        ENABLED("-XDshowSource=true"),
+        DISABLED("-XDshowSource=false");
+
+        String optValue;
+
+        SourceLine(String optValue) {
+            this.optValue = optValue;
+        }
+    }
+
+    void checkErrorLine(String output, boolean expected, List<String> options) {
+        System.err.println("\noptions = "+options);
+        System.err.println(output);
+        boolean errLinePresent = output.contains("^");
+        if (errLinePresent != expected) {
+            throw new AssertionError("Error in diagnostic: error line" +
+                    (expected ? "" : " not") + " expected but" +
+                    (errLinePresent ? "" : " not") + " found");
+        }
+    }
+
+    void exec(DiagnosticType diagType, SourceLine sourceLine) {
+        final Iterable<? extends JavaFileObject> compilationUnits =
+            fm.getJavaFileObjects(new File(test_src, "Erroneous.java"));
+        StringWriter pw = new StringWriter();
+        ArrayList<String> options = new ArrayList<String>();
+        if (diagType.optValue != null)
+            options.add(diagType.optValue);
+        if (sourceLine.optValue != null)
+            options.add(sourceLine.optValue);
+        task = tool.getTask(pw, fm, null, options, null, compilationUnits);
+        task.call();
+        checkErrorLine(pw.toString(),
+                diagType.shouldDisplaySource(sourceLine),
+                options);
+    }
+
+    void test() {
+        for (DiagnosticType dt : DiagnosticType.values()) {
+            for (SourceLine sl : SourceLine.values()) {
+                exec(dt, sl);
+            }
+        }
+    }
+
+    public static void main(String... args) throws Exception {
+        new T6731573().test();
+    }
+}
\ No newline at end of file
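
For reference, the -XDrawDiagnostics and -XDshowSource options exercised by T6731573 can also be passed to javac programmatically. A minimal sketch, not part of the changeset, assuming tools.jar is on the class path and that the Erroneous.java shown above (with the duplicate field) is in the working directory:

    // Illustrative sketch only; not part of this changeset.
    import java.io.PrintWriter;
    import java.io.StringWriter;

    public class ShowSourceDemo {
        public static void main(String[] args) {
            StringWriter sw = new StringWriter();
            PrintWriter out = new PrintWriter(sw);
            // Same entry point used by T6759996 above; the options mirror the
            // RAW/ENABLED combination from T6731573.
            int rc = com.sun.tools.javac.Main.compile(
                new String[] { "-XDrawDiagnostics", "-XDshowSource=true", "Erroneous.java" },
                out);
            out.flush();
            System.out.println("exit code: " + rc);
            // With -XDshowSource=true the output is expected to include the
            // offending source line followed by a caret ("^") marker.
            System.out.println(sw);
        }
    }
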
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/cast/6586091/T6586091.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @author Maurizio Cimadamore
+ * @bug     6586091
+ * @summary javac crashes with StackOverflowError
+ * @compile T6586091.java
+ */
+
+class T6586091 {
+    static class A<T extends A<?>> {}
+    static class B extends A<A<?>> {}
+
+    A<A<?>> t = null;
+    B c = (B)t;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/enum/T6724345.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6724345
+ *
+ * @summary incorrect method resolution for enum classes entered as source files
+ * @author Maurizio Cimadamore
+ *
+ * @compile T6724345.java
+ */
+
+class T6724345 {
+
+    interface I {
+        void i();
+    }
+
+    class U {
+        {
+            I i = E.V;
+            i.i();
+            E.V.i();
+        }
+    }
+
+    enum E implements I {
+        V {public void i() {}};
+    }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/generics/T6751514.java	Wed Jul 05 16:43:17 2017 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug     6751514
+ * @summary Unary post-increment with type variables crash javac during lowering
+ * @author Maurizio Cimadamore
+ */
+
+public class T6751514 {
+
+    static class Foo<X> {
+        X x;
+        Foo (X x) {
+            this.x = x;
+        }
+    }
+
+    static void test1(Foo<Integer> foo) {
+        int start = foo.x;
+        equals(foo.x += 1, start + 1);
+        equals(foo.x++, start + 1);
+        equals(++foo.x, start + 3);
+        equals(foo.x--, start + 3);
+        equals(foo.x -= 1, start + 1);
+        equals(--foo.x, start);
+    }
+
+    static void test2(Foo<Integer> foo) {
+        int start = foo.x;
+        equals((foo.x) += 1, start + 1);
+        equals((foo.x)++, start + 1);
+        equals(++(foo.x), start + 3);
+        equals((foo.x)--, start + 3);
+        equals((foo.x) -= 1, start + 1);
+        equals(--(foo.x), start);
+    }
+
+    static void test3(Foo<Integer> foo) {
+        int start = foo.x;
+        equals(((foo.x)) += 1, start + 1);
+        equals(((foo.x))++, start + 1);
+        equals(++((foo.x)), start + 3);
+        equals(((foo.x))--, start + 3);
+        equals(((foo.x)) -= 1, start + 1);
+        equals(--((foo.x)), start);
+    }
+
+    public static void main(String[] args) {
+        test1(new Foo<Integer>(1));
+        test2(new Foo<Integer>(1));
+        test3(new Foo<Integer>(1));
+    }
+
+    static void equals(int found, int req) {
+        if (found != req) {
+            throw new AssertionError("Error (expected: "+ req +
+                                     " - found: " + found + ")");
+        }
+    }
+}
\ No newline at end of file
--- a/langtools/test/tools/javac/links/T.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javac/links/T.java	Wed Jul 05 16:43:17 2017 +0200
@@ -21,12 +21,4 @@
  * have any questions.
  */
 
-/*
- * @test
- * @bug 4266026
- * @summary javac no longer follows symlinks
- *
- * @run shell links.sh
- */
-
 class T extends a.B {}
--- a/langtools/test/tools/javac/links/links.sh	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javac/links/links.sh	Wed Jul 05 16:43:17 2017 +0200
@@ -23,6 +23,12 @@
 # have any questions.
 #
 
+# @test
+# @bug 4266026
+# @summary javac no longer follows symlinks
+#
+# @run shell links.sh
+
 
 if [ "${TESTSRC}" = "" ]
 then
@@ -58,8 +64,11 @@
     ;;
 esac
 
+mkdir tmp
+cp ${TESTSRC}/b/B.java tmp
+
 rm -rf T.class B.class b/B.class "${TESTCLASSES}/a" "${TESTCLASSES}/classes"
-ln -s "${TESTSRC}/b" "${TESTCLASSES}/a"
+ln -s `pwd`/tmp "${TESTCLASSES}/a"
 mkdir "${TESTCLASSES}/classes"
 
-exec "${TESTJAVA}/bin/javac" ${TESTTOOLVMOPTS} -sourcepath "${TESTCLASSES}" -d "${TESTCLASSES}/classes" "${TESTSRC}/T.java" 2>&1
+"${TESTJAVA}/bin/javac" ${TESTTOOLVMOPTS} -sourcepath "${TESTCLASSES}" -d "${TESTCLASSES}/classes" "${TESTSRC}/T.java" 2>&1
--- a/langtools/test/tools/javac/processing/6348193/T6348193.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javac/processing/6348193/T6348193.java	Wed Jul 05 16:43:17 2017 +0200
@@ -118,10 +118,7 @@
 
     // set up or remove a service configuration file
     static void installConfigFile(NoGoodBad type) throws IOException {
-        URL self = T6348193.class.getClassLoader().getResource(myName+".class");
-        if (!self.getProtocol().equals("file"))
-            throw new AssertionError();
-        File f = new File(self.getFile()).getParentFile();
+        File f = new File(System.getProperty("test.classes", "."));
         for (String s: new String[] { "META-INF", "services", Processor.class.getName() })
             f = new File(f, s);
         BufferedWriter out;
--- a/langtools/test/tools/javadoc/BooleanConst.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/BooleanConst.java	Wed Jul 05 16:43:17 2017 +0200
@@ -37,7 +37,7 @@
     public static void main(String[] args) {
         // run javadoc on package p
         if (com.sun.tools.javadoc.Main.
-            execute("javadoc", "BooleanConst",
+            execute("javadoc", "BooleanConst", BooleanConst.class.getClassLoader(),
                     new String[] {System.getProperty("test.src", ".") + java.io.File.separatorChar + "BooleanConst.java"}) != 0)
             throw new Error();
     }
--- a/langtools/test/tools/javadoc/BreakIteratorWarning.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/BreakIteratorWarning.java	Wed Jul 05 16:43:17 2017 +0200
@@ -41,6 +41,7 @@
         if (com.sun.tools.javadoc.Main.execute(
                 "javadoc",
                 "BreakIteratorWarning",
+                BreakIteratorWarning.class.getClassLoader(),
                 new String[] {"-Xwerror", thisFile}) != 0)
             throw new Error("Javadoc encountered warnings or errors.");
     }
--- a/langtools/test/tools/javadoc/FlagsTooEarly.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/FlagsTooEarly.java	Wed Jul 05 16:43:17 2017 +0200
@@ -40,6 +40,7 @@
         if (com.sun.tools.javadoc.Main.execute(
                 "javadoc",
                 "FlagsTooEarly",
+                FlagsTooEarly.class.getClassLoader(),
                 new String[] {"-Xwerror", thisFile}) != 0)
             throw new Error("Javadoc encountered warnings or errors.");
     }
--- a/langtools/test/tools/javadoc/InlineTagsWithBraces.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/InlineTagsWithBraces.java	Wed Jul 05 16:43:17 2017 +0200
@@ -60,6 +60,7 @@
         if (com.sun.tools.javadoc.Main.execute(
                 "javadoc",
                 "InlineTagsWithBraces",
+                InlineTagsWithBraces.class.getClassLoader(),
                 new String[] {"-Xwerror", thisFile}) != 0)
             throw new Error("Javadoc encountered warnings or errors.");
     }
--- a/langtools/test/tools/javadoc/LangVers.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/LangVers.java	Wed Jul 05 16:43:17 2017 +0200
@@ -43,6 +43,7 @@
         if (com.sun.tools.javadoc.Main.execute(
                 "javadoc",
                 "LangVers",
+                LangVers.class.getClassLoader(),
                 new String[] {"-source", "1.5", thisFile}) != 0)
             throw new Error("Javadoc encountered warnings or errors.");
     }
--- a/langtools/test/tools/javadoc/MethodLinks.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/MethodLinks.java	Wed Jul 05 16:43:17 2017 +0200
@@ -36,7 +36,7 @@
 {
     public static void main(String[] args) {
         if (com.sun.tools.javadoc.Main.
-            execute("javadoc", "MethodLinks",
+            execute("javadoc", "MethodLinks", MethodLinks.class.getClassLoader(),
                     new String[] {System.getProperty("test.src", ".") +
                                   java.io.File.separatorChar + "MethodLinks.java"}
                     ) != 0)
--- a/langtools/test/tools/javadoc/NoStar.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/NoStar.java	Wed Jul 05 16:43:17 2017 +0200
@@ -44,7 +44,7 @@
 {
     public static void main(String[] args) {
         if (com.sun.tools.javadoc.Main.
-            execute("javadoc", "NoStar",
+            execute("javadoc", "NoStar", NoStar.class.getClassLoader(),
                     new String[] {System.getProperty("test.src", ".") + java.io.File.separatorChar + "NoStar.java"}) != 0)
             throw new Error();
     }
--- a/langtools/test/tools/javadoc/T4994049/T4994049.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/T4994049/T4994049.java	Wed Jul 05 16:43:17 2017 +0200
@@ -55,7 +55,8 @@
     public static void main(String... args) {
         for (String file : args) {
             File source = new File(System.getProperty("test.src", "."), file);
-            if (execute("javadoc", "T4994049", new String[]{source.getPath()} ) != 0)
+            if (execute("javadoc", "T4994049", T4994049.class.getClassLoader(),
+                        new String[]{source.getPath()} ) != 0)
                 throw new Error();
         }
     }
--- a/langtools/test/tools/javadoc/XWerror.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/XWerror.java	Wed Jul 05 16:43:17 2017 +0200
@@ -36,7 +36,7 @@
 {
     public static void main(String[] args) {
         if (com.sun.tools.javadoc.Main.
-            execute("javadoc", "XWerror",
+            execute("javadoc", "XWerror", XWerror.class.getClassLoader(),
                     new String[] {"-Xwerror",
                                   System.getProperty("test.src", ".") +
                                   java.io.File.separatorChar +
--- a/langtools/test/tools/javadoc/completionFailure/CompletionFailure.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/completionFailure/CompletionFailure.java	Wed Jul 05 16:43:17 2017 +0200
@@ -37,6 +37,7 @@
         // run javadoc on package pkg
         if (com.sun.tools.javadoc.Main.execute("javadoc",
                                                "CompletionFailure",
+                                               CompletionFailure.class.getClassLoader(),
                                                new String[]{"pkg"}) != 0)
             throw new Error();
     }
--- a/langtools/test/tools/javadoc/dupOk/DupOk.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/dupOk/DupOk.java	Wed Jul 05 16:43:17 2017 +0200
@@ -36,7 +36,7 @@
     public static void main(String[] args) {
         // run javadoc on package p
         if (com.sun.tools.javadoc.Main.
-            execute("javadoc", "DupOk",
+            execute("javadoc", "DupOk", DupOk.class.getClassLoader(),
                     new String[]
                 {"-sourcepath",
                  System.getProperty("test.src", ".") + java.io.File.separatorChar + "sp1" +
--- a/langtools/test/tools/javadoc/imports/MissingImport.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/imports/MissingImport.java	Wed Jul 05 16:43:17 2017 +0200
@@ -41,6 +41,7 @@
         if (com.sun.tools.javadoc.Main.execute(
                 "javadoc",
                 "MissingImport",
+                MissingImport.class.getClassLoader(),
                 new String[] {thisFile}) != 0)
             throw new Error("Javadoc encountered warnings or errors.");
     }
--- a/langtools/test/tools/javadoc/lib/Tester.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/lib/Tester.java	Wed Jul 05 16:43:17 2017 +0200
@@ -89,7 +89,9 @@
     public void run() throws IOException {
         try {
             if (com.sun.tools.javadoc.Main.execute("javadoc",
-                                                   docletName, args) != 0) {
+                                                   docletName,
+                                                   getClass().getClassLoader(),
+                                                   args) != 0) {
                 throw new Error("Javadoc errors encountered.");
             }
             System.out.println("--> Output written to " + outputFile);
--- a/langtools/test/tools/javadoc/nestedClass/NestedClass.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/nestedClass/NestedClass.java	Wed Jul 05 16:43:17 2017 +0200
@@ -39,7 +39,7 @@
 
     public static void main(String[] args) {
         if (com.sun.tools.javadoc.Main.
-            execute("javadoc", "NestedClass",
+            execute("javadoc", "NestedClass", NestedClass.class.getClassLoader(),
                     new String[] {System.getProperty("test.src", ".") +
                                   java.io.File.separatorChar +
                                   "NestedClass.java"})
--- a/langtools/test/tools/javadoc/sourceOnly/p/SourceOnly.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/sourceOnly/p/SourceOnly.java	Wed Jul 05 16:43:17 2017 +0200
@@ -31,7 +31,7 @@
     public static void main(String[] args) {
         // run javadoc on package p
         int result = com.sun.tools.javadoc.Main.
-            execute("javadoc", "p.SourceOnly", new String[] {"p"});
+            execute("javadoc", "p.SourceOnly", SourceOnly.class.getClassLoader(), new String[] {"p"});
         if (result != 0)
             throw new Error();
     }
--- a/langtools/test/tools/javadoc/sourceOption/SourceOption.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/sourceOption/SourceOption.java	Wed Jul 05 16:43:17 2017 +0200
@@ -36,6 +36,7 @@
         if (com.sun.tools.javadoc.Main.execute(
                 "javadoc",
                 "SourceOption",
+                SourceOption.class.getClassLoader(),
                 new String[] {"-source", "1.3", "p"}) != 0)
             throw new Error("Javadoc encountered warnings or errors.");
     }
--- a/langtools/test/tools/javadoc/subpackageIgnore/SubpackageIgnore.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javadoc/subpackageIgnore/SubpackageIgnore.java	Wed Jul 05 16:43:17 2017 +0200
@@ -36,6 +36,7 @@
         if (com.sun.tools.javadoc.Main.execute(
                 "javadoc",
                 "SubpackageIgnore",
+                SubpackageIgnore.class.getClassLoader(),
                 new String[] {"-Xwerror",
                               "-sourcepath",
                               System.getProperty("test.src", "."),
--- a/langtools/test/tools/javap/T6622260.java	Wed Jul 05 16:42:40 2017 +0200
+++ b/langtools/test/tools/javap/T6622260.java	Wed Jul 05 16:43:17 2017 +0200
@@ -189,10 +189,7 @@
 
     void verify(String output) {
         System.out.println(output);
-        if (output.startsWith("Classfile")) {
-            // make sure to ignore filename
-            output = output.substring(output.indexOf('\n'));
-        }
+        output = output.substring(output.indexOf("Test.java"));
         if (output.indexOf("-") >= 0)
             throw new Error("- found in output");
         if (output.indexOf("FFFFFF") >= 0)