Merge
authorehelin
Fri, 05 Feb 2016 18:37:18 +0100
changeset 35944 ea9806c3f58f
parent 35942 5e71767c0d0f (current diff)
parent 35943 e726308008c0 (diff)
child 35945 f532c5c62540
Merge
hotspot/src/share/vm/gc/g1/concurrentMark.cpp
hotspot/src/share/vm/gc/g1/concurrentMark.hpp
hotspot/src/share/vm/gc/g1/concurrentMark.inline.hpp
--- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp	Fri Feb 05 16:19:31 2016 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3682 +0,0 @@
-/*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "classfile/metadataOnStackMark.hpp"
-#include "classfile/symbolTable.hpp"
-#include "code/codeCache.hpp"
-#include "gc/g1/concurrentMark.inline.hpp"
-#include "gc/g1/concurrentMarkThread.inline.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/g1CollectorPolicy.hpp"
-#include "gc/g1/g1CollectorState.hpp"
-#include "gc/g1/g1HeapVerifier.hpp"
-#include "gc/g1/g1OopClosures.inline.hpp"
-#include "gc/g1/g1StringDedup.hpp"
-#include "gc/g1/heapRegion.inline.hpp"
-#include "gc/g1/heapRegionRemSet.hpp"
-#include "gc/g1/heapRegionSet.inline.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
-#include "gc/shared/gcId.hpp"
-#include "gc/shared/gcTimer.hpp"
-#include "gc/shared/gcTrace.hpp"
-#include "gc/shared/gcTraceTime.inline.hpp"
-#include "gc/shared/genOopClosures.inline.hpp"
-#include "gc/shared/referencePolicy.hpp"
-#include "gc/shared/strongRootsScope.hpp"
-#include "gc/shared/taskqueue.inline.hpp"
-#include "gc/shared/vmGCOperations.hpp"
-#include "logging/log.hpp"
-#include "memory/allocation.hpp"
-#include "memory/resourceArea.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/atomic.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/java.hpp"
-#include "runtime/prefetch.inline.hpp"
-#include "services/memTracker.hpp"
-
-// Concurrent marking bit map wrapper
-
-CMBitMapRO::CMBitMapRO(int shifter) :
-  _bm(),
-  _shifter(shifter) {
-  _bmStartWord = 0;
-  _bmWordSize = 0;
-}
-
-HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
-                                               const HeapWord* limit) const {
-  // First we must round addr *up* to a possible object boundary.
-  addr = (HeapWord*)align_size_up((intptr_t)addr,
-                                  HeapWordSize << _shifter);
-  size_t addrOffset = heapWordToOffset(addr);
-  assert(limit != NULL, "limit must not be NULL");
-  size_t limitOffset = heapWordToOffset(limit);
-  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
-  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
-  assert(nextAddr >= addr, "get_next_one postcondition");
-  assert(nextAddr == limit || isMarked(nextAddr),
-         "get_next_one postcondition");
-  return nextAddr;
-}
-
-#ifndef PRODUCT
-bool CMBitMapRO::covers(MemRegion heap_rs) const {
-  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
-  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
-         "size inconsistency");
-  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
-         _bmWordSize  == heap_rs.word_size();
-}
-#endif
-
-void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
-  _bm.print_on_error(st, prefix);
-}
-
-size_t CMBitMap::compute_size(size_t heap_size) {
-  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
-}
-
-size_t CMBitMap::mark_distance() {
-  return MinObjAlignmentInBytes * BitsPerByte;
-}
-
-void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
-  _bmStartWord = heap.start();
-  _bmWordSize = heap.word_size();
-
-  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
-  _bm.set_size(_bmWordSize >> _shifter);
-
-  storage->set_mapping_changed_listener(&_listener);
-}
-
-void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
-  if (zero_filled) {
-    return;
-  }
-  // We need to clear the bitmap on commit, removing any existing information.
-  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
-  _bm->clearRange(mr);
-}
-
-// Closure used for clearing the given mark bitmap.
-class ClearBitmapHRClosure : public HeapRegionClosure {
- private:
-  ConcurrentMark* _cm;
-  CMBitMap* _bitmap;
-  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
- public:
-  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
-    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
-  }
-
-  virtual bool doHeapRegion(HeapRegion* r) {
-    size_t const chunk_size_in_words = M / HeapWordSize;
-
-    HeapWord* cur = r->bottom();
-    HeapWord* const end = r->end();
-
-    while (cur < end) {
-      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
-      _bitmap->clearRange(mr);
-
-      cur += chunk_size_in_words;
-
-      // Abort iteration if after yielding the marking has been aborted.
-      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
-        return true;
-      }
-      // Repeat the asserts from before the start of the closure. We will do them
-      // as asserts here to minimize their overhead on the product. However, we
-      // will have them as guarantees at the beginning / end of the bitmap
-      // clearing to get some checking in the product.
-      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
-      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
-    }
-
-    return false;
-  }
-};
-
-class ParClearNextMarkBitmapTask : public AbstractGangTask {
-  ClearBitmapHRClosure* _cl;
-  HeapRegionClaimer     _hrclaimer;
-  bool                  _suspendible; // If the task is suspendible, workers must join the STS.
-
-public:
-  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
-      _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}
-
-  void work(uint worker_id) {
-    SuspendibleThreadSetJoiner sts_join(_suspendible);
-    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
-  }
-};
-
-void CMBitMap::clearAll() {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
-  uint n_workers = g1h->workers()->active_workers();
-  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
-  g1h->workers()->run_task(&task);
-  guarantee(cl.complete(), "Must have completed iteration.");
-  return;
-}
-
-void CMBitMap::clearRange(MemRegion mr) {
-  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
-  assert(!mr.is_empty(), "unexpected empty region");
-  // convert address range into offset range
-  _bm.at_put_range(heapWordToOffset(mr.start()),
-                   heapWordToOffset(mr.end()), false);
-}
-
-CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
-  _base(NULL), _cm(cm)
-{}
-
-bool CMMarkStack::allocate(size_t capacity) {
-  // allocate a stack of the requisite depth
-  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
-  if (!rs.is_reserved()) {
-    warning("ConcurrentMark MarkStack allocation failure");
-    return false;
-  }
-  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
-  if (!_virtual_space.initialize(rs, rs.size())) {
-    warning("ConcurrentMark MarkStack backing store failure");
-    // Release the virtual memory reserved for the marking stack
-    rs.release();
-    return false;
-  }
-  assert(_virtual_space.committed_size() == rs.size(),
-         "Didn't reserve backing store for all of ConcurrentMark stack?");
-  _base = (oop*) _virtual_space.low();
-  setEmpty();
-  _capacity = (jint) capacity;
-  _saved_index = -1;
-  _should_expand = false;
-  return true;
-}
-
-void CMMarkStack::expand() {
-  // Called, during remark, if we've overflown the marking stack during marking.
-  assert(isEmpty(), "stack should been emptied while handling overflow");
-  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
-  // Clear expansion flag
-  _should_expand = false;
-  if (_capacity == (jint) MarkStackSizeMax) {
-    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
-    return;
-  }
-  // Double capacity if possible
-  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
-  // Do not give up existing stack until we have managed to
-  // get the double capacity that we desired.
-  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
-                                                           sizeof(oop)));
-  if (rs.is_reserved()) {
-    // Release the backing store associated with old stack
-    _virtual_space.release();
-    // Reinitialize virtual space for new stack
-    if (!_virtual_space.initialize(rs, rs.size())) {
-      fatal("Not enough swap for expanded marking stack capacity");
-    }
-    _base = (oop*)(_virtual_space.low());
-    _index = 0;
-    _capacity = new_capacity;
-  } else {
-    // Failed to double capacity, continue;
-    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
-                  _capacity / K, new_capacity / K);
-  }
-}
-
-void CMMarkStack::set_should_expand() {
-  // If we're resetting the marking state because of an
-  // marking stack overflow, record that we should, if
-  // possible, expand the stack.
-  _should_expand = _cm->has_overflown();
-}
-
-CMMarkStack::~CMMarkStack() {
-  if (_base != NULL) {
-    _base = NULL;
-    _virtual_space.release();
-  }
-}
-
-void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
-  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-  jint start = _index;
-  jint next_index = start + n;
-  if (next_index > _capacity) {
-    _overflow = true;
-    return;
-  }
-  // Otherwise.
-  _index = next_index;
-  for (int i = 0; i < n; i++) {
-    int ind = start + i;
-    assert(ind < _capacity, "By overflow test above.");
-    _base[ind] = ptr_arr[i];
-  }
-}
-
-bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
-  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-  jint index = _index;
-  if (index == 0) {
-    *n = 0;
-    return false;
-  } else {
-    int k = MIN2(max, index);
-    jint  new_ind = index - k;
-    for (int j = 0; j < k; j++) {
-      ptr_arr[j] = _base[new_ind + j];
-    }
-    _index = new_ind;
-    *n = k;
-    return true;
-  }
-}
-
-void CMMarkStack::note_start_of_gc() {
-  assert(_saved_index == -1,
-         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
-  _saved_index = _index;
-}
-
-void CMMarkStack::note_end_of_gc() {
-  // This is intentionally a guarantee, instead of an assert. If we
-  // accidentally add something to the mark stack during GC, it
-  // will be a correctness issue so it's better if we crash. we'll
-  // only check this once per GC anyway, so it won't be a performance
-  // issue in any way.
-  guarantee(_saved_index == _index,
-            "saved index: %d index: %d", _saved_index, _index);
-  _saved_index = -1;
-}
-
-CMRootRegions::CMRootRegions() :
-  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
-  _should_abort(false),  _next_survivor(NULL) { }
-
-void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
-  _young_list = g1h->young_list();
-  _cm = cm;
-}
-
-void CMRootRegions::prepare_for_scan() {
-  assert(!scan_in_progress(), "pre-condition");
-
-  // Currently, only survivors can be root regions.
-  assert(_next_survivor == NULL, "pre-condition");
-  _next_survivor = _young_list->first_survivor_region();
-  _scan_in_progress = (_next_survivor != NULL);
-  _should_abort = false;
-}
-
-HeapRegion* CMRootRegions::claim_next() {
-  if (_should_abort) {
-    // If someone has set the should_abort flag, we return NULL to
-    // force the caller to bail out of their loop.
-    return NULL;
-  }
-
-  // Currently, only survivors can be root regions.
-  HeapRegion* res = _next_survivor;
-  if (res != NULL) {
-    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
-    // Read it again in case it changed while we were waiting for the lock.
-    res = _next_survivor;
-    if (res != NULL) {
-      if (res == _young_list->last_survivor_region()) {
-        // We just claimed the last survivor so store NULL to indicate
-        // that we're done.
-        _next_survivor = NULL;
-      } else {
-        _next_survivor = res->get_next_young_region();
-      }
-    } else {
-      // Someone else claimed the last survivor while we were trying
-      // to take the lock so nothing else to do.
-    }
-  }
-  assert(res == NULL || res->is_survivor(), "post-condition");
-
-  return res;
-}
-
-void CMRootRegions::scan_finished() {
-  assert(scan_in_progress(), "pre-condition");
-
-  // Currently, only survivors can be root regions.
-  if (!_should_abort) {
-    assert(_next_survivor == NULL, "we should have claimed all survivors");
-  }
-  _next_survivor = NULL;
-
-  {
-    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
-    _scan_in_progress = false;
-    RootRegionScan_lock->notify_all();
-  }
-}
-
-bool CMRootRegions::wait_until_scan_finished() {
-  if (!scan_in_progress()) return false;
-
-  {
-    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
-    while (scan_in_progress()) {
-      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
-    }
-  }
-  return true;
-}
-
-uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
-  return MAX2((n_par_threads + 2) / 4, 1U);
-}
-
-ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
-  _g1h(g1h),
-  _markBitMap1(),
-  _markBitMap2(),
-  _parallel_marking_threads(0),
-  _max_parallel_marking_threads(0),
-  _sleep_factor(0.0),
-  _marking_task_overhead(1.0),
-  _cleanup_list("Cleanup List"),
-  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
-  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
-            CardTableModRefBS::card_shift,
-            false /* in_resource_area*/),
-
-  _prevMarkBitMap(&_markBitMap1),
-  _nextMarkBitMap(&_markBitMap2),
-
-  _markStack(this),
-  // _finger set in set_non_marking_state
-
-  _max_worker_id(ParallelGCThreads),
-  // _active_tasks set in set_non_marking_state
-  // _tasks set inside the constructor
-  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
-  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
-
-  _has_overflown(false),
-  _concurrent(false),
-  _has_aborted(false),
-  _restart_for_overflow(false),
-  _concurrent_marking_in_progress(false),
-  _concurrent_phase_started(false),
-
-  // _verbose_level set below
-
-  _init_times(),
-  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
-  _cleanup_times(),
-  _total_counting_time(0.0),
-  _total_rs_scrub_time(0.0),
-
-  _parallel_workers(NULL),
-
-  _count_card_bitmaps(NULL),
-  _count_marked_bytes(NULL),
-  _completed_initialization(false) {
-
-  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
-  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
-
-  // Create & start a ConcurrentMark thread.
-  _cmThread = new ConcurrentMarkThread(this);
-  assert(cmThread() != NULL, "CM Thread should have been created");
-  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
-  if (_cmThread->osthread() == NULL) {
-      vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
-  }
-
-  assert(CGC_lock != NULL, "Where's the CGC_lock?");
-  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
-  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
-
-  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
-  satb_qs.set_buffer_size(G1SATBBufferSize);
-
-  _root_regions.init(_g1h, this);
-
-  if (ConcGCThreads > ParallelGCThreads) {
-    warning("Can't have more ConcGCThreads (%u) "
-            "than ParallelGCThreads (%u).",
-            ConcGCThreads, ParallelGCThreads);
-    return;
-  }
-  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
-    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
-    // if both are set
-    _sleep_factor             = 0.0;
-    _marking_task_overhead    = 1.0;
-  } else if (G1MarkingOverheadPercent > 0) {
-    // We will calculate the number of parallel marking threads based
-    // on a target overhead with respect to the soft real-time goal
-    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
-    double overall_cm_overhead =
-      (double) MaxGCPauseMillis * marking_overhead /
-      (double) GCPauseIntervalMillis;
-    double cpu_ratio = 1.0 / (double) os::processor_count();
-    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
-    double marking_task_overhead =
-      overall_cm_overhead / marking_thread_num *
-                                              (double) os::processor_count();
-    double sleep_factor =
-                       (1.0 - marking_task_overhead) / marking_task_overhead;
-
-    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
-    _sleep_factor             = sleep_factor;
-    _marking_task_overhead    = marking_task_overhead;
-  } else {
-    // Calculate the number of parallel marking threads by scaling
-    // the number of parallel GC threads.
-    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
-    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
-    _sleep_factor             = 0.0;
-    _marking_task_overhead    = 1.0;
-  }
-
-  assert(ConcGCThreads > 0, "Should have been set");
-  _parallel_marking_threads = ConcGCThreads;
-  _max_parallel_marking_threads = _parallel_marking_threads;
-
-  _parallel_workers = new WorkGang("G1 Marker",
-       _max_parallel_marking_threads, false, true);
-  if (_parallel_workers == NULL) {
-    vm_exit_during_initialization("Failed necessary allocation.");
-  } else {
-    _parallel_workers->initialize_workers();
-  }
-
-  if (FLAG_IS_DEFAULT(MarkStackSize)) {
-    size_t mark_stack_size =
-      MIN2(MarkStackSizeMax,
-          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
-    // Verify that the calculated value for MarkStackSize is in range.
-    // It would be nice to use the private utility routine from Arguments.
-    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
-      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
-              "must be between 1 and " SIZE_FORMAT,
-              mark_stack_size, MarkStackSizeMax);
-      return;
-    }
-    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
-  } else {
-    // Verify MarkStackSize is in range.
-    if (FLAG_IS_CMDLINE(MarkStackSize)) {
-      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
-        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
-          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
-                  "must be between 1 and " SIZE_FORMAT,
-                  MarkStackSize, MarkStackSizeMax);
-          return;
-        }
-      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
-        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
-          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
-                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
-                  MarkStackSize, MarkStackSizeMax);
-          return;
-        }
-      }
-    }
-  }
-
-  if (!_markStack.allocate(MarkStackSize)) {
-    warning("Failed to allocate CM marking stack");
-    return;
-  }
-
-  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
-  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
-
-  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
-
-  BitMap::idx_t card_bm_size = _card_bm.size();
-
-  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
-  _active_tasks = _max_worker_id;
-
-  uint max_regions = _g1h->max_regions();
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    CMTaskQueue* task_queue = new CMTaskQueue();
-    task_queue->initialize();
-    _task_queues->register_queue(i, task_queue);
-
-    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
-
-    _tasks[i] = new CMTask(i, this,
-                           _count_marked_bytes[i],
-                           &_count_card_bitmaps[i],
-                           task_queue, _task_queues);
-
-    _accum_task_vtime[i] = 0.0;
-  }
-
-  // Calculate the card number for the bottom of the heap. Used
-  // in biasing indexes into the accounting card bitmaps.
-  _heap_bottom_card_num =
-    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
-                                CardTableModRefBS::card_shift);
-
-  // Clear all the liveness counting data
-  clear_all_count_data();
-
-  // so that the call below can read a sensible value
-  _heap_start = g1h->reserved_region().start();
-  set_non_marking_state();
-  _completed_initialization = true;
-}
-
-void ConcurrentMark::reset() {
-  // Starting values for these two. This should be called in a STW
-  // phase.
-  MemRegion reserved = _g1h->g1_reserved();
-  _heap_start = reserved.start();
-  _heap_end   = reserved.end();
-
-  // Separated the asserts so that we know which one fires.
-  assert(_heap_start != NULL, "heap bounds should look ok");
-  assert(_heap_end != NULL, "heap bounds should look ok");
-  assert(_heap_start < _heap_end, "heap bounds should look ok");
-
-  // Reset all the marking data structures and any necessary flags
-  reset_marking_state();
-
-  // We do reset all of them, since different phases will use
-  // different number of active threads. So, it's easiest to have all
-  // of them ready.
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    _tasks[i]->reset(_nextMarkBitMap);
-  }
-
-  // we need this to make sure that the flag is on during the evac
-  // pause with initial mark piggy-backed
-  set_concurrent_marking_in_progress();
-}
-
-
-void ConcurrentMark::reset_marking_state(bool clear_overflow) {
-  _markStack.set_should_expand();
-  _markStack.setEmpty();        // Also clears the _markStack overflow flag
-  if (clear_overflow) {
-    clear_has_overflown();
-  } else {
-    assert(has_overflown(), "pre-condition");
-  }
-  _finger = _heap_start;
-
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    CMTaskQueue* queue = _task_queues->queue(i);
-    queue->set_empty();
-  }
-}
-
-void ConcurrentMark::set_concurrency(uint active_tasks) {
-  assert(active_tasks <= _max_worker_id, "we should not have more");
-
-  _active_tasks = active_tasks;
-  // Need to update the three data structures below according to the
-  // number of active threads for this phase.
-  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
-  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
-  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
-}
-
-void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
-  set_concurrency(active_tasks);
-
-  _concurrent = concurrent;
-  // We propagate this to all tasks, not just the active ones.
-  for (uint i = 0; i < _max_worker_id; ++i)
-    _tasks[i]->set_concurrent(concurrent);
-
-  if (concurrent) {
-    set_concurrent_marking_in_progress();
-  } else {
-    // We currently assume that the concurrent flag has been set to
-    // false before we start remark. At this point we should also be
-    // in a STW phase.
-    assert(!concurrent_marking_in_progress(), "invariant");
-    assert(out_of_regions(),
-           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
-           p2i(_finger), p2i(_heap_end));
-  }
-}
-
-void ConcurrentMark::set_non_marking_state() {
-  // We set the global marking state to some default values when we're
-  // not doing marking.
-  reset_marking_state();
-  _active_tasks = 0;
-  clear_concurrent_marking_in_progress();
-}
-
-ConcurrentMark::~ConcurrentMark() {
-  // The ConcurrentMark instance is never freed.
-  ShouldNotReachHere();
-}
-
-void ConcurrentMark::clearNextBitmap() {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // Make sure that the concurrent mark thread looks to still be in
-  // the current cycle.
-  guarantee(cmThread()->during_cycle(), "invariant");
-
-  // We are finishing up the current cycle by clearing the next
-  // marking bitmap and getting it ready for the next cycle. During
-  // this time no other cycle can start. So, let's make sure that this
-  // is the case.
-  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
-
-  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
-  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
-  _parallel_workers->run_task(&task);
-
-  // Clear the liveness counting data. If the marking has been aborted, the abort()
-  // call already did that.
-  if (cl.complete()) {
-    clear_all_count_data();
-  }
-
-  // Repeat the asserts from above.
-  guarantee(cmThread()->during_cycle(), "invariant");
-  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
-}
-
-class CheckBitmapClearHRClosure : public HeapRegionClosure {
-  CMBitMap* _bitmap;
-  bool _error;
- public:
-  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
-  }
-
-  virtual bool doHeapRegion(HeapRegion* r) {
-    // This closure can be called concurrently to the mutator, so we must make sure
-    // that the result of the getNextMarkedWordAddress() call is compared to the
-    // value passed to it as limit to detect any found bits.
-    // end never changes in G1.
-    HeapWord* end = r->end();
-    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
-  }
-};
-
-bool ConcurrentMark::nextMarkBitmapIsClear() {
-  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
-  _g1h->heap_region_iterate(&cl);
-  return cl.complete();
-}
-
-class NoteStartOfMarkHRClosure: public HeapRegionClosure {
-public:
-  bool doHeapRegion(HeapRegion* r) {
-    r->note_start_of_marking();
-    return false;
-  }
-};
-
-void ConcurrentMark::checkpointRootsInitialPre() {
-  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
-  G1CollectorPolicy* g1p = g1h->g1_policy();
-
-  _has_aborted = false;
-
-  // Initialize marking structures. This has to be done in a STW phase.
-  reset();
-
-  // For each region note start of marking.
-  NoteStartOfMarkHRClosure startcl;
-  g1h->heap_region_iterate(&startcl);
-}
-
-
-void ConcurrentMark::checkpointRootsInitialPost() {
-  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
-
-  // Start Concurrent Marking weak-reference discovery.
-  ReferenceProcessor* rp = g1h->ref_processor_cm();
-  // enable ("weak") refs discovery
-  rp->enable_discovery();
-  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
-
-  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  // This is the start of  the marking cycle, we're expected all
-  // threads to have SATB queues with active set to false.
-  satb_mq_set.set_active_all_threads(true, /* new active value */
-                                     false /* expected_active */);
-
-  _root_regions.prepare_for_scan();
-
-  // update_g1_committed() will be called at the end of an evac pause
-  // when marking is on. So, it's also called at the end of the
-  // initial-mark pause to update the heap end, if the heap expands
-  // during it. No need to call it here.
-}
-
-/*
- * Notice that in the next two methods, we actually leave the STS
- * during the barrier sync and join it immediately afterwards. If we
- * do not do this, the following deadlock can occur: one thread could
- * be in the barrier sync code, waiting for the other thread to also
- * sync up, whereas another one could be trying to yield, while also
- * waiting for the other threads to sync up too.
- *
- * Note, however, that this code is also used during remark and in
- * this case we should not attempt to leave / enter the STS, otherwise
- * we'll either hit an assert (debug / fastdebug) or deadlock
- * (product). So we should only leave / enter the STS if we are
- * operating concurrently.
- *
- * Because the thread that does the sync barrier has left the STS, it
- * is possible to be suspended for a Full GC or an evacuation pause
- * could occur. This is actually safe, since the entering the sync
- * barrier is one of the last things do_marking_step() does, and it
- * doesn't manipulate any data structures afterwards.
- */
-
-void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
-  bool barrier_aborted;
-  {
-    SuspendibleThreadSetLeaver sts_leave(concurrent());
-    barrier_aborted = !_first_overflow_barrier_sync.enter();
-  }
-
-  // at this point everyone should have synced up and not be doing any
-  // more work
-
-  if (barrier_aborted) {
-    // If the barrier aborted we ignore the overflow condition and
-    // just abort the whole marking phase as quickly as possible.
-    return;
-  }
-
-  // If we're executing the concurrent phase of marking, reset the marking
-  // state; otherwise the marking state is reset after reference processing,
-  // during the remark pause.
-  // If we reset here as a result of an overflow during the remark we will
-  // see assertion failures from any subsequent set_concurrency_and_phase()
-  // calls.
-  if (concurrent()) {
-    // let the task associated with with worker 0 do this
-    if (worker_id == 0) {
-      // task 0 is responsible for clearing the global data structures
-      // We should be here because of an overflow. During STW we should
-      // not clear the overflow flag since we rely on it being true when
-      // we exit this method to abort the pause and restart concurrent
-      // marking.
-      reset_marking_state(true /* clear_overflow */);
-
-      log_info(gc)("Concurrent Mark reset for overflow");
-    }
-  }
-
-  // after this, each task should reset its own data structures then
-  // then go into the second barrier
-}
-
-void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
-  SuspendibleThreadSetLeaver sts_leave(concurrent());
-  _second_overflow_barrier_sync.enter();
-
-  // at this point everything should be re-initialized and ready to go
-}
-
-class CMConcurrentMarkingTask: public AbstractGangTask {
-private:
-  ConcurrentMark*       _cm;
-  ConcurrentMarkThread* _cmt;
-
-public:
-  void work(uint worker_id) {
-    assert(Thread::current()->is_ConcurrentGC_thread(),
-           "this should only be done by a conc GC thread");
-    ResourceMark rm;
-
-    double start_vtime = os::elapsedVTime();
-
-    {
-      SuspendibleThreadSetJoiner sts_join;
-
-      assert(worker_id < _cm->active_tasks(), "invariant");
-      CMTask* the_task = _cm->task(worker_id);
-      the_task->record_start_time();
-      if (!_cm->has_aborted()) {
-        do {
-          double start_vtime_sec = os::elapsedVTime();
-          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
-
-          the_task->do_marking_step(mark_step_duration_ms,
-                                    true  /* do_termination */,
-                                    false /* is_serial*/);
-
-          double end_vtime_sec = os::elapsedVTime();
-          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
-          _cm->clear_has_overflown();
-
-          _cm->do_yield_check(worker_id);
-
-          jlong sleep_time_ms;
-          if (!_cm->has_aborted() && the_task->has_aborted()) {
-            sleep_time_ms =
-              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
-            {
-              SuspendibleThreadSetLeaver sts_leave;
-              os::sleep(Thread::current(), sleep_time_ms, false);
-            }
-          }
-        } while (!_cm->has_aborted() && the_task->has_aborted());
-      }
-      the_task->record_end_time();
-      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
-    }
-
-    double end_vtime = os::elapsedVTime();
-    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
-  }
-
-  CMConcurrentMarkingTask(ConcurrentMark* cm,
-                          ConcurrentMarkThread* cmt) :
-      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
-
-  ~CMConcurrentMarkingTask() { }
-};
-
-// Calculates the number of active workers for a concurrent
-// phase.
-uint ConcurrentMark::calc_parallel_marking_threads() {
-  uint n_conc_workers = 0;
-  if (!UseDynamicNumberOfGCThreads ||
-      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
-       !ForceDynamicNumberOfGCThreads)) {
-    n_conc_workers = max_parallel_marking_threads();
-  } else {
-    n_conc_workers =
-      AdaptiveSizePolicy::calc_default_active_workers(
-                                   max_parallel_marking_threads(),
-                                   1, /* Minimum workers */
-                                   parallel_marking_threads(),
-                                   Threads::number_of_non_daemon_threads());
-    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
-    // that scaling has already gone into "_max_parallel_marking_threads".
-  }
-  assert(n_conc_workers > 0, "Always need at least 1");
-  return n_conc_workers;
-}
-
-void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
-  // Currently, only survivors can be root regions.
-  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
-  G1RootRegionScanClosure cl(_g1h, this, worker_id);
-
-  const uintx interval = PrefetchScanIntervalInBytes;
-  HeapWord* curr = hr->bottom();
-  const HeapWord* end = hr->top();
-  while (curr < end) {
-    Prefetch::read(curr, interval);
-    oop obj = oop(curr);
-    int size = obj->oop_iterate_size(&cl);
-    assert(size == obj->size(), "sanity");
-    curr += size;
-  }
-}
-
-class CMRootRegionScanTask : public AbstractGangTask {
-private:
-  ConcurrentMark* _cm;
-
-public:
-  CMRootRegionScanTask(ConcurrentMark* cm) :
-    AbstractGangTask("Root Region Scan"), _cm(cm) { }
-
-  void work(uint worker_id) {
-    assert(Thread::current()->is_ConcurrentGC_thread(),
-           "this should only be done by a conc GC thread");
-
-    CMRootRegions* root_regions = _cm->root_regions();
-    HeapRegion* hr = root_regions->claim_next();
-    while (hr != NULL) {
-      _cm->scanRootRegion(hr, worker_id);
-      hr = root_regions->claim_next();
-    }
-  }
-};
-
-void ConcurrentMark::scanRootRegions() {
-  // Start of concurrent marking.
-  ClassLoaderDataGraph::clear_claimed_marks();
-
-  // scan_in_progress() will have been set to true only if there was
-  // at least one root region to scan. So, if it's false, we
-  // should not attempt to do any further work.
-  if (root_regions()->scan_in_progress()) {
-    GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
-
-    _parallel_marking_threads = calc_parallel_marking_threads();
-    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
-           "Maximum number of marking threads exceeded");
-    uint active_workers = MAX2(1U, parallel_marking_threads());
-
-    CMRootRegionScanTask task(this);
-    _parallel_workers->set_active_workers(active_workers);
-    _parallel_workers->run_task(&task);
-
-    // It's possible that has_aborted() is true here without actually
-    // aborting the survivor scan earlier. This is OK as it's
-    // mainly used for sanity checking.
-    root_regions()->scan_finished();
-  }
-}
-
-void ConcurrentMark::register_concurrent_phase_start(const char* title) {
-  assert(!_concurrent_phase_started, "Sanity");
-  _concurrent_phase_started = true;
-  _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
-}
-
-void ConcurrentMark::register_concurrent_phase_end() {
-  if (_concurrent_phase_started) {
-    _concurrent_phase_started = false;
-    _g1h->gc_timer_cm()->register_gc_concurrent_end();
-  }
-}
-
-void ConcurrentMark::markFromRoots() {
-  // we might be tempted to assert that:
-  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
-  //        "inconsistent argument?");
-  // However that wouldn't be right, because it's possible that
-  // a safepoint is indeed in progress as a younger generation
-  // stop-the-world GC happens even as we mark in this generation.
-
-  _restart_for_overflow = false;
-
-  // _g1h has _n_par_threads
-  _parallel_marking_threads = calc_parallel_marking_threads();
-  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
-    "Maximum number of marking threads exceeded");
-
-  uint active_workers = MAX2(1U, parallel_marking_threads());
-  assert(active_workers > 0, "Should have been set");
-
-  // Parallel task terminator is set in "set_concurrency_and_phase()"
-  set_concurrency_and_phase(active_workers, true /* concurrent */);
-
-  CMConcurrentMarkingTask markingTask(this, cmThread());
-  _parallel_workers->set_active_workers(active_workers);
-  _parallel_workers->run_task(&markingTask);
-  print_stats();
-}
-
-void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
-  // world is stopped at this checkpoint
-  assert(SafepointSynchronize::is_at_safepoint(),
-         "world should be stopped");
-
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // If a full collection has happened, we shouldn't do this.
-  if (has_aborted()) {
-    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
-    return;
-  }
-
-  SvcGCMarker sgcm(SvcGCMarker::OTHER);
-
-  if (VerifyDuringGC) {
-    HandleMark hm;  // handle scope
-    g1h->prepare_for_verify();
-    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
-  }
-  g1h->verifier()->check_bitmaps("Remark Start");
-
-  G1CollectorPolicy* g1p = g1h->g1_policy();
-  g1p->record_concurrent_mark_remark_start();
-
-  double start = os::elapsedTime();
-
-  checkpointRootsFinalWork();
-
-  double mark_work_end = os::elapsedTime();
-
-  weakRefsWork(clear_all_soft_refs);
-
-  if (has_overflown()) {
-    // Oops.  We overflowed.  Restart concurrent marking.
-    _restart_for_overflow = true;
-    log_develop_trace(gc)("Remark led to restart for overflow.");
-
-    // Verify the heap w.r.t. the previous marking bitmap.
-    if (VerifyDuringGC) {
-      HandleMark hm;  // handle scope
-      g1h->prepare_for_verify();
-      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
-    }
-
-    // Clear the marking state because we will be restarting
-    // marking due to overflowing the global mark stack.
-    reset_marking_state();
-  } else {
-    {
-      GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());
-
-      // Aggregate the per-task counting data that we have accumulated
-      // while marking.
-      aggregate_count_data();
-    }
-
-    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-    // We're done with marking.
-    // This is the end of  the marking cycle, we're expected all
-    // threads to have SATB queues with active set to true.
-    satb_mq_set.set_active_all_threads(false, /* new active value */
-                                       true /* expected_active */);
-
-    if (VerifyDuringGC) {
-      HandleMark hm;  // handle scope
-      g1h->prepare_for_verify();
-      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
-    }
-    g1h->verifier()->check_bitmaps("Remark End");
-    assert(!restart_for_overflow(), "sanity");
-    // Completely reset the marking state since marking completed
-    set_non_marking_state();
-  }
-
-  // Expand the marking stack, if we have to and if we can.
-  if (_markStack.should_expand()) {
-    _markStack.expand();
-  }
-
-  // Statistics
-  double now = os::elapsedTime();
-  _remark_mark_times.add((mark_work_end - start) * 1000.0);
-  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
-  _remark_times.add((now - start) * 1000.0);
-
-  g1p->record_concurrent_mark_remark_end();
-
-  G1CMIsAliveClosure is_alive(g1h);
-  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
-}
-
-// Base class of the closures that finalize and verify the
-// liveness counting data.
-class CMCountDataClosureBase: public HeapRegionClosure {
-protected:
-  G1CollectedHeap* _g1h;
-  ConcurrentMark* _cm;
-  CardTableModRefBS* _ct_bs;
-
-  BitMap* _region_bm;
-  BitMap* _card_bm;
-
-  // Takes a region that's not empty (i.e., it has at least one
-  // live object in it and sets its corresponding bit on the region
-  // bitmap to 1.
-  void set_bit_for_region(HeapRegion* hr) {
-    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
-    _region_bm->par_at_put(index, true);
-  }
-
-public:
-  CMCountDataClosureBase(G1CollectedHeap* g1h,
-                         BitMap* region_bm, BitMap* card_bm):
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
-    _region_bm(region_bm), _card_bm(card_bm) { }
-};
-
-// Closure that calculates the # live objects per region. Used
-// for verification purposes during the cleanup pause.
-class CalcLiveObjectsClosure: public CMCountDataClosureBase {
-  CMBitMapRO* _bm;
-  size_t _region_marked_bytes;
-
-public:
-  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
-                         BitMap* region_bm, BitMap* card_bm) :
-    CMCountDataClosureBase(g1h, region_bm, card_bm),
-    _bm(bm), _region_marked_bytes(0) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* ntams = hr->next_top_at_mark_start();
-    HeapWord* start = hr->bottom();
-
-    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
-           "Preconditions not met - "
-           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
-           p2i(start), p2i(ntams), p2i(hr->end()));
-
-    // Find the first marked object at or after "start".
-    start = _bm->getNextMarkedWordAddress(start, ntams);
-
-    size_t marked_bytes = 0;
-
-    while (start < ntams) {
-      oop obj = oop(start);
-      int obj_sz = obj->size();
-      HeapWord* obj_end = start + obj_sz;
-
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
-
-      // Note: if we're looking at the last region in heap - obj_end
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-
-      // Set the bits in the card BM for the cards spanned by this object.
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-
-      // Add the size of this object to the number of marked bytes.
-      marked_bytes += (size_t)obj_sz * HeapWordSize;
-
-      // This will happen if we are handling a humongous object that spans
-      // several heap regions.
-      if (obj_end > hr->end()) {
-        break;
-      }
-      // Find the next marked object after this one.
-      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
-    }
-
-    // Mark the allocated-since-marking portion...
-    HeapWord* top = hr->top();
-    if (ntams < top) {
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
-
-      // Note: if we're looking at the last region in heap - top
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-
-      // This definitely means the region has live objects.
-      set_bit_for_region(hr);
-    }
-
-    // Update the live region bitmap.
-    if (marked_bytes > 0) {
-      set_bit_for_region(hr);
-    }
-
-    // Set the marked bytes for the current region so that
-    // it can be queried by a calling verification routine
-    _region_marked_bytes = marked_bytes;
-
-    return false;
-  }
-
-  size_t region_marked_bytes() const { return _region_marked_bytes; }
-};
-
-// Heap region closure used for verifying the counting data
-// that was accumulated concurrently and aggregated during
-// the remark pause. This closure is applied to the heap
-// regions during the STW cleanup pause.
-
-class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  ConcurrentMark* _cm;
-  CalcLiveObjectsClosure _calc_cl;
-  BitMap* _region_bm;   // Region BM to be verified
-  BitMap* _card_bm;     // Card BM to be verified
-
-  BitMap* _exp_region_bm; // Expected Region BM values
-  BitMap* _exp_card_bm;   // Expected card BM values
-
-  int _failures;
-
-public:
-  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
-                                BitMap* region_bm,
-                                BitMap* card_bm,
-                                BitMap* exp_region_bm,
-                                BitMap* exp_card_bm) :
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
-    _region_bm(region_bm), _card_bm(card_bm),
-    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
-    _failures(0) { }
-
-  int failures() const { return _failures; }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    int failures = 0;
-
-    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
-    // this region and set the corresponding bits in the expected region
-    // and card bitmaps.
-    bool res = _calc_cl.doHeapRegion(hr);
-    assert(res == false, "should be continuing");
-
-    // Verify the marked bytes for this region.
-    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
-    size_t act_marked_bytes = hr->next_marked_bytes();
-
-    if (exp_marked_bytes > act_marked_bytes) {
-      if (hr->is_starts_humongous()) {
-        // For start_humongous regions, the size of the whole object will be
-        // in exp_marked_bytes.
-        HeapRegion* region = hr;
-        int num_regions;
-        for (num_regions = 0; region != NULL; num_regions++) {
-          region = _g1h->next_region_in_humongous(region);
-        }
-        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
-          failures += 1;
-        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
-          failures += 1;
-        }
-      } else {
-        // We're not OK if expected marked bytes > actual marked bytes. It means
-        // we have missed accounting some objects during the actual marking.
-        failures += 1;
-      }
-    }
-
-    // Verify the bit, for this region, in the actual and expected
-    // (which was just calculated) region bit maps.
-    // We're not OK if the bit in the calculated expected region
-    // bitmap is set and the bit in the actual region bitmap is not.
-    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
-
-    bool expected = _exp_region_bm->at(index);
-    bool actual = _region_bm->at(index);
-    if (expected && !actual) {
-      failures += 1;
-    }
-
-    // Verify that the card bit maps for the cards spanned by the current
-    // region match. We have an error if we have a set bit in the expected
-    // bit map and the corresponding bit in the actual bitmap is not set.
-
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
-    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
-
-    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
-      expected = _exp_card_bm->at(i);
-      actual = _card_bm->at(i);
-
-      if (expected && !actual) {
-        failures += 1;
-      }
-    }
-
-    _failures += failures;
-
-    // We could stop iteration over the heap when we
-    // find the first violating region by returning true.
-    return false;
-  }
-};
-
-class G1ParVerifyFinalCountTask: public AbstractGangTask {
-protected:
-  G1CollectedHeap* _g1h;
-  ConcurrentMark* _cm;
-  BitMap* _actual_region_bm;
-  BitMap* _actual_card_bm;
-
-  uint    _n_workers;
-
-  BitMap* _expected_region_bm;
-  BitMap* _expected_card_bm;
-
-  int  _failures;
-
-  HeapRegionClaimer _hrclaimer;
-
-public:
-  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
-                            BitMap* region_bm, BitMap* card_bm,
-                            BitMap* expected_region_bm, BitMap* expected_card_bm)
-    : AbstractGangTask("G1 verify final counting"),
-      _g1h(g1h), _cm(_g1h->concurrent_mark()),
-      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
-      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
-      _failures(0),
-      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
-    assert(VerifyDuringGC, "don't call this otherwise");
-    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
-    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
-  }
-
-  void work(uint worker_id) {
-    assert(worker_id < _n_workers, "invariant");
-
-    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
-                                            _actual_region_bm, _actual_card_bm,
-                                            _expected_region_bm,
-                                            _expected_card_bm);
-
-    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
-
-    Atomic::add(verify_cl.failures(), &_failures);
-  }
-
-  int failures() const { return _failures; }
-};
-
-// Closure that finalizes the liveness counting data.
-// Used during the cleanup pause.
-// Sets the bits corresponding to the interval [NTAMS, top]
-// (which contains the implicitly live objects) in the
-// card liveness bitmap. Also sets the bit for each region,
-// containing live data, in the region liveness bitmap.
-
-class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
- public:
-  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
-                              BitMap* region_bm,
-                              BitMap* card_bm) :
-    CMCountDataClosureBase(g1h, region_bm, card_bm) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* ntams = hr->next_top_at_mark_start();
-    HeapWord* top   = hr->top();
-
-    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
-
-    // Mark the allocated-since-marking portion...
-    if (ntams < top) {
-      // This definitely means the region has live objects.
-      set_bit_for_region(hr);
-
-      // Now set the bits in the card bitmap for [ntams, top)
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
-
-      // Note: if we're looking at the last region in heap - top
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-
-      assert(end_idx <= _card_bm->size(),
-             "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
-             end_idx, _card_bm->size());
-      assert(start_idx < _card_bm->size(),
-             "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
-             start_idx, _card_bm->size());
-
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-    }
-
-    // Set the bit for the region if it contains live data
-    if (hr->next_marked_bytes() > 0) {
-      set_bit_for_region(hr);
-    }
-
-    return false;
-  }
-};
-
-class G1ParFinalCountTask: public AbstractGangTask {
-protected:
-  G1CollectedHeap* _g1h;
-  ConcurrentMark* _cm;
-  BitMap* _actual_region_bm;
-  BitMap* _actual_card_bm;
-
-  uint    _n_workers;
-  HeapRegionClaimer _hrclaimer;
-
-public:
-  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
-    : AbstractGangTask("G1 final counting"),
-      _g1h(g1h), _cm(_g1h->concurrent_mark()),
-      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
-      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
-  }
-
-  void work(uint worker_id) {
-    assert(worker_id < _n_workers, "invariant");
-
-    FinalCountDataUpdateClosure final_update_cl(_g1h,
-                                                _actual_region_bm,
-                                                _actual_card_bm);
-
-    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
-  }
-};
-
-class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
-  G1CollectedHeap* _g1;
-  size_t _freed_bytes;
-  FreeRegionList* _local_cleanup_list;
-  uint _old_regions_removed;
-  uint _humongous_regions_removed;
-  HRRSCleanupTask* _hrrs_cleanup_task;
-
-public:
-  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
-                             FreeRegionList* local_cleanup_list,
-                             HRRSCleanupTask* hrrs_cleanup_task) :
-    _g1(g1),
-    _freed_bytes(0),
-    _local_cleanup_list(local_cleanup_list),
-    _old_regions_removed(0),
-    _humongous_regions_removed(0),
-    _hrrs_cleanup_task(hrrs_cleanup_task) { }
-
-  size_t freed_bytes() { return _freed_bytes; }
-  const uint old_regions_removed() { return _old_regions_removed; }
-  const uint humongous_regions_removed() { return _humongous_regions_removed; }
-
-  bool doHeapRegion(HeapRegion *hr) {
-    if (hr->is_archive()) {
-      return false;
-    }
-    // We use a claim value of zero here because all regions
-    // were claimed with value 1 in the FinalCount task.
-    _g1->reset_gc_time_stamps(hr);
-    hr->note_end_of_marking();
-
-    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
-      _freed_bytes += hr->used();
-      hr->set_containing_set(NULL);
-      if (hr->is_humongous()) {
-        _humongous_regions_removed++;
-        _g1->free_humongous_region(hr, _local_cleanup_list, true);
-      } else {
-        _old_regions_removed++;
-        _g1->free_region(hr, _local_cleanup_list, true);
-      }
-    } else {
-      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
-    }
-
-    return false;
-  }
-};
-
-class G1ParNoteEndTask: public AbstractGangTask {
-  friend class G1NoteEndOfConcMarkClosure;
-
-protected:
-  G1CollectedHeap* _g1h;
-  FreeRegionList* _cleanup_list;
-  HeapRegionClaimer _hrclaimer;
-
-public:
-  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
-      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
-  }
-
-  void work(uint worker_id) {
-    FreeRegionList local_cleanup_list("Local Cleanup List");
-    HRRSCleanupTask hrrs_cleanup_task;
-    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
-                                           &hrrs_cleanup_task);
-    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
-    assert(g1_note_end.complete(), "Shouldn't have yielded!");
-
-    // Now update the lists
-    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
-    {
-      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
-
-      // If we iterate over the global cleanup list at the end of
-      // cleanup to do this printing we will not guarantee to only
-      // generate output for the newly-reclaimed regions (the list
-      // might not be empty at the beginning of cleanup; we might
-      // still be working on its previous contents). So we do the
-      // printing here, before we append the new regions to the global
-      // cleanup list.
-
-      G1HRPrinter* hr_printer = _g1h->hr_printer();
-      if (hr_printer->is_active()) {
-        FreeRegionListIterator iter(&local_cleanup_list);
-        while (iter.more_available()) {
-          HeapRegion* hr = iter.get_next();
-          hr_printer->cleanup(hr);
-        }
-      }
-
-      _cleanup_list->add_ordered(&local_cleanup_list);
-      assert(local_cleanup_list.is_empty(), "post-condition");
-
-      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
-    }
-  }
-};
-
-void ConcurrentMark::cleanup() {
-  // world is stopped at this checkpoint
-  assert(SafepointSynchronize::is_at_safepoint(),
-         "world should be stopped");
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // If a full collection has happened, we shouldn't do this.
-  if (has_aborted()) {
-    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
-    return;
-  }
-
-  g1h->verifier()->verify_region_sets_optional();
-
-  if (VerifyDuringGC) {
-    HandleMark hm;  // handle scope
-    g1h->prepare_for_verify();
-    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
-  }
-  g1h->verifier()->check_bitmaps("Cleanup Start");
-
-  G1CollectorPolicy* g1p = g1h->g1_policy();
-  g1p->record_concurrent_mark_cleanup_start();
-
-  double start = os::elapsedTime();
-
-  HeapRegionRemSet::reset_for_cleanup_tasks();
-
-  // Do counting once more with the world stopped for good measure.
-  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
-
-  g1h->workers()->run_task(&g1_par_count_task);
-
-  if (VerifyDuringGC) {
-    // Verify that the counting data accumulated during marking matches
-    // that calculated by walking the marking bitmap.
-
-    // Bitmaps to hold expected values
-    BitMap expected_region_bm(_region_bm.size(), true);
-    BitMap expected_card_bm(_card_bm.size(), true);
-
-    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
-                                                 &_region_bm,
-                                                 &_card_bm,
-                                                 &expected_region_bm,
-                                                 &expected_card_bm);
-
-    g1h->workers()->run_task(&g1_par_verify_task);
-
-    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
-  }
-
-  size_t start_used_bytes = g1h->used();
-  g1h->collector_state()->set_mark_in_progress(false);
-
-  double count_end = os::elapsedTime();
-  double this_final_counting_time = (count_end - start);
-  _total_counting_time += this_final_counting_time;
-
-  if (log_is_enabled(Trace, gc, liveness)) {
-    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
-    _g1h->heap_region_iterate(&cl);
-  }
-
-  // Install newly created mark bitMap as "prev".
-  swapMarkBitMaps();
-
-  g1h->reset_gc_time_stamp();
-
-  uint n_workers = _g1h->workers()->active_workers();
-
-  // Note end of marking in all heap regions.
-  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
-  g1h->workers()->run_task(&g1_par_note_end_task);
-  g1h->check_gc_time_stamps();
-
-  if (!cleanup_list_is_empty()) {
-    // The cleanup list is not empty, so we'll have to process it
-    // concurrently. Notify anyone else that might be wanting free
-    // regions that there will be more free regions coming soon.
-    g1h->set_free_regions_coming();
-  }
-
-  // call below, since it affects the metric by which we sort the heap
-  // regions.
-  if (G1ScrubRemSets) {
-    double rs_scrub_start = os::elapsedTime();
-    g1h->scrub_rem_set(&_region_bm, &_card_bm);
-    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
-  }
-
-  // this will also free any regions totally full of garbage objects,
-  // and sort the regions.
-  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
-
-  // Statistics.
-  double end = os::elapsedTime();
-  _cleanup_times.add((end - start) * 1000.0);
-
-  // Clean up will have freed any regions completely full of garbage.
-  // Update the soft reference policy with the new heap occupancy.
-  Universe::update_heap_info_at_gc();
-
-  if (VerifyDuringGC) {
-    HandleMark hm;  // handle scope
-    g1h->prepare_for_verify();
-    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
-  }
-
-  g1h->verifier()->check_bitmaps("Cleanup End");
-
-  g1h->verifier()->verify_region_sets_optional();
-
-  // We need to make this be a "collection" so any collection pause that
-  // races with it goes around and waits for completeCleanup to finish.
-  g1h->increment_total_collections();
-
-  // Clean out dead classes and update Metaspace sizes.
-  if (ClassUnloadingWithConcurrentMark) {
-    ClassLoaderDataGraph::purge();
-  }
-  MetaspaceGC::compute_new_size();
-
-  // We reclaimed old regions so we should calculate the sizes to make
-  // sure we update the old gen/space data.
-  g1h->g1mm()->update_sizes();
-  g1h->allocation_context_stats().update_after_mark();
-
-  g1h->trace_heap_after_concurrent_cycle();
-}
-
-void ConcurrentMark::completeCleanup() {
-  if (has_aborted()) return;
-
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  _cleanup_list.verify_optional();
-  FreeRegionList tmp_free_list("Tmp Free List");
-
-  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
-                                  "cleanup list has %u entries",
-                                  _cleanup_list.length());
-
-  // No one else should be accessing the _cleanup_list at this point,
-  // so it is not necessary to take any locks
-  while (!_cleanup_list.is_empty()) {
-    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
-    assert(hr != NULL, "Got NULL from a non-empty list");
-    hr->par_clear();
-    tmp_free_list.add_ordered(hr);
-
-    // Instead of adding one region at a time to the secondary_free_list,
-    // we accumulate them in the local list and move them a few at a
-    // time. This also cuts down on the number of notify_all() calls
-    // we do during this process. We'll also append the local list when
-    // _cleanup_list is empty (which means we just removed the last
-    // region from the _cleanup_list).
-    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
-        _cleanup_list.is_empty()) {
-      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
-                                      "appending %u entries to the secondary_free_list, "
-                                      "cleanup list still has %u entries",
-                                      tmp_free_list.length(),
-                                      _cleanup_list.length());
-
-      {
-        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
-        g1h->secondary_free_list_add(&tmp_free_list);
-        SecondaryFreeList_lock->notify_all();
-      }
-#ifndef PRODUCT
-      if (G1StressConcRegionFreeing) {
-        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
-          os::sleep(Thread::current(), (jlong) 1, false);
-        }
-      }
-#endif
-    }
-  }
-  assert(tmp_free_list.is_empty(), "post-condition");
-}
-
-// Supporting Object and Oop closures for reference discovery
-// and processing in during marking
-
-bool G1CMIsAliveClosure::do_object_b(oop obj) {
-  HeapWord* addr = (HeapWord*)obj;
-  return addr != NULL &&
-         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
-}
-
-// 'Keep Alive' oop closure used by both serial parallel reference processing.
-// Uses the CMTask associated with a worker thread (for serial reference
-// processing the CMTask for worker 0 is used) to preserve (mark) and
-// trace referent objects.
-//
-// Using the CMTask and embedded local queues avoids having the worker
-// threads operating on the global mark stack. This reduces the risk
-// of overflowing the stack - which we would rather avoid at this late
-// state. Also using the tasks' local queues removes the potential
-// of the workers interfering with each other that could occur if
-// operating on the global stack.
-
-class G1CMKeepAliveAndDrainClosure: public OopClosure {
-  ConcurrentMark* _cm;
-  CMTask*         _task;
-  int             _ref_counter_limit;
-  int             _ref_counter;
-  bool            _is_serial;
- public:
-  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
-    _cm(cm), _task(task), _is_serial(is_serial),
-    _ref_counter_limit(G1RefProcDrainInterval) {
-    assert(_ref_counter_limit > 0, "sanity");
-    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
-    _ref_counter = _ref_counter_limit;
-  }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-
-  template <class T> void do_oop_work(T* p) {
-    if (!_cm->has_overflown()) {
-      oop obj = oopDesc::load_decode_heap_oop(p);
-      _task->deal_with_reference(obj);
-      _ref_counter--;
-
-      if (_ref_counter == 0) {
-        // We have dealt with _ref_counter_limit references, pushing them
-        // and objects reachable from them on to the local stack (and
-        // possibly the global stack). Call CMTask::do_marking_step() to
-        // process these entries.
-        //
-        // We call CMTask::do_marking_step() in a loop, which we'll exit if
-        // there's nothing more to do (i.e. we're done with the entries that
-        // were pushed as a result of the CMTask::deal_with_reference() calls
-        // above) or we overflow.
-        //
-        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
-        // flag while there may still be some work to do. (See the comment at
-        // the beginning of CMTask::do_marking_step() for those conditions -
-        // one of which is reaching the specified time target.) It is only
-        // when CMTask::do_marking_step() returns without setting the
-        // has_aborted() flag that the marking step has completed.
-        do {
-          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
-          _task->do_marking_step(mark_step_duration_ms,
-                                 false      /* do_termination */,
-                                 _is_serial);
-        } while (_task->has_aborted() && !_cm->has_overflown());
-        _ref_counter = _ref_counter_limit;
-      }
-    }
-  }
-};
-
-// 'Drain' oop closure used by both serial and parallel reference processing.
-// Uses the CMTask associated with a given worker thread (for serial
-// reference processing the CMtask for worker 0 is used). Calls the
-// do_marking_step routine, with an unbelievably large timeout value,
-// to drain the marking data structures of the remaining entries
-// added by the 'keep alive' oop closure above.
-
-class G1CMDrainMarkingStackClosure: public VoidClosure {
-  ConcurrentMark* _cm;
-  CMTask*         _task;
-  bool            _is_serial;
- public:
-  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
-    _cm(cm), _task(task), _is_serial(is_serial) {
-    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
-  }
-
-  void do_void() {
-    do {
-      // We call CMTask::do_marking_step() to completely drain the local
-      // and global marking stacks of entries pushed by the 'keep alive'
-      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
-      //
-      // CMTask::do_marking_step() is called in a loop, which we'll exit
-      // if there's nothing more to do (i.e. we've completely drained the
-      // entries that were pushed as a a result of applying the 'keep alive'
-      // closure to the entries on the discovered ref lists) or we overflow
-      // the global marking stack.
-      //
-      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
-      // flag while there may still be some work to do. (See the comment at
-      // the beginning of CMTask::do_marking_step() for those conditions -
-      // one of which is reaching the specified time target.) It is only
-      // when CMTask::do_marking_step() returns without setting the
-      // has_aborted() flag that the marking step has completed.
-
-      _task->do_marking_step(1000000000.0 /* something very large */,
-                             true         /* do_termination */,
-                             _is_serial);
-    } while (_task->has_aborted() && !_cm->has_overflown());
-  }
-};
-
-// Implementation of AbstractRefProcTaskExecutor for parallel
-// reference processing at the end of G1 concurrent marking
-
-class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
-private:
-  G1CollectedHeap* _g1h;
-  ConcurrentMark*  _cm;
-  WorkGang*        _workers;
-  uint             _active_workers;
-
-public:
-  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
-                          ConcurrentMark* cm,
-                          WorkGang* workers,
-                          uint n_workers) :
-    _g1h(g1h), _cm(cm),
-    _workers(workers), _active_workers(n_workers) { }
-
-  // Executes the given task using concurrent marking worker threads.
-  virtual void execute(ProcessTask& task);
-  virtual void execute(EnqueueTask& task);
-};
-
-class G1CMRefProcTaskProxy: public AbstractGangTask {
-  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
-  ProcessTask&     _proc_task;
-  G1CollectedHeap* _g1h;
-  ConcurrentMark*  _cm;
-
-public:
-  G1CMRefProcTaskProxy(ProcessTask& proc_task,
-                     G1CollectedHeap* g1h,
-                     ConcurrentMark* cm) :
-    AbstractGangTask("Process reference objects in parallel"),
-    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
-    ReferenceProcessor* rp = _g1h->ref_processor_cm();
-    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
-  }
-
-  virtual void work(uint worker_id) {
-    ResourceMark rm;
-    HandleMark hm;
-    CMTask* task = _cm->task(worker_id);
-    G1CMIsAliveClosure g1_is_alive(_g1h);
-    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
-    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
-
-    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
-  }
-};
-
-void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
-  assert(_workers != NULL, "Need parallel worker threads.");
-  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
-
-  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
-
-  // We need to reset the concurrency level before each
-  // proxy task execution, so that the termination protocol
-  // and overflow handling in CMTask::do_marking_step() knows
-  // how many workers to wait for.
-  _cm->set_concurrency(_active_workers);
-  _workers->run_task(&proc_task_proxy);
-}
-
-class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
-  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
-  EnqueueTask& _enq_task;
-
-public:
-  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
-    AbstractGangTask("Enqueue reference objects in parallel"),
-    _enq_task(enq_task) { }
-
-  virtual void work(uint worker_id) {
-    _enq_task.work(worker_id);
-  }
-};
-
-void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
-  assert(_workers != NULL, "Need parallel worker threads.");
-  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
-
-  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
-
-  // Not strictly necessary but...
-  //
-  // We need to reset the concurrency level before each
-  // proxy task execution, so that the termination protocol
-  // and overflow handling in CMTask::do_marking_step() knows
-  // how many workers to wait for.
-  _cm->set_concurrency(_active_workers);
-  _workers->run_task(&enq_task_proxy);
-}
-
-void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
-  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
-}
-
-void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
-  if (has_overflown()) {
-    // Skip processing the discovered references if we have
-    // overflown the global marking stack. Reference objects
-    // only get discovered once so it is OK to not
-    // de-populate the discovered reference lists. We could have,
-    // but the only benefit would be that, when marking restarts,
-    // less reference objects are discovered.
-    return;
-  }
-
-  ResourceMark rm;
-  HandleMark   hm;
-
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // Is alive closure.
-  G1CMIsAliveClosure g1_is_alive(g1h);
-
-  // Inner scope to exclude the cleaning of the string and symbol
-  // tables from the displayed time.
-  {
-    GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
-
-    ReferenceProcessor* rp = g1h->ref_processor_cm();
-
-    // See the comment in G1CollectedHeap::ref_processing_init()
-    // about how reference processing currently works in G1.
-
-    // Set the soft reference policy
-    rp->setup_policy(clear_all_soft_refs);
-    assert(_markStack.isEmpty(), "mark stack should be empty");
-
-    // Instances of the 'Keep Alive' and 'Complete GC' closures used
-    // in serial reference processing. Note these closures are also
-    // used for serially processing (by the the current thread) the
-    // JNI references during parallel reference processing.
-    //
-    // These closures do not need to synchronize with the worker
-    // threads involved in parallel reference processing as these
-    // instances are executed serially by the current thread (e.g.
-    // reference processing is not multi-threaded and is thus
-    // performed by the current thread instead of a gang worker).
-    //
-    // The gang tasks involved in parallel reference processing create
-    // their own instances of these closures, which do their own
-    // synchronization among themselves.
-    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
-    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
-
-    // We need at least one active thread. If reference processing
-    // is not multi-threaded we use the current (VMThread) thread,
-    // otherwise we use the work gang from the G1CollectedHeap and
-    // we utilize all the worker threads we can.
-    bool processing_is_mt = rp->processing_is_mt();
-    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
-    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
-
-    // Parallel processing task executor.
-    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
-                                              g1h->workers(), active_workers);
-    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
-
-    // Set the concurrency level. The phase was already set prior to
-    // executing the remark task.
-    set_concurrency(active_workers);
-
-    // Set the degree of MT processing here.  If the discovery was done MT,
-    // the number of threads involved during discovery could differ from
-    // the number of active workers.  This is OK as long as the discovered
-    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
-    rp->set_active_mt_degree(active_workers);
-
-    // Process the weak references.
-    const ReferenceProcessorStats& stats =
-        rp->process_discovered_references(&g1_is_alive,
-                                          &g1_keep_alive,
-                                          &g1_drain_mark_stack,
-                                          executor,
-                                          g1h->gc_timer_cm());
-    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
-
-    // The do_oop work routines of the keep_alive and drain_marking_stack
-    // oop closures will set the has_overflown flag if we overflow the
-    // global marking stack.
-
-    assert(_markStack.overflow() || _markStack.isEmpty(),
-            "mark stack should be empty (unless it overflowed)");
-
-    if (_markStack.overflow()) {
-      // This should have been done already when we tried to push an
-      // entry on to the global mark stack. But let's do it again.
-      set_has_overflown();
-    }
-
-    assert(rp->num_q() == active_workers, "why not");
-
-    rp->enqueue_discovered_references(executor);
-
-    rp->verify_no_references_recorded();
-    assert(!rp->discovery_enabled(), "Post condition");
-  }
-
-  if (has_overflown()) {
-    // We can not trust g1_is_alive if the marking stack overflowed
-    return;
-  }
-
-  assert(_markStack.isEmpty(), "Marking should have completed");
-
-  // Unload Klasses, String, Symbols, Code Cache, etc.
-  {
-    GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
-
-    if (ClassUnloadingWithConcurrentMark) {
-      bool purged_classes;
-
-      {
-        GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
-        purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
-      }
-
-      {
-        GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
-        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
-      }
-    }
-
-    if (G1StringDedup::is_enabled()) {
-      GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
-      G1StringDedup::unlink(&g1_is_alive);
-    }
-  }
-}
-
-void ConcurrentMark::swapMarkBitMaps() {
-  CMBitMapRO* temp = _prevMarkBitMap;
-  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
-  _nextMarkBitMap  = (CMBitMap*)  temp;
-}
-
-// Closure for marking entries in SATB buffers.
-class CMSATBBufferClosure : public SATBBufferClosure {
-private:
-  CMTask* _task;
-  G1CollectedHeap* _g1h;
-
-  // This is very similar to CMTask::deal_with_reference, but with
-  // more relaxed requirements for the argument, so this must be more
-  // circumspect about treating the argument as an object.
-  void do_entry(void* entry) const {
-    _task->increment_refs_reached();
-    HeapRegion* hr = _g1h->heap_region_containing(entry);
-    if (entry < hr->next_top_at_mark_start()) {
-      // Until we get here, we don't know whether entry refers to a valid
-      // object; it could instead have been a stale reference.
-      oop obj = static_cast<oop>(entry);
-      assert(obj->is_oop(true /* ignore mark word */),
-             "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
-      _task->make_reference_grey(obj, hr);
-    }
-  }
-
-public:
-  CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
-    : _task(task), _g1h(g1h) { }
-
-  virtual void do_buffer(void** buffer, size_t size) {
-    for (size_t i = 0; i < size; ++i) {
-      do_entry(buffer[i]);
-    }
-  }
-};
-
-class G1RemarkThreadsClosure : public ThreadClosure {
-  CMSATBBufferClosure _cm_satb_cl;
-  G1CMOopClosure _cm_cl;
-  MarkingCodeBlobClosure _code_cl;
-  int _thread_parity;
-
- public:
-  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
-    _cm_satb_cl(task, g1h),
-    _cm_cl(g1h, g1h->concurrent_mark(), task),
-    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
-    _thread_parity(Threads::thread_claim_parity()) {}
-
-  void do_thread(Thread* thread) {
-    if (thread->is_Java_thread()) {
-      if (thread->claim_oops_do(true, _thread_parity)) {
-        JavaThread* jt = (JavaThread*)thread;
-
-        // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
-        // however the liveness of oops reachable from nmethods have very complex lifecycles:
-        // * Alive if on the stack of an executing method
-        // * Weakly reachable otherwise
-        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
-        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
-        jt->nmethods_do(&_code_cl);
-
-        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
-      }
-    } else if (thread->is_VM_thread()) {
-      if (thread->claim_oops_do(true, _thread_parity)) {
-        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
-      }
-    }
-  }
-};
-
-class CMRemarkTask: public AbstractGangTask {
-private:
-  ConcurrentMark* _cm;
-public:
-  void work(uint worker_id) {
-    // Since all available tasks are actually started, we should
-    // only proceed if we're supposed to be active.
-    if (worker_id < _cm->active_tasks()) {
-      CMTask* task = _cm->task(worker_id);
-      task->record_start_time();
-      {
-        ResourceMark rm;
-        HandleMark hm;
-
-        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
-        Threads::threads_do(&threads_f);
-      }
-
-      do {
-        task->do_marking_step(1000000000.0 /* something very large */,
-                              true         /* do_termination       */,
-                              false        /* is_serial            */);
-      } while (task->has_aborted() && !_cm->has_overflown());
-      // If we overflow, then we do not want to restart. We instead
-      // want to abort remark and do concurrent marking again.
-      task->record_end_time();
-    }
-  }
-
-  CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
-    AbstractGangTask("Par Remark"), _cm(cm) {
-    _cm->terminator()->reset_for_reuse(active_workers);
-  }
-};
-
-void ConcurrentMark::checkpointRootsFinalWork() {
-  ResourceMark rm;
-  HandleMark   hm;
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
-
-  g1h->ensure_parsability(false);
-
-  // this is remark, so we'll use up all active threads
-  uint active_workers = g1h->workers()->active_workers();
-  set_concurrency_and_phase(active_workers, false /* concurrent */);
-  // Leave _parallel_marking_threads at it's
-  // value originally calculated in the ConcurrentMark
-  // constructor and pass values of the active workers
-  // through the gang in the task.
-
-  {
-    StrongRootsScope srs(active_workers);
-
-    CMRemarkTask remarkTask(this, active_workers);
-    // We will start all available threads, even if we decide that the
-    // active_workers will be fewer. The extra ones will just bail out
-    // immediately.
-    g1h->workers()->run_task(&remarkTask);
-  }
-
-  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  guarantee(has_overflown() ||
-            satb_mq_set.completed_buffers_num() == 0,
-            "Invariant: has_overflown = %s, num buffers = %d",
-            BOOL_TO_STR(has_overflown()),
-            satb_mq_set.completed_buffers_num());
-
-  print_stats();
-}
-
-void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
-  // Note we are overriding the read-only view of the prev map here, via
-  // the cast.
-  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
-}
-
-HeapRegion*
-ConcurrentMark::claim_region(uint worker_id) {
-  // "checkpoint" the finger
-  HeapWord* finger = _finger;
-
-  // _heap_end will not change underneath our feet; it only changes at
-  // yield points.
-  while (finger < _heap_end) {
-    assert(_g1h->is_in_g1_reserved(finger), "invariant");
-
-    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
-
-    // Above heap_region_containing may return NULL as we always scan claim
-    // until the end of the heap. In this case, just jump to the next region.
-    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
-
-    // Is the gap between reading the finger and doing the CAS too long?
-    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
-    if (res == finger && curr_region != NULL) {
-      // we succeeded
-      HeapWord*   bottom        = curr_region->bottom();
-      HeapWord*   limit         = curr_region->next_top_at_mark_start();
-
-      // notice that _finger == end cannot be guaranteed here since,
-      // someone else might have moved the finger even further
-      assert(_finger >= end, "the finger should have moved forward");
-
-      if (limit > bottom) {
-        return curr_region;
-      } else {
-        assert(limit == bottom,
-               "the region limit should be at bottom");
-        // we return NULL and the caller should try calling
-        // claim_region() again.
-        return NULL;
-      }
-    } else {
-      assert(_finger > finger, "the finger should have moved forward");
-      // read it again
-      finger = _finger;
-    }
-  }
-
-  return NULL;
-}
-
-#ifndef PRODUCT
-class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
-private:
-  G1CollectedHeap* _g1h;
-  const char* _phase;
-  int _info;
-
-public:
-  VerifyNoCSetOops(const char* phase, int info = -1) :
-    _g1h(G1CollectedHeap::heap()),
-    _phase(phase),
-    _info(info)
-  { }
-
-  void operator()(oop obj) const {
-    guarantee(obj->is_oop(),
-              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
-              p2i(obj), _phase, _info);
-    guarantee(!_g1h->obj_in_cs(obj),
-              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
-              p2i(obj), _phase, _info);
-  }
-};
-
-void ConcurrentMark::verify_no_cset_oops() {
-  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
-  if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
-    return;
-  }
-
-  // Verify entries on the global mark stack
-  _markStack.iterate(VerifyNoCSetOops("Stack"));
-
-  // Verify entries on the task queues
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    CMTaskQueue* queue = _task_queues->queue(i);
-    queue->iterate(VerifyNoCSetOops("Queue", i));
-  }
-
-  // Verify the global finger
-  HeapWord* global_finger = finger();
-  if (global_finger != NULL && global_finger < _heap_end) {
-    // Since we always iterate over all regions, we might get a NULL HeapRegion
-    // here.
-    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
-    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
-              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
-              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
-  }
-
-  // Verify the task fingers
-  assert(parallel_marking_threads() <= _max_worker_id, "sanity");
-  for (uint i = 0; i < parallel_marking_threads(); ++i) {
-    CMTask* task = _tasks[i];
-    HeapWord* task_finger = task->finger();
-    if (task_finger != NULL && task_finger < _heap_end) {
-      // See above note on the global finger verification.
-      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
-      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
-                !task_hr->in_collection_set(),
-                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
-                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
-    }
-  }
-}
-#endif // PRODUCT
-
-// Aggregate the counting data that was constructed concurrently
-// with marking.
-class AggregateCountDataHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  ConcurrentMark* _cm;
-  CardTableModRefBS* _ct_bs;
-  BitMap* _cm_card_bm;
-  uint _max_worker_id;
-
- public:
-  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
-                              BitMap* cm_card_bm,
-                              uint max_worker_id) :
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
-    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* start = hr->bottom();
-    HeapWord* limit = hr->next_top_at_mark_start();
-    HeapWord* end = hr->end();
-
-    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
-           "Preconditions not met - "
-           "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
-           "top: " PTR_FORMAT ", end: " PTR_FORMAT,
-           p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
-
-    assert(hr->next_marked_bytes() == 0, "Precondition");
-
-    if (start == limit) {
-      // NTAMS of this region has not been set so nothing to do.
-      return false;
-    }
-
-    // 'start' should be in the heap.
-    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
-    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
-    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
-
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
-    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
-
-    // If ntams is not card aligned then we bump card bitmap index
-    // for limit so that we get the all the cards spanned by
-    // the object ending at ntams.
-    // Note: if this is the last region in the heap then ntams
-    // could be actually just beyond the end of the the heap;
-    // limit_idx will then  correspond to a (non-existent) card
-    // that is also outside the heap.
-    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
-      limit_idx += 1;
-    }
-
-    assert(limit_idx <= end_idx, "or else use atomics");
-
-    // Aggregate the "stripe" in the count data associated with hr.
-    uint hrm_index = hr->hrm_index();
-    size_t marked_bytes = 0;
-
-    for (uint i = 0; i < _max_worker_id; i += 1) {
-      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
-      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
-
-      // Fetch the marked_bytes in this region for task i and
-      // add it to the running total for this region.
-      marked_bytes += marked_bytes_array[hrm_index];
-
-      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
-      // into the global card bitmap.
-      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
-
-      while (scan_idx < limit_idx) {
-        assert(task_card_bm->at(scan_idx) == true, "should be");
-        _cm_card_bm->set_bit(scan_idx);
-        assert(_cm_card_bm->at(scan_idx) == true, "should be");
-
-        // BitMap::get_next_one_offset() can handle the case when
-        // its left_offset parameter is greater than its right_offset
-        // parameter. It does, however, have an early exit if
-        // left_offset == right_offset. So let's limit the value
-        // passed in for left offset here.
-        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
-        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
-      }
-    }
-
-    // Update the marked bytes for this region.
-    hr->add_to_marked_bytes(marked_bytes);
-
-    // Next heap region
-    return false;
-  }
-};
-
-class G1AggregateCountDataTask: public AbstractGangTask {
-protected:
-  G1CollectedHeap* _g1h;
-  ConcurrentMark* _cm;
-  BitMap* _cm_card_bm;
-  uint _max_worker_id;
-  uint _active_workers;
-  HeapRegionClaimer _hrclaimer;
-
-public:
-  G1AggregateCountDataTask(G1CollectedHeap* g1h,
-                           ConcurrentMark* cm,
-                           BitMap* cm_card_bm,
-                           uint max_worker_id,
-                           uint n_workers) :
-      AbstractGangTask("Count Aggregation"),
-      _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
-      _max_worker_id(max_worker_id),
-      _active_workers(n_workers),
-      _hrclaimer(_active_workers) {
-  }
-
-  void work(uint worker_id) {
-    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
-
-    _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
-  }
-};
-
-
-void ConcurrentMark::aggregate_count_data() {
-  uint n_workers = _g1h->workers()->active_workers();
-
-  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
-                                           _max_worker_id, n_workers);
-
-  _g1h->workers()->run_task(&g1_par_agg_task);
-}
-
-// Clear the per-worker arrays used to store the per-region counting data
-void ConcurrentMark::clear_all_count_data() {
-  // Clear the global card bitmap - it will be filled during
-  // liveness count aggregation (during remark) and the
-  // final counting task.
-  _card_bm.clear();
-
-  // Clear the global region bitmap - it will be filled as part
-  // of the final counting task.
-  _region_bm.clear();
-
-  uint max_regions = _g1h->max_regions();
-  assert(_max_worker_id > 0, "uninitialized");
-
-  for (uint i = 0; i < _max_worker_id; i += 1) {
-    BitMap* task_card_bm = count_card_bitmap_for(i);
-    size_t* marked_bytes_array = count_marked_bytes_array_for(i);
-
-    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
-    assert(marked_bytes_array != NULL, "uninitialized");
-
-    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
-    task_card_bm->clear();
-  }
-}
-
-void ConcurrentMark::print_stats() {
-  if (!log_is_enabled(Debug, gc, stats)) {
-    return;
-  }
-  log_debug(gc, stats)("---------------------------------------------------------------------");
-  for (size_t i = 0; i < _active_tasks; ++i) {
-    _tasks[i]->print_stats();
-    log_debug(gc, stats)("---------------------------------------------------------------------");
-  }
-}
-
-// abandon current marking iteration due to a Full GC
-void ConcurrentMark::abort() {
-  if (!cmThread()->during_cycle() || _has_aborted) {
-    // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
-    return;
-  }
-
-  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
-  // concurrent bitmap clearing.
-  _nextMarkBitMap->clearAll();
-
-  // Note we cannot clear the previous marking bitmap here
-  // since VerifyDuringGC verifies the objects marked during
-  // a full GC against the previous bitmap.
-
-  // Clear the liveness counting data
-  clear_all_count_data();
-  // Empty mark stack
-  reset_marking_state();
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    _tasks[i]->clear_region_fields();
-  }
-  _first_overflow_barrier_sync.abort();
-  _second_overflow_barrier_sync.abort();
-  _has_aborted = true;
-
-  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  satb_mq_set.abandon_partial_marking();
-  // This can be called either during or outside marking, we'll read
-  // the expected_active value from the SATB queue set.
-  satb_mq_set.set_active_all_threads(
-                                 false, /* new active value */
-                                 satb_mq_set.is_active() /* expected_active */);
-
-  _g1h->trace_heap_after_concurrent_cycle();
-
-  // Close any open concurrent phase timing
-  register_concurrent_phase_end();
-
-  _g1h->register_concurrent_cycle_end();
-}
-
-static void print_ms_time_info(const char* prefix, const char* name,
-                               NumberSeq& ns) {
-  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
-                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
-  if (ns.num() > 0) {
-    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
-                           prefix, ns.sd(), ns.maximum());
-  }
-}
-
-void ConcurrentMark::print_summary_info() {
-  LogHandle(gc, marking) log;
-  if (!log.is_trace()) {
-    return;
-  }
-
-  log.trace(" Concurrent marking:");
-  print_ms_time_info("  ", "init marks", _init_times);
-  print_ms_time_info("  ", "remarks", _remark_times);
-  {
-    print_ms_time_info("     ", "final marks", _remark_mark_times);
-    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
-
-  }
-  print_ms_time_info("  ", "cleanups", _cleanup_times);
-  log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
-            _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
-  if (G1ScrubRemSets) {
-    log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
-              _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
-  }
-  log.trace("  Total stop_world time = %8.2f s.",
-            (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
-  log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
-            cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
-}
-
-void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
-  _parallel_workers->print_worker_threads_on(st);
-}
-
-void ConcurrentMark::print_on_error(outputStream* st) const {
-  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
-      p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
-  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
-  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
-}
-
-// We take a break if someone is trying to stop the world.
-bool ConcurrentMark::do_yield_check(uint worker_id) {
-  if (SuspendibleThreadSet::should_yield()) {
-    if (worker_id == 0) {
-      _g1h->g1_policy()->record_concurrent_pause();
-    }
-    SuspendibleThreadSet::yield();
-    return true;
-  } else {
-    return false;
-  }
-}
-
-// Closure for iteration over bitmaps
-class CMBitMapClosure : public BitMapClosure {
-private:
-  // the bitmap that is being iterated over
-  CMBitMap*                   _nextMarkBitMap;
-  ConcurrentMark*             _cm;
-  CMTask*                     _task;
-
-public:
-  CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
-    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
-
-  bool do_bit(size_t offset) {
-    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
-    assert(_nextMarkBitMap->isMarked(addr), "invariant");
-    assert( addr < _cm->finger(), "invariant");
-    assert(addr >= _task->finger(), "invariant");
-
-    // We move that task's local finger along.
-    _task->move_finger_to(addr);
-
-    _task->scan_object(oop(addr));
-    // we only partially drain the local queue and global stack
-    _task->drain_local_queue(true);
-    _task->drain_global_stack(true);
-
-    // if the has_aborted flag has been raised, we need to bail out of
-    // the iteration
-    return !_task->has_aborted();
-  }
-};
-
-static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
-  ReferenceProcessor* result = NULL;
-  if (G1UseConcMarkReferenceProcessing) {
-    result = g1h->ref_processor_cm();
-    assert(result != NULL, "should not be NULL");
-  }
-  return result;
-}
-
-G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
-                               ConcurrentMark* cm,
-                               CMTask* task)
-  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
-    _g1h(g1h), _cm(cm), _task(task)
-{ }
-
-void CMTask::setup_for_region(HeapRegion* hr) {
-  assert(hr != NULL,
-        "claim_region() should have filtered out NULL regions");
-  _curr_region  = hr;
-  _finger       = hr->bottom();
-  update_region_limit();
-}
-
-void CMTask::update_region_limit() {
-  HeapRegion* hr            = _curr_region;
-  HeapWord* bottom          = hr->bottom();
-  HeapWord* limit           = hr->next_top_at_mark_start();
-
-  if (limit == bottom) {
-    // The region was collected underneath our feet.
-    // We set the finger to bottom to ensure that the bitmap
-    // iteration that will follow this will not do anything.
-    // (this is not a condition that holds when we set the region up,
-    // as the region is not supposed to be empty in the first place)
-    _finger = bottom;
-  } else if (limit >= _region_limit) {
-    assert(limit >= _finger, "peace of mind");
-  } else {
-    assert(limit < _region_limit, "only way to get here");
-    // This can happen under some pretty unusual circumstances.  An
-    // evacuation pause empties the region underneath our feet (NTAMS
-    // at bottom). We then do some allocation in the region (NTAMS
-    // stays at bottom), followed by the region being used as a GC
-    // alloc region (NTAMS will move to top() and the objects
-    // originally below it will be grayed). All objects now marked in
-    // the region are explicitly grayed, if below the global finger,
-    // and we do not need in fact to scan anything else. So, we simply
-    // set _finger to be limit to ensure that the bitmap iteration
-    // doesn't do anything.
-    _finger = limit;
-  }
-
-  _region_limit = limit;
-}
-
-void CMTask::giveup_current_region() {
-  assert(_curr_region != NULL, "invariant");
-  clear_region_fields();
-}
-
-void CMTask::clear_region_fields() {
-  // Values for these three fields that indicate that we're not
-  // holding on to a region.
-  _curr_region   = NULL;
-  _finger        = NULL;
-  _region_limit  = NULL;
-}
-
-void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
-  if (cm_oop_closure == NULL) {
-    assert(_cm_oop_closure != NULL, "invariant");
-  } else {
-    assert(_cm_oop_closure == NULL, "invariant");
-  }
-  _cm_oop_closure = cm_oop_closure;
-}
-
-void CMTask::reset(CMBitMap* nextMarkBitMap) {
-  guarantee(nextMarkBitMap != NULL, "invariant");
-  _nextMarkBitMap                = nextMarkBitMap;
-  clear_region_fields();
-
-  _calls                         = 0;
-  _elapsed_time_ms               = 0.0;
-  _termination_time_ms           = 0.0;
-  _termination_start_time_ms     = 0.0;
-}
-
-bool CMTask::should_exit_termination() {
-  regular_clock_call();
-  // This is called when we are in the termination protocol. We should
-  // quit if, for some reason, this task wants to abort or the global
-  // stack is not empty (this means that we can get work from it).
-  return !_cm->mark_stack_empty() || has_aborted();
-}
-
-void CMTask::reached_limit() {
-  assert(_words_scanned >= _words_scanned_limit ||
-         _refs_reached >= _refs_reached_limit ,
-         "shouldn't have been called otherwise");
-  regular_clock_call();
-}
-
-void CMTask::regular_clock_call() {
-  if (has_aborted()) return;
-
-  // First, we need to recalculate the words scanned and refs reached
-  // limits for the next clock call.
-  recalculate_limits();
-
-  // During the regular clock call we do the following
-
-  // (1) If an overflow has been flagged, then we abort.
-  if (_cm->has_overflown()) {
-    set_has_aborted();
-    return;
-  }
-
-  // If we are not concurrent (i.e. we're doing remark) we don't need
-  // to check anything else. The other steps are only needed during
-  // the concurrent marking phase.
-  if (!concurrent()) return;
-
-  // (2) If marking has been aborted for Full GC, then we also abort.
-  if (_cm->has_aborted()) {
-    set_has_aborted();
-    return;
-  }
-
-  double curr_time_ms = os::elapsedVTime() * 1000.0;
-
-  // (4) We check whether we should yield. If we have to, then we abort.
-  if (SuspendibleThreadSet::should_yield()) {
-    // We should yield. To do this we abort the task. The caller is
-    // responsible for yielding.
-    set_has_aborted();
-    return;
-  }
-
-  // (5) We check whether we've reached our time quota. If we have,
-  // then we abort.
-  double elapsed_time_ms = curr_time_ms - _start_time_ms;
-  if (elapsed_time_ms > _time_target_ms) {
-    set_has_aborted();
-    _has_timed_out = true;
-    return;
-  }
-
-  // (6) Finally, we check whether there are enough completed STAB
-  // buffers available for processing. If there are, we abort.
-  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
-    // we do need to process SATB buffers, we'll abort and restart
-    // the marking task to do so
-    set_has_aborted();
-    return;
-  }
-}
-
-void CMTask::recalculate_limits() {
-  _real_words_scanned_limit = _words_scanned + words_scanned_period;
-  _words_scanned_limit      = _real_words_scanned_limit;
-
-  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
-  _refs_reached_limit       = _real_refs_reached_limit;
-}
-
-void CMTask::decrease_limits() {
-  // This is called when we believe that we're going to do an infrequent
-  // operation which will increase the per byte scanned cost (i.e. move
-  // entries to/from the global stack). It basically tries to decrease the
-  // scanning limit so that the clock is called earlier.
-
-  _words_scanned_limit = _real_words_scanned_limit -
-    3 * words_scanned_period / 4;
-  _refs_reached_limit  = _real_refs_reached_limit -
-    3 * refs_reached_period / 4;
-}
-
-void CMTask::move_entries_to_global_stack() {
-  // local array where we'll store the entries that will be popped
-  // from the local queue
-  oop buffer[global_stack_transfer_size];
-
-  int n = 0;
-  oop obj;
-  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
-    buffer[n] = obj;
-    ++n;
-  }
-
-  if (n > 0) {
-    // we popped at least one entry from the local queue
-
-    if (!_cm->mark_stack_push(buffer, n)) {
-      set_has_aborted();
-    }
-  }
-
-  // this operation was quite expensive, so decrease the limits
-  decrease_limits();
-}
-
-void CMTask::get_entries_from_global_stack() {
-  // local array where we'll store the entries that will be popped
-  // from the global stack.
-  oop buffer[global_stack_transfer_size];
-  int n;
-  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
-  assert(n <= global_stack_transfer_size,
-         "we should not pop more than the given limit");
-  if (n > 0) {
-    // yes, we did actually pop at least one entry
-    for (int i = 0; i < n; ++i) {
-      bool success = _task_queue->push(buffer[i]);
-      // We only call this when the local queue is empty or under a
-      // given target limit. So, we do not expect this push to fail.
-      assert(success, "invariant");
-    }
-  }
-
-  // this operation was quite expensive, so decrease the limits
-  decrease_limits();
-}
-
-void CMTask::drain_local_queue(bool partially) {
-  if (has_aborted()) return;
-
-  // Decide what the target size is, depending whether we're going to
-  // drain it partially (so that other tasks can steal if they run out
-  // of things to do) or totally (at the very end).
-  size_t target_size;
-  if (partially) {
-    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
-  } else {
-    target_size = 0;
-  }
-
-  if (_task_queue->size() > target_size) {
-    oop obj;
-    bool ret = _task_queue->pop_local(obj);
-    while (ret) {
-      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
-      assert(!_g1h->is_on_master_free_list(
-                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
-
-      scan_object(obj);
-
-      if (_task_queue->size() <= target_size || has_aborted()) {
-        ret = false;
-      } else {
-        ret = _task_queue->pop_local(obj);
-      }
-    }
-  }
-}
-
-void CMTask::drain_global_stack(bool partially) {
-  if (has_aborted()) return;
-
-  // We have a policy to drain the local queue before we attempt to
-  // drain the global stack.
-  assert(partially || _task_queue->size() == 0, "invariant");
-
-  // Decide what the target size is, depending whether we're going to
-  // drain it partially (so that other tasks can steal if they run out
-  // of things to do) or totally (at the very end).  Notice that,
-  // because we move entries from the global stack in chunks or
-  // because another task might be doing the same, we might in fact
-  // drop below the target. But, this is not a problem.
-  size_t target_size;
-  if (partially) {
-    target_size = _cm->partial_mark_stack_size_target();
-  } else {
-    target_size = 0;
-  }
-
-  if (_cm->mark_stack_size() > target_size) {
-    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
-      get_entries_from_global_stack();
-      drain_local_queue(partially);
-    }
-  }
-}
-
-// SATB Queue has several assumptions on whether to call the par or
-// non-par versions of the methods. this is why some of the code is
-// replicated. We should really get rid of the single-threaded version
-// of the code to simplify things.
-void CMTask::drain_satb_buffers() {
-  if (has_aborted()) return;
-
-  // We set this so that the regular clock knows that we're in the
-  // middle of draining buffers and doesn't set the abort flag when it
-  // notices that SATB buffers are available for draining. It'd be
-  // very counter productive if it did that. :-)
-  _draining_satb_buffers = true;
-
-  CMSATBBufferClosure satb_cl(this, _g1h);
-  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-
-  // This keeps claiming and applying the closure to completed buffers
-  // until we run out of buffers or we need to abort.
-  while (!has_aborted() &&
-         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
-    regular_clock_call();
-  }
-
-  _draining_satb_buffers = false;
-
-  assert(has_aborted() ||
-         concurrent() ||
-         satb_mq_set.completed_buffers_num() == 0, "invariant");
-
-  // again, this was a potentially expensive operation, decrease the
-  // limits to get the regular clock call early
-  decrease_limits();
-}
-
-void CMTask::print_stats() {
-  log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
-                       _worker_id, _calls);
-  log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
-                       _elapsed_time_ms, _termination_time_ms);
-  log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
-                       _step_times_ms.num(), _step_times_ms.avg(),
-                       _step_times_ms.sd());
-  log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
-                       _step_times_ms.maximum(), _step_times_ms.sum());
-}
-
-bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
-  return _task_queues->steal(worker_id, hash_seed, obj);
-}
-
-/*****************************************************************************
-
-    The do_marking_step(time_target_ms, ...) method is the building
-    block of the parallel marking framework. It can be called in parallel
-    with other invocations of do_marking_step() on different tasks
-    (but only one per task, obviously) and concurrently with the
-    mutator threads, or during remark, hence it eliminates the need
-    for two versions of the code. When called during remark, it will
-    pick up from where the task left off during the concurrent marking
-    phase. Interestingly, tasks are also claimable during evacuation
-    pauses too, since do_marking_step() ensures that it aborts before
-    it needs to yield.
-
-    The data structures that it uses to do marking work are the
-    following:
-
-      (1) Marking Bitmap. If there are gray objects that appear only
-      on the bitmap (this happens either when dealing with an overflow
-      or when the initial marking phase has simply marked the roots
-      and didn't push them on the stack), then tasks claim heap
-      regions whose bitmap they then scan to find gray objects. A
-      global finger indicates where the end of the last claimed region
-      is. A local finger indicates how far into the region a task has
-      scanned. The two fingers are used to determine how to gray an
-      object (i.e. whether simply marking it is OK, as it will be
-      visited by a task in the future, or whether it needs to be also
-      pushed on a stack).
-
-      (2) Local Queue. The local queue of the task which is accessed
-      reasonably efficiently by the task. Other tasks can steal from
-      it when they run out of work. Throughout the marking phase, a
-      task attempts to keep its local queue short but not totally
-      empty, so that entries are available for stealing by other
-      tasks. Only when there is no more work, a task will totally
-      drain its local queue.
-
-      (3) Global Mark Stack. This handles local queue overflow. During
-      marking only sets of entries are moved between it and the local
-      queues, as access to it requires a mutex and more fine-grain
-      interaction with it which might cause contention. If it
-      overflows, then the marking phase should restart and iterate
-      over the bitmap to identify gray objects. Throughout the marking
-      phase, tasks attempt to keep the global mark stack at a small
-      length but not totally empty, so that entries are available for
-      popping by other tasks. Only when there is no more work, tasks
-      will totally drain the global mark stack.
-
-      (4) SATB Buffer Queue. This is where completed SATB buffers are
-      made available. Buffers are regularly removed from this queue
-      and scanned for roots, so that the queue doesn't get too
-      long. During remark, all completed buffers are processed, as
-      well as the filled in parts of any uncompleted buffers.
-
-    The do_marking_step() method tries to abort when the time target
-    has been reached. There are a few other cases when the
-    do_marking_step() method also aborts:
-
-      (1) When the marking phase has been aborted (after a Full GC).
-
-      (2) When a global overflow (on the global stack) has been
-      triggered. Before the task aborts, it will actually sync up with
-      the other tasks to ensure that all the marking data structures
-      (local queues, stacks, fingers etc.)  are re-initialized so that
-      when do_marking_step() completes, the marking phase can
-      immediately restart.
-
-      (3) When enough completed SATB buffers are available. The
-      do_marking_step() method only tries to drain SATB buffers right
-      at the beginning. So, if enough buffers are available, the
-      marking step aborts and the SATB buffers are processed at
-      the beginning of the next invocation.
-
-      (4) To yield. when we have to yield then we abort and yield
-      right at the end of do_marking_step(). This saves us from a lot
-      of hassle as, by yielding we might allow a Full GC. If this
-      happens then objects will be compacted underneath our feet, the
-      heap might shrink, etc. We save checking for this by just
-      aborting and doing the yield right at the end.
-
-    From the above it follows that the do_marking_step() method should
-    be called in a loop (or, otherwise, regularly) until it completes.
-
-    If a marking step completes without its has_aborted() flag being
-    true, it means it has completed the current marking phase (and
-    also all other marking tasks have done so and have all synced up).
-
-    A method called regular_clock_call() is invoked "regularly" (in
-    sub ms intervals) throughout marking. It is this clock method that
-    checks all the abort conditions which were mentioned above and
-    decides when the task should abort. A work-based scheme is used to
-    trigger this clock method: when the number of object words the
-    marking phase has scanned or the number of references the marking
-    phase has visited reach a given limit. Additional invocations to
-    the method clock have been planted in a few other strategic places
-    too. The initial reason for the clock method was to avoid calling
-    vtime too regularly, as it is quite expensive. So, once it was in
-    place, it was natural to piggy-back all the other conditions on it
-    too and not constantly check them throughout the code.
-
-    If do_termination is true then do_marking_step will enter its
-    termination protocol.
-
-    The value of is_serial must be true when do_marking_step is being
-    called serially (i.e. by the VMThread) and do_marking_step should
-    skip any synchronization in the termination and overflow code.
-    Examples include the serial remark code and the serial reference
-    processing closures.
-
-    The value of is_serial must be false when do_marking_step is
-    being called by any of the worker threads in a work gang.
-    Examples include the concurrent marking code (CMMarkingTask),
-    the MT remark code, and the MT reference processing closures.
-
- *****************************************************************************/
-
-void CMTask::do_marking_step(double time_target_ms,
-                             bool do_termination,
-                             bool is_serial) {
-  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
-  assert(concurrent() == _cm->concurrent(), "they should be the same");
-
-  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
-  assert(_task_queues != NULL, "invariant");
-  assert(_task_queue != NULL, "invariant");
-  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
-
-  assert(!_claimed,
-         "only one thread should claim this task at any one time");
-
-  // OK, this doesn't safeguard again all possible scenarios, as it is
-  // possible for two threads to set the _claimed flag at the same
-  // time. But it is only for debugging purposes anyway and it will
-  // catch most problems.
-  _claimed = true;
-
-  _start_time_ms = os::elapsedVTime() * 1000.0;
-
-  // If do_stealing is true then do_marking_step will attempt to
-  // steal work from the other CMTasks. It only makes sense to
-  // enable stealing when the termination protocol is enabled
-  // and do_marking_step() is not being called serially.
-  bool do_stealing = do_termination && !is_serial;
-
-  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
-  _time_target_ms = time_target_ms - diff_prediction_ms;
-
-  // set up the variables that are used in the work-based scheme to
-  // call the regular clock method
-  _words_scanned = 0;
-  _refs_reached  = 0;
-  recalculate_limits();
-
-  // clear all flags
-  clear_has_aborted();
-  _has_timed_out = false;
-  _draining_satb_buffers = false;
-
-  ++_calls;
-
-  // Set up the bitmap and oop closures. Anything that uses them is
-  // eventually called from this method, so it is OK to allocate these
-  // statically.
-  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
-  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
-  set_cm_oop_closure(&cm_oop_closure);
-
-  if (_cm->has_overflown()) {
-    // This can happen if the mark stack overflows during a GC pause
-    // and this task, after a yield point, restarts. We have to abort
-    // as we need to get into the overflow protocol which happens
-    // right at the end of this task.
-    set_has_aborted();
-  }
-
-  // First drain any available SATB buffers. After this, we will not
-  // look at SATB buffers before the next invocation of this method.
-  // If enough completed SATB buffers are queued up, the regular clock
-  // will abort this task so that it restarts.
-  drain_satb_buffers();
-  // ...then partially drain the local queue and the global stack
-  drain_local_queue(true);
-  drain_global_stack(true);
-
-  do {
-    if (!has_aborted() && _curr_region != NULL) {
-      // This means that we're already holding on to a region.
-      assert(_finger != NULL, "if region is not NULL, then the finger "
-             "should not be NULL either");
-
-      // We might have restarted this task after an evacuation pause
-      // which might have evacuated the region we're holding on to
-      // underneath our feet. Let's read its limit again to make sure
-      // that we do not iterate over a region of the heap that
-      // contains garbage (update_region_limit() will also move
-      // _finger to the start of the region if it is found empty).
-      update_region_limit();
-      // We will start from _finger not from the start of the region,
-      // as we might be restarting this task after aborting half-way
-      // through scanning this region. In this case, _finger points to
-      // the address where we last found a marked object. If this is a
-      // fresh region, _finger points to start().
-      MemRegion mr = MemRegion(_finger, _region_limit);
-
-      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
-             "humongous regions should go around loop once only");
-
-      // Some special cases:
-      // If the memory region is empty, we can just give up the region.
-      // If the current region is humongous then we only need to check
-      // the bitmap for the bit associated with the start of the object,
-      // scan the object if it's live, and give up the region.
-      // Otherwise, let's iterate over the bitmap of the part of the region
-      // that is left.
-      // If the iteration is successful, give up the region.
-      if (mr.is_empty()) {
-        giveup_current_region();
-        regular_clock_call();
-      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
-        if (_nextMarkBitMap->isMarked(mr.start())) {
-          // The object is marked - apply the closure
-          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
-          bitmap_closure.do_bit(offset);
-        }
-        // Even if this task aborted while scanning the humongous object
-        // we can (and should) give up the current region.
-        giveup_current_region();
-        regular_clock_call();
-      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
-        giveup_current_region();
-        regular_clock_call();
-      } else {
-        assert(has_aborted(), "currently the only way to do so");
-        // The only way to abort the bitmap iteration is to return
-        // false from the do_bit() method. However, inside the
-        // do_bit() method we move the _finger to point to the
-        // object currently being looked at. So, if we bail out, we
-        // have definitely set _finger to something non-null.
-        assert(_finger != NULL, "invariant");
-
-        // Region iteration was actually aborted. So now _finger
-        // points to the address of the object we last scanned. If we
-        // leave it there, when we restart this task, we will rescan
-        // the object. It is easy to avoid this. We move the finger by
-        // enough to point to the next possible object header (the
-        // bitmap knows by how much we need to move it as it knows its
-        // granularity).
-        assert(_finger < _region_limit, "invariant");
-        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
-        // Check if bitmap iteration was aborted while scanning the last object
-        if (new_finger >= _region_limit) {
-          giveup_current_region();
-        } else {
-          move_finger_to(new_finger);
-        }
-      }
-    }
-    // At this point we have either completed iterating over the
-    // region we were holding on to, or we have aborted.
-
-    // We then partially drain the local queue and the global stack.
-    // (Do we really need this?)
-    drain_local_queue(true);
-    drain_global_stack(true);
-
-    // Read the note on the claim_region() method on why it might
-    // return NULL with potentially more regions available for
-    // claiming and why we have to check out_of_regions() to determine
-    // whether we're done or not.
-    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
-      // We are going to try to claim a new region. We should have
-      // given up on the previous one.
-      // Separated the asserts so that we know which one fires.
-      assert(_curr_region  == NULL, "invariant");
-      assert(_finger       == NULL, "invariant");
-      assert(_region_limit == NULL, "invariant");
-      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
-      if (claimed_region != NULL) {
-        // Yes, we managed to claim one
-        setup_for_region(claimed_region);
-        assert(_curr_region == claimed_region, "invariant");
-      }
-      // It is important to call the regular clock here. It might take
-      // a while to claim a region if, for example, we hit a large
-      // block of empty regions. So we need to call the regular clock
-      // method once round the loop to make sure it's called
-      // frequently enough.
-      regular_clock_call();
-    }
-
-    if (!has_aborted() && _curr_region == NULL) {
-      assert(_cm->out_of_regions(),
-             "at this point we should be out of regions");
-    }
-  } while ( _curr_region != NULL && !has_aborted());
-
-  if (!has_aborted()) {
-    // We cannot check whether the global stack is empty, since other
-    // tasks might be pushing objects to it concurrently.
-    assert(_cm->out_of_regions(),
-           "at this point we should be out of regions");
-    // Try to reduce the number of available SATB buffers so that
-    // remark has less work to do.
-    drain_satb_buffers();
-  }
-
-  // Since we've done everything else, we can now totally drain the
-  // local queue and global stack.
-  drain_local_queue(false);
-  drain_global_stack(false);
-
-  // Attempt at work stealing from other task's queues.
-  if (do_stealing && !has_aborted()) {
-    // We have not aborted. This means that we have finished all that
-    // we could. Let's try to do some stealing...
-
-    // We cannot check whether the global stack is empty, since other
-    // tasks might be pushing objects to it concurrently.
-    assert(_cm->out_of_regions() && _task_queue->size() == 0,
-           "only way to reach here");
-    while (!has_aborted()) {
-      oop obj;
-      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
-        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
-               "any stolen object should be marked");
-        scan_object(obj);
-
-        // And since we're towards the end, let's totally drain the
-        // local queue and global stack.
-        drain_local_queue(false);
-        drain_global_stack(false);
-      } else {
-        break;
-      }
-    }
-  }
-
-  // We still haven't aborted. Now, let's try to get into the
-  // termination protocol.
-  if (do_termination && !has_aborted()) {
-    // We cannot check whether the global stack is empty, since other
-    // tasks might be concurrently pushing objects on it.
-    // Separated the asserts so that we know which one fires.
-    assert(_cm->out_of_regions(), "only way to reach here");
-    assert(_task_queue->size() == 0, "only way to reach here");
-    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
-
-    // The CMTask class also extends the TerminatorTerminator class,
-    // hence its should_exit_termination() method will also decide
-    // whether to exit the termination protocol or not.
-    bool finished = (is_serial ||
-                     _cm->terminator()->offer_termination(this));
-    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
-    _termination_time_ms +=
-      termination_end_time_ms - _termination_start_time_ms;
-
-    if (finished) {
-      // We're all done.
-
-      if (_worker_id == 0) {
-        // let's allow task 0 to do this
-        if (concurrent()) {
-          assert(_cm->concurrent_marking_in_progress(), "invariant");
-          // we need to set this to false before the next
-          // safepoint. This way we ensure that the marking phase
-          // doesn't observe any more heap expansions.
-          _cm->clear_concurrent_marking_in_progress();
-        }
-      }
-
-      // We can now guarantee that the global stack is empty, since
-      // all other tasks have finished. We separated the guarantees so
-      // that, if a condition is false, we can immediately find out
-      // which one.
-      guarantee(_cm->out_of_regions(), "only way to reach here");
-      guarantee(_cm->mark_stack_empty(), "only way to reach here");
-      guarantee(_task_queue->size() == 0, "only way to reach here");
-      guarantee(!_cm->has_overflown(), "only way to reach here");
-      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
-    } else {
-      // Apparently there's more work to do. Let's abort this task. It
-      // will restart it and we can hopefully find more things to do.
-      set_has_aborted();
-    }
-  }
-
-  // Mainly for debugging purposes to make sure that a pointer to the
-  // closure which was statically allocated in this frame doesn't
-  // escape it by accident.
-  set_cm_oop_closure(NULL);
-  double end_time_ms = os::elapsedVTime() * 1000.0;
-  double elapsed_time_ms = end_time_ms - _start_time_ms;
-  // Update the step history.
-  _step_times_ms.add(elapsed_time_ms);
-
-  if (has_aborted()) {
-    // The task was aborted for some reason.
-    if (_has_timed_out) {
-      double diff_ms = elapsed_time_ms - _time_target_ms;
-      // Keep statistics of how well we did with respect to hitting
-      // our target only if we actually timed out (if we aborted for
-      // other reasons, then the results might get skewed).
-      _marking_step_diffs_ms.add(diff_ms);
-    }
-
-    if (_cm->has_overflown()) {
-      // This is the interesting one. We aborted because a global
-      // overflow was raised. This means we have to restart the
-      // marking phase and start iterating over regions. However, in
-      // order to do this we have to make sure that all tasks stop
-      // what they are doing and re-initialize in a safe manner. We
-      // will achieve this with the use of two barrier sync points.
-
-      if (!is_serial) {
-        // We only need to enter the sync barrier if being called
-        // from a parallel context
-        _cm->enter_first_sync_barrier(_worker_id);
-
-        // When we exit this sync barrier we know that all tasks have
-        // stopped doing marking work. So, it's now safe to
-        // re-initialize our data structures. At the end of this method,
-        // task 0 will clear the global data structures.
-      }
-
-      // We clear the local state of this task...
-      clear_region_fields();
-
-      if (!is_serial) {
-        // ...and enter the second barrier.
-        _cm->enter_second_sync_barrier(_worker_id);
-      }
-      // At this point, if we're during the concurrent phase of
-      // marking, everything has been re-initialized and we're
-      // ready to restart.
-    }
-  }
-
-  _claimed = false;
-}
-
-CMTask::CMTask(uint worker_id,
-               ConcurrentMark* cm,
-               size_t* marked_bytes,
-               BitMap* card_bm,
-               CMTaskQueue* task_queue,
-               CMTaskQueueSet* task_queues)
-  : _g1h(G1CollectedHeap::heap()),
-    _worker_id(worker_id), _cm(cm),
-    _claimed(false),
-    _nextMarkBitMap(NULL), _hash_seed(17),
-    _task_queue(task_queue),
-    _task_queues(task_queues),
-    _cm_oop_closure(NULL),
-    _marked_bytes_array(marked_bytes),
-    _card_bm(card_bm) {
-  guarantee(task_queue != NULL, "invariant");
-  guarantee(task_queues != NULL, "invariant");
-
-  _marking_step_diffs_ms.add(0.5);
-}
-
-// These are formatting macros that are used below to ensure
-// consistent formatting. The *_H_* versions are used to format the
-// header for a particular value and they should be kept consistent
-// with the corresponding macro. Also note that most of the macros add
-// the necessary white space (as a prefix) which makes them a bit
-// easier to compose.
-
-// All the output lines are prefixed with this string to be able to
-// identify them easily in a large log file.
-#define G1PPRL_LINE_PREFIX            "###"
-
-#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
-#ifdef _LP64
-#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
-#else // _LP64
-#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
-#endif // _LP64
-
-// For per-region info
-#define G1PPRL_TYPE_FORMAT            "   %-4s"
-#define G1PPRL_TYPE_H_FORMAT          "   %4s"
-#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
-#define G1PPRL_BYTE_H_FORMAT          "  %9s"
-#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
-#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
-
-// For summary info
-#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
-#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
-#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
-#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
-
-G1PrintRegionLivenessInfoClosure::
-G1PrintRegionLivenessInfoClosure(const char* phase_name)
-  : _total_used_bytes(0), _total_capacity_bytes(0),
-    _total_prev_live_bytes(0), _total_next_live_bytes(0),
-    _hum_used_bytes(0), _hum_capacity_bytes(0),
-    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
-    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  MemRegion g1_reserved = g1h->g1_reserved();
-  double now = os::elapsedTime();
-
-  // Print the header of the output.
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
-                          G1PPRL_SUM_ADDR_FORMAT("reserved")
-                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
-                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
-                          HeapRegion::GrainBytes);
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
-                          G1PPRL_TYPE_H_FORMAT
-                          G1PPRL_ADDR_BASE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_DOUBLE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT,
-                          "type", "address-range",
-                          "used", "prev-live", "next-live", "gc-eff",
-                          "remset", "code-roots");
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
-                          G1PPRL_TYPE_H_FORMAT
-                          G1PPRL_ADDR_BASE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_DOUBLE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT
-                          G1PPRL_BYTE_H_FORMAT,
-                          "", "",
-                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
-                          "(bytes)", "(bytes)");
-}
-
-// It takes as a parameter a reference to one of the _hum_* fields, it
-// deduces the corresponding value for a region in a humongous region
-// series (either the region size, or what's left if the _hum_* field
-// is < the region size), and updates the _hum_* field accordingly.
-size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
-  size_t bytes = 0;
-  // The > 0 check is to deal with the prev and next live bytes which
-  // could be 0.
-  if (*hum_bytes > 0) {
-    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
-    *hum_bytes -= bytes;
-  }
-  return bytes;
-}
-
-// It deduces the values for a region in a humongous region series
-// from the _hum_* fields and updates those accordingly. It assumes
-// that that _hum_* fields have already been set up from the "starts
-// humongous" region and we visit the regions in address order.
-void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
-                                                     size_t* capacity_bytes,
-                                                     size_t* prev_live_bytes,
-                                                     size_t* next_live_bytes) {
-  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
-  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
-  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
-  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
-  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
-}
-
-bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
-  const char* type       = r->get_type_str();
-  HeapWord* bottom       = r->bottom();
-  HeapWord* end          = r->end();
-  size_t capacity_bytes  = r->capacity();
-  size_t used_bytes      = r->used();
-  size_t prev_live_bytes = r->live_bytes();
-  size_t next_live_bytes = r->next_live_bytes();
-  double gc_eff          = r->gc_efficiency();
-  size_t remset_bytes    = r->rem_set()->mem_size();
-  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
-
-  if (r->is_starts_humongous()) {
-    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
-           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
-           "they should have been zeroed after the last time we used them");
-    // Set up the _hum_* fields.
-    _hum_capacity_bytes  = capacity_bytes;
-    _hum_used_bytes      = used_bytes;
-    _hum_prev_live_bytes = prev_live_bytes;
-    _hum_next_live_bytes = next_live_bytes;
-    get_hum_bytes(&used_bytes, &capacity_bytes,
-                  &prev_live_bytes, &next_live_bytes);
-    end = bottom + HeapRegion::GrainWords;
-  } else if (r->is_continues_humongous()) {
-    get_hum_bytes(&used_bytes, &capacity_bytes,
-                  &prev_live_bytes, &next_live_bytes);
-    assert(end == bottom + HeapRegion::GrainWords, "invariant");
-  }
-
-  _total_used_bytes      += used_bytes;
-  _total_capacity_bytes  += capacity_bytes;
-  _total_prev_live_bytes += prev_live_bytes;
-  _total_next_live_bytes += next_live_bytes;
-  _total_remset_bytes    += remset_bytes;
-  _total_strong_code_roots_bytes += strong_code_roots_bytes;
-
-  // Print a line for this particular region.
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
-                          G1PPRL_TYPE_FORMAT
-                          G1PPRL_ADDR_BASE_FORMAT
-                          G1PPRL_BYTE_FORMAT
-                          G1PPRL_BYTE_FORMAT
-                          G1PPRL_BYTE_FORMAT
-                          G1PPRL_DOUBLE_FORMAT
-                          G1PPRL_BYTE_FORMAT
-                          G1PPRL_BYTE_FORMAT,
-                          type, p2i(bottom), p2i(end),
-                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
-                          remset_bytes, strong_code_roots_bytes);
-
-  return false;
-}
-
-G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
-  // add static memory usages to remembered set sizes
-  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
-  // Print the footer of the output.
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
-  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
-                         " SUMMARY"
-                         G1PPRL_SUM_MB_FORMAT("capacity")
-                         G1PPRL_SUM_MB_PERC_FORMAT("used")
-                         G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
-                         G1PPRL_SUM_MB_PERC_FORMAT("next-live")
-                         G1PPRL_SUM_MB_FORMAT("remset")
-                         G1PPRL_SUM_MB_FORMAT("code-roots"),
-                         bytes_to_mb(_total_capacity_bytes),
-                         bytes_to_mb(_total_used_bytes),
-                         perc(_total_used_bytes, _total_capacity_bytes),
-                         bytes_to_mb(_total_prev_live_bytes),
-                         perc(_total_prev_live_bytes, _total_capacity_bytes),
-                         bytes_to_mb(_total_next_live_bytes),
-                         perc(_total_next_live_bytes, _total_capacity_bytes),
-                         bytes_to_mb(_total_remset_bytes),
-                         bytes_to_mb(_total_strong_code_roots_bytes));
-}
--- a/hotspot/src/share/vm/gc/g1/concurrentMark.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1024 +0,0 @@
-/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_G1_CONCURRENTMARK_HPP
-#define SHARE_VM_GC_G1_CONCURRENTMARK_HPP
-
-#include "classfile/javaClasses.hpp"
-#include "gc/g1/g1RegionToSpaceMapper.hpp"
-#include "gc/g1/heapRegionSet.hpp"
-#include "gc/shared/taskqueue.hpp"
-
-class G1CollectedHeap;
-class CMBitMap;
-class CMTask;
-class ConcurrentMark;
-typedef GenericTaskQueue<oop, mtGC>            CMTaskQueue;
-typedef GenericTaskQueueSet<CMTaskQueue, mtGC> CMTaskQueueSet;
-
-// Closure used by CM during concurrent reference discovery
-// and reference processing (during remarking) to determine
-// if a particular object is alive. It is primarily used
-// to determine if referents of discovered reference objects
-// are alive. An instance is also embedded into the
-// reference processor as the _is_alive_non_header field
-class G1CMIsAliveClosure: public BoolObjectClosure {
-  G1CollectedHeap* _g1;
- public:
-  G1CMIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }
-
-  bool do_object_b(oop obj);
-};
-
-// A generic CM bit map.  This is essentially a wrapper around the BitMap
-// class, with one bit per (1<<_shifter) HeapWords.
-
-class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
- protected:
-  HeapWord* _bmStartWord;      // base address of range covered by map
-  size_t    _bmWordSize;       // map size (in #HeapWords covered)
-  const int _shifter;          // map to char or bit
-  BitMap    _bm;               // the bit map itself
-
- public:
-  // constructor
-  CMBitMapRO(int shifter);
-
-  // inquiries
-  HeapWord* startWord()   const { return _bmStartWord; }
-  // the following is one past the last word in space
-  HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }
-
-  // read marks
-
-  bool isMarked(HeapWord* addr) const {
-    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
-           "outside underlying space?");
-    return _bm.at(heapWordToOffset(addr));
-  }
-
-  // iteration
-  inline bool iterate(BitMapClosure* cl, MemRegion mr);
-
-  // Return the address corresponding to the next marked bit at or after
-  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
-  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
-  HeapWord* getNextMarkedWordAddress(const HeapWord* addr,
-                                     const HeapWord* limit = NULL) const;
-
-  // conversion utilities
-  HeapWord* offsetToHeapWord(size_t offset) const {
-    return _bmStartWord + (offset << _shifter);
-  }
-  size_t heapWordToOffset(const HeapWord* addr) const {
-    return pointer_delta(addr, _bmStartWord) >> _shifter;
-  }
-
-  // The argument addr should be the start address of a valid object
-  inline HeapWord* nextObject(HeapWord* addr);
-
-  void print_on_error(outputStream* st, const char* prefix) const;
-
-  // debugging
-  NOT_PRODUCT(bool covers(MemRegion rs) const;)
-};
-
-class CMBitMapMappingChangedListener : public G1MappingChangedListener {
- private:
-  CMBitMap* _bm;
- public:
-  CMBitMapMappingChangedListener() : _bm(NULL) {}
-
-  void set_bitmap(CMBitMap* bm) { _bm = bm; }
-
-  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
-};
-
-class CMBitMap : public CMBitMapRO {
- private:
-  CMBitMapMappingChangedListener _listener;
-
- public:
-  static size_t compute_size(size_t heap_size);
-  // Returns the amount of bytes on the heap between two marks in the bitmap.
-  static size_t mark_distance();
-  // Returns how many bytes (or bits) of the heap a single byte (or bit) of the
-  // mark bitmap corresponds to. This is the same as the mark distance above.
-  static size_t heap_map_factor() {
-    return mark_distance();
-  }
-
-  CMBitMap() : CMBitMapRO(LogMinObjAlignment), _listener() { _listener.set_bitmap(this); }
-
-  // Initializes the underlying BitMap to cover the given area.
-  void initialize(MemRegion heap, G1RegionToSpaceMapper* storage);
-
-  // Write marks.
-  inline void mark(HeapWord* addr);
-  inline void clear(HeapWord* addr);
-  inline bool parMark(HeapWord* addr);
-
-  void clearRange(MemRegion mr);
-
-  // Clear the whole mark bitmap.
-  void clearAll();
-};
-
-// Represents a marking stack used by ConcurrentMarking in the G1 collector.
-class CMMarkStack VALUE_OBJ_CLASS_SPEC {
-  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
-  ConcurrentMark* _cm;
-  oop* _base;        // bottom of stack
-  jint _index;       // one more than last occupied index
-  jint _capacity;    // max #elements
-  jint _saved_index; // value of _index saved at start of GC
-
-  bool  _overflow;
-  bool  _should_expand;
-
- public:
-  CMMarkStack(ConcurrentMark* cm);
-  ~CMMarkStack();
-
-  bool allocate(size_t capacity);
-
-  // Pushes the first "n" elements of "ptr_arr" on the stack.
-  // Locking impl: concurrency is allowed only with
-  // "par_push_arr" and/or "par_pop_arr" operations, which use the same
-  // locking strategy.
-  void par_push_arr(oop* ptr_arr, int n);
-
-  // If returns false, the array was empty.  Otherwise, removes up to "max"
-  // elements from the stack, and transfers them to "ptr_arr" in an
-  // unspecified order.  The actual number transferred is given in "n" ("n
-  // == 0" is deliberately redundant with the return value.)  Locking impl:
-  // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
-  // operations, which use the same locking strategy.
-  bool par_pop_arr(oop* ptr_arr, int max, int* n);
-
-  bool isEmpty()    { return _index == 0; }
-  int  maxElems()   { return _capacity; }
-
-  bool overflow() { return _overflow; }
-  void clear_overflow() { _overflow = false; }
-
-  bool should_expand() const { return _should_expand; }
-  void set_should_expand();
-
-  // Expand the stack, typically in response to an overflow condition
-  void expand();
-
-  int  size() { return _index; }
-
-  void setEmpty()   { _index = 0; clear_overflow(); }
-
-  // Record the current index.
-  void note_start_of_gc();
-
-  // Make sure that we have not added any entries to the stack during GC.
-  void note_end_of_gc();
-
-  // Apply fn to each oop in the mark stack, up to the bound recorded
-  // via one of the above "note" functions.  The mark stack must not
-  // be modified while iterating.
-  template<typename Fn> void iterate(Fn fn);
-};
-
-class YoungList;
-
-// Root Regions are regions that are not empty at the beginning of a
-// marking cycle and which we might collect during an evacuation pause
-// while the cycle is active. Given that, during evacuation pauses, we
-// do not copy objects that are explicitly marked, what we have to do
-// for the root regions is to scan them and mark all objects reachable
-// from them. According to the SATB assumptions, we only need to visit
-// each object once during marking. So, as long as we finish this scan
-// before the next evacuation pause, we can copy the objects from the
-// root regions without having to mark them or do anything else to them.
-//
-// Currently, we only support root region scanning once (at the start
-// of the marking cycle) and the root regions are all the survivor
-// regions populated during the initial-mark pause.
-class CMRootRegions VALUE_OBJ_CLASS_SPEC {
-private:
-  YoungList*           _young_list;
-  ConcurrentMark*      _cm;
-
-  volatile bool        _scan_in_progress;
-  volatile bool        _should_abort;
-  HeapRegion* volatile _next_survivor;
-
-public:
-  CMRootRegions();
-  // We actually do most of the initialization in this method.
-  void init(G1CollectedHeap* g1h, ConcurrentMark* cm);
-
-  // Reset the claiming / scanning of the root regions.
-  void prepare_for_scan();
-
-  // Forces get_next() to return NULL so that the iteration aborts early.
-  void abort() { _should_abort = true; }
-
-  // Return true if the CM thread are actively scanning root regions,
-  // false otherwise.
-  bool scan_in_progress() { return _scan_in_progress; }
-
-  // Claim the next root region to scan atomically, or return NULL if
-  // all have been claimed.
-  HeapRegion* claim_next();
-
-  // Flag that we're done with root region scanning and notify anyone
-  // who's waiting on it. If aborted is false, assume that all regions
-  // have been claimed.
-  void scan_finished();
-
-  // If CM threads are still scanning root regions, wait until they
-  // are done. Return true if we had to wait, false otherwise.
-  bool wait_until_scan_finished();
-};
-
-class ConcurrentMarkThread;
-
-class ConcurrentMark: public CHeapObj<mtGC> {
-  friend class CMMarkStack;
-  friend class ConcurrentMarkThread;
-  friend class CMTask;
-  friend class CMBitMapClosure;
-  friend class CMRemarkTask;
-  friend class CMConcurrentMarkingTask;
-  friend class G1ParNoteEndTask;
-  friend class CalcLiveObjectsClosure;
-  friend class G1CMRefProcTaskProxy;
-  friend class G1CMRefProcTaskExecutor;
-  friend class G1CMKeepAliveAndDrainClosure;
-  friend class G1CMDrainMarkingStackClosure;
-
-protected:
-  ConcurrentMarkThread* _cmThread;   // The thread doing the work
-  G1CollectedHeap*      _g1h;        // The heap
-  uint                  _parallel_marking_threads; // The number of marking
-                                                   // threads we're using
-  uint                  _max_parallel_marking_threads; // Max number of marking
-                                                       // threads we'll ever use
-  double                _sleep_factor; // How much we have to sleep, with
-                                       // respect to the work we just did, to
-                                       // meet the marking overhead goal
-  double                _marking_task_overhead; // Marking target overhead for
-                                                // a single task
-
-  FreeRegionList        _cleanup_list;
-
-  // Concurrent marking support structures
-  CMBitMap                _markBitMap1;
-  CMBitMap                _markBitMap2;
-  CMBitMapRO*             _prevMarkBitMap; // Completed mark bitmap
-  CMBitMap*               _nextMarkBitMap; // Under-construction mark bitmap
-
-  BitMap                  _region_bm;
-  BitMap                  _card_bm;
-
-  // Heap bounds
-  HeapWord*               _heap_start;
-  HeapWord*               _heap_end;
-
-  // Root region tracking and claiming
-  CMRootRegions           _root_regions;
-
-  // For gray objects
-  CMMarkStack             _markStack; // Grey objects behind global finger
-  HeapWord* volatile      _finger;  // The global finger, region aligned,
-                                    // always points to the end of the
-                                    // last claimed region
-
-  // Marking tasks
-  uint                    _max_worker_id;// Maximum worker id
-  uint                    _active_tasks; // Task num currently active
-  CMTask**                _tasks;        // Task queue array (max_worker_id len)
-  CMTaskQueueSet*         _task_queues;  // Task queue set
-  ParallelTaskTerminator  _terminator;   // For termination
-
-  // Two sync barriers that are used to synchronize tasks when an
-  // overflow occurs. The algorithm is the following. All tasks enter
-  // the first one to ensure that they have all stopped manipulating
-  // the global data structures. After they exit it, they re-initialize
-  // their data structures and task 0 re-initializes the global data
-  // structures. Then, they enter the second sync barrier. This
-  // ensure, that no task starts doing work before all data
-  // structures (local and global) have been re-initialized. When they
-  // exit it, they are free to start working again.
-  WorkGangBarrierSync     _first_overflow_barrier_sync;
-  WorkGangBarrierSync     _second_overflow_barrier_sync;
-
-  // This is set by any task, when an overflow on the global data
-  // structures is detected
-  volatile bool           _has_overflown;
-  // True: marking is concurrent, false: we're in remark
-  volatile bool           _concurrent;
-  // Set at the end of a Full GC so that marking aborts
-  volatile bool           _has_aborted;
-
-  // Used when remark aborts due to an overflow to indicate that
-  // another concurrent marking phase should start
-  volatile bool           _restart_for_overflow;
-
-  // This is true from the very start of concurrent marking until the
-  // point when all the tasks complete their work. It is really used
-  // to determine the points between the end of concurrent marking and
-  // time of remark.
-  volatile bool           _concurrent_marking_in_progress;
-
-  // Keep track of whether we have started concurrent phase or not.
-  bool                    _concurrent_phase_started;
-
-  // All of these times are in ms
-  NumberSeq _init_times;
-  NumberSeq _remark_times;
-  NumberSeq _remark_mark_times;
-  NumberSeq _remark_weak_ref_times;
-  NumberSeq _cleanup_times;
-  double    _total_counting_time;
-  double    _total_rs_scrub_time;
-
-  double*   _accum_task_vtime;   // Accumulated task vtime
-
-  WorkGang* _parallel_workers;
-
-  void weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes);
-  void weakRefsWork(bool clear_all_soft_refs);
-
-  void swapMarkBitMaps();
-
-  // It resets the global marking data structures, as well as the
-  // task local ones; should be called during initial mark.
-  void reset();
-
-  // Resets all the marking data structures. Called when we have to restart
-  // marking or when marking completes (via set_non_marking_state below).
-  void reset_marking_state(bool clear_overflow = true);
-
-  // We do this after we're done with marking so that the marking data
-  // structures are initialized to a sensible and predictable state.
-  void set_non_marking_state();
-
-  // Called to indicate how many threads are currently active.
-  void set_concurrency(uint active_tasks);
-
-  // It should be called to indicate which phase we're in (concurrent
-  // mark or remark) and how many threads are currently active.
-  void set_concurrency_and_phase(uint active_tasks, bool concurrent);
-
-  // Prints all gathered CM-related statistics
-  void print_stats();
-
-  bool cleanup_list_is_empty() {
-    return _cleanup_list.is_empty();
-  }
-
-  // Accessor methods
-  uint parallel_marking_threads() const     { return _parallel_marking_threads; }
-  uint max_parallel_marking_threads() const { return _max_parallel_marking_threads;}
-  double sleep_factor()                     { return _sleep_factor; }
-  double marking_task_overhead()            { return _marking_task_overhead;}
-
-  HeapWord*               finger()          { return _finger;   }
-  bool                    concurrent()      { return _concurrent; }
-  uint                    active_tasks()    { return _active_tasks; }
-  ParallelTaskTerminator* terminator()      { return &_terminator; }
-
-  // It claims the next available region to be scanned by a marking
-  // task/thread. It might return NULL if the next region is empty or
-  // we have run out of regions. In the latter case, out_of_regions()
-  // determines whether we've really run out of regions or the task
-  // should call claim_region() again. This might seem a bit
-  // awkward. Originally, the code was written so that claim_region()
-  // either successfully returned with a non-empty region or there
-  // were no more regions to be claimed. The problem with this was
-  // that, in certain circumstances, it iterated over large chunks of
-  // the heap finding only empty regions and, while it was working, it
-  // was preventing the calling task to call its regular clock
-  // method. So, this way, each task will spend very little time in
-  // claim_region() and is allowed to call the regular clock method
-  // frequently.
-  HeapRegion* claim_region(uint worker_id);
-
-  // It determines whether we've run out of regions to scan. Note that
-  // the finger can point past the heap end in case the heap was expanded
-  // to satisfy an allocation without doing a GC. This is fine, because all
-  // objects in those regions will be considered live anyway because of
-  // SATB guarantees (i.e. their TAMS will be equal to bottom).
-  bool        out_of_regions() { return _finger >= _heap_end; }
-
-  // Returns the task with the given id
-  CMTask* task(int id) {
-    assert(0 <= id && id < (int) _active_tasks,
-           "task id not within active bounds");
-    return _tasks[id];
-  }
-
-  // Returns the task queue with the given id
-  CMTaskQueue* task_queue(int id) {
-    assert(0 <= id && id < (int) _active_tasks,
-           "task queue id not within active bounds");
-    return (CMTaskQueue*) _task_queues->queue(id);
-  }
-
-  // Returns the task queue set
-  CMTaskQueueSet* task_queues()  { return _task_queues; }
-
-  // Access / manipulation of the overflow flag which is set to
-  // indicate that the global stack has overflown
-  bool has_overflown()           { return _has_overflown; }
-  void set_has_overflown()       { _has_overflown = true; }
-  void clear_has_overflown()     { _has_overflown = false; }
-  bool restart_for_overflow()    { return _restart_for_overflow; }
-
-  // Methods to enter the two overflow sync barriers
-  void enter_first_sync_barrier(uint worker_id);
-  void enter_second_sync_barrier(uint worker_id);
-
-  // Live Data Counting data structures...
-  // These data structures are initialized at the start of
-  // marking. They are written to while marking is active.
-  // They are aggregated during remark; the aggregated values
-  // are then used to populate the _region_bm, _card_bm, and
-  // the total live bytes, which are then subsequently updated
-  // during cleanup.
-
-  // An array of bitmaps (one bit map per task). Each bitmap
-  // is used to record the cards spanned by the live objects
-  // marked by that task/worker.
-  BitMap*  _count_card_bitmaps;
-
-  // Used to record the number of marked live bytes
-  // (for each region, by worker thread).
-  size_t** _count_marked_bytes;
-
-  // Card index of the bottom of the G1 heap. Used for biasing indices into
-  // the card bitmaps.
-  intptr_t _heap_bottom_card_num;
-
-  // Set to true when initialization is complete
-  bool _completed_initialization;
-
-public:
-  // Manipulation of the global mark stack.
-  // The push and pop operations are used by tasks for transfers
-  // between task-local queues and the global mark stack, and use
-  // locking for concurrency safety.
-  bool mark_stack_push(oop* arr, int n) {
-    _markStack.par_push_arr(arr, n);
-    if (_markStack.overflow()) {
-      set_has_overflown();
-      return false;
-    }
-    return true;
-  }
-  void mark_stack_pop(oop* arr, int max, int* n) {
-    _markStack.par_pop_arr(arr, max, n);
-  }
-  size_t mark_stack_size()                { return _markStack.size(); }
-  size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
-  bool mark_stack_overflow()              { return _markStack.overflow(); }
-  bool mark_stack_empty()                 { return _markStack.isEmpty(); }
-
-  CMRootRegions* root_regions() { return &_root_regions; }
-
-  bool concurrent_marking_in_progress() {
-    return _concurrent_marking_in_progress;
-  }
-  void set_concurrent_marking_in_progress() {
-    _concurrent_marking_in_progress = true;
-  }
-  void clear_concurrent_marking_in_progress() {
-    _concurrent_marking_in_progress = false;
-  }
-
-  void register_concurrent_phase_start(const char* title);
-  void register_concurrent_phase_end();
-
-  void update_accum_task_vtime(int i, double vtime) {
-    _accum_task_vtime[i] += vtime;
-  }
-
-  double all_task_accum_vtime() {
-    double ret = 0.0;
-    for (uint i = 0; i < _max_worker_id; ++i)
-      ret += _accum_task_vtime[i];
-    return ret;
-  }
-
-  // Attempts to steal an object from the task queues of other tasks
-  bool try_stealing(uint worker_id, int* hash_seed, oop& obj);
-
-  ConcurrentMark(G1CollectedHeap* g1h,
-                 G1RegionToSpaceMapper* prev_bitmap_storage,
-                 G1RegionToSpaceMapper* next_bitmap_storage);
-  ~ConcurrentMark();
-
-  ConcurrentMarkThread* cmThread() { return _cmThread; }
-
-  CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
-  CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
-
-  // Returns the number of GC threads to be used in a concurrent
-  // phase based on the number of GC threads being used in a STW
-  // phase.
-  uint scale_parallel_threads(uint n_par_threads);
-
-  // Calculates the number of GC threads to be used in a concurrent phase.
-  uint calc_parallel_marking_threads();
-
-  // The following three are interaction between CM and
-  // G1CollectedHeap
-
-  // This notifies CM that a root during initial-mark needs to be
-  // grayed. It is MT-safe. word_size is the size of the object in
-  // words. It is passed explicitly as sometimes we cannot calculate
-  // it from the given object because it might be in an inconsistent
-  // state (e.g., in to-space and being copied). So the caller is
-  // responsible for dealing with this issue (e.g., get the size from
-  // the from-space image when the to-space image might be
-  // inconsistent) and always passing the size. hr is the region that
-  // contains the object and it's passed optionally from callers who
-  // might already have it (no point in recalculating it).
-  inline void grayRoot(oop obj,
-                       size_t word_size,
-                       uint worker_id,
-                       HeapRegion* hr = NULL);
-
-  // Clear the next marking bitmap (will be called concurrently).
-  void clearNextBitmap();
-
-  // Return whether the next mark bitmap has no marks set. To be used for assertions
-  // only. Will not yield to pause requests.
-  bool nextMarkBitmapIsClear();
-
-  // These two do the work that needs to be done before and after the
-  // initial root checkpoint. Since this checkpoint can be done at two
-  // different points (i.e. an explicit pause or piggy-backed on a
-  // young collection), then it's nice to be able to easily share the
-  // pre/post code. It might be the case that we can put everything in
-  // the post method. TP
-  void checkpointRootsInitialPre();
-  void checkpointRootsInitialPost();
-
-  // Scan all the root regions and mark everything reachable from
-  // them.
-  void scanRootRegions();
-
-  // Scan a single root region and mark everything reachable from it.
-  void scanRootRegion(HeapRegion* hr, uint worker_id);
-
-  // Do concurrent phase of marking, to a tentative transitive closure.
-  void markFromRoots();
-
-  void checkpointRootsFinal(bool clear_all_soft_refs);
-  void checkpointRootsFinalWork();
-  void cleanup();
-  void completeCleanup();
-
-  // Mark in the previous bitmap.  NB: this is usually read-only, so use
-  // this carefully!
-  inline void markPrev(oop p);
-
-  // Clears marks for all objects in the given range, for the prev or
-  // next bitmaps.  NB: the previous bitmap is usually
-  // read-only, so use this carefully!
-  void clearRangePrevBitmap(MemRegion mr);
-
-  // Notify data structures that a GC has started.
-  void note_start_of_gc() {
-    _markStack.note_start_of_gc();
-  }
-
-  // Notify data structures that a GC is finished.
-  void note_end_of_gc() {
-    _markStack.note_end_of_gc();
-  }
-
-  // Verify that there are no CSet oops on the stacks (taskqueues /
-  // global mark stack) and fingers (global / per-task).
-  // If marking is not in progress, it's a no-op.
-  void verify_no_cset_oops() PRODUCT_RETURN;
-
-  inline bool isPrevMarked(oop p) const;
-
-  inline bool do_yield_check(uint worker_i = 0);
-
-  // Called to abort the marking cycle after a Full GC takes place.
-  void abort();
-
-  bool has_aborted()      { return _has_aborted; }
-
-  void print_summary_info();
-
-  void print_worker_threads_on(outputStream* st) const;
-
-  void print_on_error(outputStream* st) const;
-
-  // Liveness counting
-
-  // Utility routine to set an exclusive range of cards on the given
-  // card liveness bitmap
-  inline void set_card_bitmap_range(BitMap* card_bm,
-                                    BitMap::idx_t start_idx,
-                                    BitMap::idx_t end_idx,
-                                    bool is_par);
-
-  // Returns the card number of the bottom of the G1 heap.
-  // Used in biasing indices into accounting card bitmaps.
-  intptr_t heap_bottom_card_num() const {
-    return _heap_bottom_card_num;
-  }
-
-  // Returns the card bitmap for a given task or worker id.
-  BitMap* count_card_bitmap_for(uint worker_id) {
-    assert(worker_id < _max_worker_id, "oob");
-    assert(_count_card_bitmaps != NULL, "uninitialized");
-    BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
-    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
-    return task_card_bm;
-  }
-
-  // Returns the array containing the marked bytes for each region,
-  // for the given worker or task id.
-  size_t* count_marked_bytes_array_for(uint worker_id) {
-    assert(worker_id < _max_worker_id, "oob");
-    assert(_count_marked_bytes != NULL, "uninitialized");
-    size_t* marked_bytes_array = _count_marked_bytes[worker_id];
-    assert(marked_bytes_array != NULL, "uninitialized");
-    return marked_bytes_array;
-  }
-
-  // Returns the index in the liveness accounting card table bitmap
-  // for the given address
-  inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
-
-  // Counts the size of the given memory region in the the given
-  // marked_bytes array slot for the given HeapRegion.
-  // Sets the bits in the given card bitmap that are associated with the
-  // cards that are spanned by the memory region.
-  inline void count_region(MemRegion mr,
-                           HeapRegion* hr,
-                           size_t* marked_bytes_array,
-                           BitMap* task_card_bm);
-
-  // Counts the given object in the given task/worker counting
-  // data structures.
-  inline void count_object(oop obj,
-                           HeapRegion* hr,
-                           size_t* marked_bytes_array,
-                           BitMap* task_card_bm,
-                           size_t word_size);
-
-  // Attempts to mark the given object and, if successful, counts
-  // the object in the given task/worker counting structures.
-  inline bool par_mark_and_count(oop obj,
-                                 HeapRegion* hr,
-                                 size_t* marked_bytes_array,
-                                 BitMap* task_card_bm);
-
-  // Attempts to mark the given object and, if successful, counts
-  // the object in the task/worker counting structures for the
-  // given worker id.
-  inline bool par_mark_and_count(oop obj,
-                                 size_t word_size,
-                                 HeapRegion* hr,
-                                 uint worker_id);
-
-  // Returns true if initialization was successfully completed.
-  bool completed_initialization() const {
-    return _completed_initialization;
-  }
-
-protected:
-  // Clear all the per-task bitmaps and arrays used to store the
-  // counting data.
-  void clear_all_count_data();
-
-  // Aggregates the counting data for each worker/task
-  // that was constructed while marking. Also sets
-  // the amount of marked bytes for each region and
-  // the top at concurrent mark count.
-  void aggregate_count_data();
-
-  // Verification routine
-  void verify_count_data();
-};
-
-// A class representing a marking task.
-class CMTask : public TerminatorTerminator {
-private:
-  enum PrivateConstants {
-    // the regular clock call is called once the scanned words reaches
-    // this limit
-    words_scanned_period          = 12*1024,
-    // the regular clock call is called once the number of visited
-    // references reaches this limit
-    refs_reached_period           = 384,
-    // initial value for the hash seed, used in the work stealing code
-    init_hash_seed                = 17,
-    // how many entries will be transferred between global stack and
-    // local queues
-    global_stack_transfer_size    = 16
-  };
-
-  uint                        _worker_id;
-  G1CollectedHeap*            _g1h;
-  ConcurrentMark*             _cm;
-  CMBitMap*                   _nextMarkBitMap;
-  // the task queue of this task
-  CMTaskQueue*                _task_queue;
-private:
-  // the task queue set---needed for stealing
-  CMTaskQueueSet*             _task_queues;
-  // indicates whether the task has been claimed---this is only  for
-  // debugging purposes
-  bool                        _claimed;
-
-  // number of calls to this task
-  int                         _calls;
-
-  // when the virtual timer reaches this time, the marking step should
-  // exit
-  double                      _time_target_ms;
-  // the start time of the current marking step
-  double                      _start_time_ms;
-
-  // the oop closure used for iterations over oops
-  G1CMOopClosure*             _cm_oop_closure;
-
-  // the region this task is scanning, NULL if we're not scanning any
-  HeapRegion*                 _curr_region;
-  // the local finger of this task, NULL if we're not scanning a region
-  HeapWord*                   _finger;
-  // limit of the region this task is scanning, NULL if we're not scanning one
-  HeapWord*                   _region_limit;
-
-  // the number of words this task has scanned
-  size_t                      _words_scanned;
-  // When _words_scanned reaches this limit, the regular clock is
-  // called. Notice that this might be decreased under certain
-  // circumstances (i.e. when we believe that we did an expensive
-  // operation).
-  size_t                      _words_scanned_limit;
-  // the initial value of _words_scanned_limit (i.e. what it was
-  // before it was decreased).
-  size_t                      _real_words_scanned_limit;
-
-  // the number of references this task has visited
-  size_t                      _refs_reached;
-  // When _refs_reached reaches this limit, the regular clock is
-  // called. Notice this this might be decreased under certain
-  // circumstances (i.e. when we believe that we did an expensive
-  // operation).
-  size_t                      _refs_reached_limit;
-  // the initial value of _refs_reached_limit (i.e. what it was before
-  // it was decreased).
-  size_t                      _real_refs_reached_limit;
-
-  // used by the work stealing stuff
-  int                         _hash_seed;
-  // if this is true, then the task has aborted for some reason
-  bool                        _has_aborted;
-  // set when the task aborts because it has met its time quota
-  bool                        _has_timed_out;
-  // true when we're draining SATB buffers; this avoids the task
-  // aborting due to SATB buffers being available (as we're already
-  // dealing with them)
-  bool                        _draining_satb_buffers;
-
-  // number sequence of past step times
-  NumberSeq                   _step_times_ms;
-  // elapsed time of this task
-  double                      _elapsed_time_ms;
-  // termination time of this task
-  double                      _termination_time_ms;
-  // when this task got into the termination protocol
-  double                      _termination_start_time_ms;
-
-  // true when the task is during a concurrent phase, false when it is
-  // in the remark phase (so, in the latter case, we do not have to
-  // check all the things that we have to check during the concurrent
-  // phase, i.e. SATB buffer availability...)
-  bool                        _concurrent;
-
-  TruncatedSeq                _marking_step_diffs_ms;
-
-  // Counting data structures. Embedding the task's marked_bytes_array
-  // and card bitmap into the actual task saves having to go through
-  // the ConcurrentMark object.
-  size_t*                     _marked_bytes_array;
-  BitMap*                     _card_bm;
-
-  // it updates the local fields after this task has claimed
-  // a new region to scan
-  void setup_for_region(HeapRegion* hr);
-  // it brings up-to-date the limit of the region
-  void update_region_limit();
-
-  // called when either the words scanned or the refs visited limit
-  // has been reached
-  void reached_limit();
-  // recalculates the words scanned and refs visited limits
-  void recalculate_limits();
-  // decreases the words scanned and refs visited limits when we reach
-  // an expensive operation
-  void decrease_limits();
-  // it checks whether the words scanned or refs visited reached their
-  // respective limit and calls reached_limit() if they have
-  void check_limits() {
-    if (_words_scanned >= _words_scanned_limit ||
-        _refs_reached >= _refs_reached_limit) {
-      reached_limit();
-    }
-  }
-  // this is supposed to be called regularly during a marking step as
-  // it checks a bunch of conditions that might cause the marking step
-  // to abort
-  void regular_clock_call();
-  bool concurrent() { return _concurrent; }
-
-  // Test whether obj might have already been passed over by the
-  // mark bitmap scan, and so needs to be pushed onto the mark stack.
-  bool is_below_finger(oop obj, HeapWord* global_finger) const;
-
-  template<bool scan> void process_grey_object(oop obj);
-
-public:
-  // It resets the task; it should be called right at the beginning of
-  // a marking phase.
-  void reset(CMBitMap* _nextMarkBitMap);
-  // it clears all the fields that correspond to a claimed region.
-  void clear_region_fields();
-
-  void set_concurrent(bool concurrent) { _concurrent = concurrent; }
-
-  // The main method of this class which performs a marking step
-  // trying not to exceed the given duration. However, it might exit
-  // prematurely, according to some conditions (i.e. SATB buffers are
-  // available for processing).
-  void do_marking_step(double target_ms,
-                       bool do_termination,
-                       bool is_serial);
-
-  // These two calls start and stop the timer
-  void record_start_time() {
-    _elapsed_time_ms = os::elapsedTime() * 1000.0;
-  }
-  void record_end_time() {
-    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
-  }
-
-  // returns the worker ID associated with this task.
-  uint worker_id() { return _worker_id; }
-
-  // From TerminatorTerminator. It determines whether this task should
-  // exit the termination protocol after it's entered it.
-  virtual bool should_exit_termination();
-
-  // Resets the local region fields after a task has finished scanning a
-  // region; or when they have become stale as a result of the region
-  // being evacuated.
-  void giveup_current_region();
-
-  HeapWord* finger()            { return _finger; }
-
-  bool has_aborted()            { return _has_aborted; }
-  void set_has_aborted()        { _has_aborted = true; }
-  void clear_has_aborted()      { _has_aborted = false; }
-  bool has_timed_out()          { return _has_timed_out; }
-  bool claimed()                { return _claimed; }
-
-  void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
-
-  // Increment the number of references this task has visited.
-  void increment_refs_reached() { ++_refs_reached; }
-
-  // Grey the object by marking it.  If not already marked, push it on
-  // the local queue if below the finger.
-  // Precondition: obj is in region.
-  // Precondition: obj is below region's NTAMS.
-  inline void make_reference_grey(oop obj, HeapRegion* region);
-
-  // Grey the object (by calling make_grey_reference) if required,
-  // e.g. obj is below its containing region's NTAMS.
-  // Precondition: obj is a valid heap object.
-  inline void deal_with_reference(oop obj);
-
-  // It scans an object and visits its children.
-  inline void scan_object(oop obj);
-
-  // It pushes an object on the local queue.
-  inline void push(oop obj);
-
-  // These two move entries to/from the global stack.
-  void move_entries_to_global_stack();
-  void get_entries_from_global_stack();
-
-  // It pops and scans objects from the local queue. If partially is
-  // true, then it stops when the queue size is of a given limit. If
-  // partially is false, then it stops when the queue is empty.
-  void drain_local_queue(bool partially);
-  // It moves entries from the global stack to the local queue and
-  // drains the local queue. If partially is true, then it stops when
-  // both the global stack and the local queue reach a given size. If
-  // partially if false, it tries to empty them totally.
-  void drain_global_stack(bool partially);
-  // It keeps picking SATB buffers and processing them until no SATB
-  // buffers are available.
-  void drain_satb_buffers();
-
-  // moves the local finger to a new location
-  inline void move_finger_to(HeapWord* new_finger) {
-    assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
-    _finger = new_finger;
-  }
-
-  CMTask(uint worker_id,
-         ConcurrentMark *cm,
-         size_t* marked_bytes,
-         BitMap* card_bm,
-         CMTaskQueue* task_queue,
-         CMTaskQueueSet* task_queues);
-
-  // it prints statistics associated with this task
-  void print_stats();
-};
-
-// Class that's used to to print out per-region liveness
-// information. It's currently used at the end of marking and also
-// after we sort the old regions at the end of the cleanup operation.
-class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
-private:
-  // Accumulators for these values.
-  size_t _total_used_bytes;
-  size_t _total_capacity_bytes;
-  size_t _total_prev_live_bytes;
-  size_t _total_next_live_bytes;
-
-  // These are set up when we come across a "stars humongous" region
-  // (as this is where most of this information is stored, not in the
-  // subsequent "continues humongous" regions). After that, for every
-  // region in a given humongous region series we deduce the right
-  // values for it by simply subtracting the appropriate amount from
-  // these fields. All these values should reach 0 after we've visited
-  // the last region in the series.
-  size_t _hum_used_bytes;
-  size_t _hum_capacity_bytes;
-  size_t _hum_prev_live_bytes;
-  size_t _hum_next_live_bytes;
-
-  // Accumulator for the remembered set size
-  size_t _total_remset_bytes;
-
-  // Accumulator for strong code roots memory size
-  size_t _total_strong_code_roots_bytes;
-
-  static double perc(size_t val, size_t total) {
-    if (total == 0) {
-      return 0.0;
-    } else {
-      return 100.0 * ((double) val / (double) total);
-    }
-  }
-
-  static double bytes_to_mb(size_t val) {
-    return (double) val / (double) M;
-  }
-
-  // See the .cpp file.
-  size_t get_hum_bytes(size_t* hum_bytes);
-  void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
-                     size_t* prev_live_bytes, size_t* next_live_bytes);
-
-public:
-  // The header and footer are printed in the constructor and
-  // destructor respectively.
-  G1PrintRegionLivenessInfoClosure(const char* phase_name);
-  virtual bool doHeapRegion(HeapRegion* r);
-  ~G1PrintRegionLivenessInfoClosure();
-};
-
-#endif // SHARE_VM_GC_G1_CONCURRENTMARK_HPP
--- a/hotspot/src/share/vm/gc/g1/concurrentMark.inline.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,394 +0,0 @@
-/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_G1_CONCURRENTMARK_INLINE_HPP
-#define SHARE_VM_GC_G1_CONCURRENTMARK_INLINE_HPP
-
-#include "gc/g1/concurrentMark.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/shared/taskqueue.inline.hpp"
-
-// Utility routine to set an exclusive range of cards on the given
-// card liveness bitmap
-inline void ConcurrentMark::set_card_bitmap_range(BitMap* card_bm,
-                                                  BitMap::idx_t start_idx,
-                                                  BitMap::idx_t end_idx,
-                                                  bool is_par) {
-
-  // Set the exclusive bit range [start_idx, end_idx).
-  assert((end_idx - start_idx) > 0, "at least one card");
-  assert(end_idx <= card_bm->size(), "sanity");
-
-  // Silently clip the end index
-  end_idx = MIN2(end_idx, card_bm->size());
-
-  // For small ranges use a simple loop; otherwise use set_range or
-  // use par_at_put_range (if parallel). The range is made up of the
-  // cards that are spanned by an object/mem region so 8 cards will
-  // allow up to object sizes up to 4K to be handled using the loop.
-  if ((end_idx - start_idx) <= 8) {
-    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
-      if (is_par) {
-        card_bm->par_set_bit(i);
-      } else {
-        card_bm->set_bit(i);
-      }
-    }
-  } else {
-    // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive.
-    if (is_par) {
-      card_bm->par_at_put_range(start_idx, end_idx, true);
-    } else {
-      card_bm->set_range(start_idx, end_idx);
-    }
-  }
-}
-
-// Returns the index in the liveness accounting card bitmap
-// for the given address
-inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
-  // Below, the term "card num" means the result of shifting an address
-  // by the card shift -- address 0 corresponds to card number 0.  One
-  // must subtract the card num of the bottom of the heap to obtain a
-  // card table index.
-  intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
-  return card_num - heap_bottom_card_num();
-}
-
-// Counts the given memory region in the given task/worker
-// counting data structures.
-inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
-                                         size_t* marked_bytes_array,
-                                         BitMap* task_card_bm) {
-  G1CollectedHeap* g1h = _g1h;
-  CardTableModRefBS* ct_bs = g1h->g1_barrier_set();
-
-  HeapWord* start = mr.start();
-  HeapWord* end = mr.end();
-  size_t region_size_bytes = mr.byte_size();
-  uint index = hr->hrm_index();
-
-  assert(hr == g1h->heap_region_containing(start), "sanity");
-  assert(marked_bytes_array != NULL, "pre-condition");
-  assert(task_card_bm != NULL, "pre-condition");
-
-  // Add to the task local marked bytes for this region.
-  marked_bytes_array[index] += region_size_bytes;
-
-  BitMap::idx_t start_idx = card_bitmap_index_for(start);
-  BitMap::idx_t end_idx = card_bitmap_index_for(end);
-
-  // Note: if we're looking at the last region in heap - end
-  // could be actually just beyond the end of the heap; end_idx
-  // will then correspond to a (non-existent) card that is also
-  // just beyond the heap.
-  if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) {
-    // end of region is not card aligned - increment to cover
-    // all the cards spanned by the region.
-    end_idx += 1;
-  }
-  // The card bitmap is task/worker specific => no need to use
-  // the 'par' BitMap routines.
-  // Set bits in the exclusive bit range [start_idx, end_idx).
-  set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */);
-}
-
-// Counts the given object in the given task/worker counting data structures.
-inline void ConcurrentMark::count_object(oop obj,
-                                         HeapRegion* hr,
-                                         size_t* marked_bytes_array,
-                                         BitMap* task_card_bm,
-                                         size_t word_size) {
-  assert(!hr->is_continues_humongous(), "Cannot enter count_object with continues humongous");
-  if (!hr->is_starts_humongous()) {
-    MemRegion mr((HeapWord*)obj, word_size);
-    count_region(mr, hr, marked_bytes_array, task_card_bm);
-  } else {
-    do {
-      MemRegion mr(hr->bottom(), hr->top());
-      count_region(mr, hr, marked_bytes_array, task_card_bm);
-      hr = _g1h->next_region_in_humongous(hr);
-    } while (hr != NULL);
-  }
-}
-
-// Attempts to mark the given object and, if successful, counts
-// the object in the given task/worker counting structures.
-inline bool ConcurrentMark::par_mark_and_count(oop obj,
-                                               HeapRegion* hr,
-                                               size_t* marked_bytes_array,
-                                               BitMap* task_card_bm) {
-  if (_nextMarkBitMap->parMark((HeapWord*)obj)) {
-    // Update the task specific count data for the object.
-    count_object(obj, hr, marked_bytes_array, task_card_bm, obj->size());
-    return true;
-  }
-  return false;
-}
-
-// Attempts to mark the given object and, if successful, counts
-// the object in the task/worker counting structures for the
-// given worker id.
-inline bool ConcurrentMark::par_mark_and_count(oop obj,
-                                               size_t word_size,
-                                               HeapRegion* hr,
-                                               uint worker_id) {
-  if (_nextMarkBitMap->parMark((HeapWord*)obj)) {
-    size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
-    BitMap* task_card_bm = count_card_bitmap_for(worker_id);
-    count_object(obj, hr, marked_bytes_array, task_card_bm, word_size);
-    return true;
-  }
-  return false;
-}
-
-inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
-  HeapWord* start_addr = MAX2(startWord(), mr.start());
-  HeapWord* end_addr = MIN2(endWord(), mr.end());
-
-  if (end_addr > start_addr) {
-    // Right-open interval [start-offset, end-offset).
-    BitMap::idx_t start_offset = heapWordToOffset(start_addr);
-    BitMap::idx_t end_offset = heapWordToOffset(end_addr);
-
-    start_offset = _bm.get_next_one_offset(start_offset, end_offset);
-    while (start_offset < end_offset) {
-      if (!cl->do_bit(start_offset)) {
-        return false;
-      }
-      HeapWord* next_addr = MIN2(nextObject(offsetToHeapWord(start_offset)), end_addr);
-      BitMap::idx_t next_offset = heapWordToOffset(next_addr);
-      start_offset = _bm.get_next_one_offset(next_offset, end_offset);
-    }
-  }
-  return true;
-}
-
-// The argument addr should be the start address of a valid object
-HeapWord* CMBitMapRO::nextObject(HeapWord* addr) {
-  oop obj = (oop) addr;
-  HeapWord* res =  addr + obj->size();
-  assert(offsetToHeapWord(heapWordToOffset(res)) == res, "sanity");
-  return res;
-}
-
-#define check_mark(addr)                                                       \
-  assert(_bmStartWord <= (addr) && (addr) < (_bmStartWord + _bmWordSize),      \
-         "outside underlying space?");                                         \
-  assert(G1CollectedHeap::heap()->is_in_exact(addr),                           \
-         "Trying to access not available bitmap " PTR_FORMAT                   \
-         " corresponding to " PTR_FORMAT " (%u)",                              \
-         p2i(this), p2i(addr), G1CollectedHeap::heap()->addr_to_region(addr));
-
-inline void CMBitMap::mark(HeapWord* addr) {
-  check_mark(addr);
-  _bm.set_bit(heapWordToOffset(addr));
-}
-
-inline void CMBitMap::clear(HeapWord* addr) {
-  check_mark(addr);
-  _bm.clear_bit(heapWordToOffset(addr));
-}
-
-inline bool CMBitMap::parMark(HeapWord* addr) {
-  check_mark(addr);
-  return _bm.par_set_bit(heapWordToOffset(addr));
-}
-
-#undef check_mark
-
-template<typename Fn>
-inline void CMMarkStack::iterate(Fn fn) {
-  assert(_saved_index == _index, "saved index: %d index: %d", _saved_index, _index);
-  for (int i = 0; i < _index; ++i) {
-    fn(_base[i]);
-  }
-}
-
-// It scans an object and visits its children.
-inline void CMTask::scan_object(oop obj) { process_grey_object<true>(obj); }
-
-inline void CMTask::push(oop obj) {
-  HeapWord* objAddr = (HeapWord*) obj;
-  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
-  assert(!_g1h->is_on_master_free_list(
-              _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
-  assert(!_g1h->is_obj_ill(obj), "invariant");
-  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
-
-  if (!_task_queue->push(obj)) {
-    // The local task queue looks full. We need to push some entries
-    // to the global stack.
-    move_entries_to_global_stack();
-
-    // this should succeed since, even if we overflow the global
-    // stack, we should have definitely removed some entries from the
-    // local queue. So, there must be space on it.
-    bool success = _task_queue->push(obj);
-    assert(success, "invariant");
-  }
-}
-
-inline bool CMTask::is_below_finger(oop obj, HeapWord* global_finger) const {
-  // If obj is above the global finger, then the mark bitmap scan
-  // will find it later, and no push is needed.  Similarly, if we have
-  // a current region and obj is between the local finger and the
-  // end of the current region, then no push is needed.  The tradeoff
-  // of checking both vs only checking the global finger is that the
-  // local check will be more accurate and so result in fewer pushes,
-  // but may also be a little slower.
-  HeapWord* objAddr = (HeapWord*)obj;
-  if (_finger != NULL) {
-    // We have a current region.
-
-    // Finger and region values are all NULL or all non-NULL.  We
-    // use _finger to check since we immediately use its value.
-    assert(_curr_region != NULL, "invariant");
-    assert(_region_limit != NULL, "invariant");
-    assert(_region_limit <= global_finger, "invariant");
-
-    // True if obj is less than the local finger, or is between
-    // the region limit and the global finger.
-    if (objAddr < _finger) {
-      return true;
-    } else if (objAddr < _region_limit) {
-      return false;
-    } // Else check global finger.
-  }
-  // Check global finger.
-  return objAddr < global_finger;
-}
-
-template<bool scan>
-inline void CMTask::process_grey_object(oop obj) {
-  assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
-  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
-
-  size_t obj_size = obj->size();
-  _words_scanned += obj_size;
-
-  if (scan) {
-    obj->oop_iterate(_cm_oop_closure);
-  }
-  check_limits();
-}
-
-
-
-inline void CMTask::make_reference_grey(oop obj, HeapRegion* hr) {
-  if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
-    // No OrderAccess:store_load() is needed. It is implicit in the
-    // CAS done in CMBitMap::parMark() call in the routine above.
-    HeapWord* global_finger = _cm->finger();
-
-    // We only need to push a newly grey object on the mark
-    // stack if it is in a section of memory the mark bitmap
-    // scan has already examined.  Mark bitmap scanning
-    // maintains progress "fingers" for determining that.
-    //
-    // Notice that the global finger might be moving forward
-    // concurrently. This is not a problem. In the worst case, we
-    // mark the object while it is above the global finger and, by
-    // the time we read the global finger, it has moved forward
-    // past this object. In this case, the object will probably
-    // be visited when a task is scanning the region and will also
-    // be pushed on the stack. So, some duplicate work, but no
-    // correctness problems.
-    if (is_below_finger(obj, global_finger)) {
-      if (obj->is_typeArray()) {
-        // Immediately process arrays of primitive types, rather
-        // than pushing on the mark stack.  This keeps us from
-        // adding humongous objects to the mark stack that might
-        // be reclaimed before the entry is processed - see
-        // selection of candidates for eager reclaim of humongous
-        // objects.  The cost of the additional type test is
-        // mitigated by avoiding a trip through the mark stack,
-        // by only doing a bookkeeping update and avoiding the
-        // actual scan of the object - a typeArray contains no
-        // references, and the metadata is built-in.
-        process_grey_object<false>(obj);
-      } else {
-        push(obj);
-      }
-    }
-  }
-}
-
-inline void CMTask::deal_with_reference(oop obj) {
-  increment_refs_reached();
-
-  HeapWord* objAddr = (HeapWord*) obj;
-  assert(obj->is_oop_or_null(true /* ignore mark word */), "Expected an oop or NULL at " PTR_FORMAT, p2i(obj));
-  if (_g1h->is_in_g1_reserved(objAddr)) {
-    assert(obj != NULL, "null check is implicit");
-    if (!_nextMarkBitMap->isMarked(objAddr)) {
-      // Only get the containing region if the object is not marked on the
-      // bitmap (otherwise, it's a waste of time since we won't do
-      // anything with it).
-      HeapRegion* hr = _g1h->heap_region_containing(obj);
-      if (!hr->obj_allocated_since_next_marking(obj)) {
-        make_reference_grey(obj, hr);
-      }
-    }
-  }
-}
-
-inline void ConcurrentMark::markPrev(oop p) {
-  assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
-  // Note we are overriding the read-only view of the prev map here, via
-  // the cast.
-  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
-}
-
-bool ConcurrentMark::isPrevMarked(oop p) const {
-  assert(p != NULL && p->is_oop(), "expected an oop");
-  HeapWord* addr = (HeapWord*)p;
-  assert(addr >= _prevMarkBitMap->startWord() ||
-         addr < _prevMarkBitMap->endWord(), "in a region");
-
-  return _prevMarkBitMap->isMarked(addr);
-}
-
-inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
-                                     uint worker_id, HeapRegion* hr) {
-  assert(obj != NULL, "pre-condition");
-  HeapWord* addr = (HeapWord*) obj;
-  if (hr == NULL) {
-    hr = _g1h->heap_region_containing(addr);
-  } else {
-    assert(hr->is_in(addr), "pre-condition");
-  }
-  assert(hr != NULL, "sanity");
-  // Given that we're looking for a region that contains an object
-  // header it's impossible to get back a HC region.
-  assert(!hr->is_continues_humongous(), "sanity");
-
-  if (addr < hr->next_top_at_mark_start()) {
-    if (!_nextMarkBitMap->isMarked(addr)) {
-      par_mark_and_count(obj, word_size, hr, worker_id);
-    }
-  }
-}
-
-#endif // SHARE_VM_GC_G1_CONCURRENTMARK_INLINE_HPP
--- a/hotspot/src/share/vm/gc/g1/concurrentMarkThread.cpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/concurrentMarkThread.cpp	Fri Feb 05 18:37:18 2016 +0100
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,7 +43,7 @@
 SurrogateLockerThread*
      ConcurrentMarkThread::_slt = NULL;
 
-ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) :
+ConcurrentMarkThread::ConcurrentMarkThread(G1ConcurrentMark* cm) :
   ConcurrentGCThread(),
   _cm(cm),
   _state(Idle),
@@ -56,10 +56,10 @@
 
 class CMCheckpointRootsFinalClosure: public VoidClosure {
 
-  ConcurrentMark* _cm;
+  G1ConcurrentMark* _cm;
 public:
 
-  CMCheckpointRootsFinalClosure(ConcurrentMark* cm) :
+  CMCheckpointRootsFinalClosure(G1ConcurrentMark* cm) :
     _cm(cm) {}
 
   void do_void(){
@@ -68,10 +68,10 @@
 };
 
 class CMCleanUp: public VoidClosure {
-  ConcurrentMark* _cm;
+  G1ConcurrentMark* _cm;
 public:
 
-  CMCleanUp(ConcurrentMark* cm) :
+  CMCleanUp(G1ConcurrentMark* cm) :
     _cm(cm) {}
 
   void do_void(){
@@ -92,10 +92,10 @@
 }
 
 class GCConcPhaseTimer : StackObj {
-  ConcurrentMark* _cm;
+  G1ConcurrentMark* _cm;
 
  public:
-  GCConcPhaseTimer(ConcurrentMark* cm, const char* title) : _cm(cm) {
+  GCConcPhaseTimer(G1ConcurrentMark* cm, const char* title) : _cm(cm) {
     _cm->register_concurrent_phase_start(title);
   }
 
--- a/hotspot/src/share/vm/gc/g1/concurrentMarkThread.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/concurrentMarkThread.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,10 +27,10 @@
 
 #include "gc/shared/concurrentGCThread.hpp"
 
-// The Concurrent Mark GC Thread triggers the parallel CMConcurrentMarkingTasks
+// The Concurrent Mark GC Thread triggers the parallel G1CMConcurrentMarkingTasks
 // as well as handling various marking cleanup.
 
-class ConcurrentMark;
+class G1ConcurrentMark;
 class G1CollectorPolicy;
 
 class ConcurrentMarkThread: public ConcurrentGCThread {
@@ -45,7 +45,7 @@
   virtual void run();
 
  private:
-  ConcurrentMark*                  _cm;
+  G1ConcurrentMark*                _cm;
 
   enum State {
     Idle,
@@ -65,7 +65,7 @@
 
  public:
   // Constructor
-  ConcurrentMarkThread(ConcurrentMark* cm);
+  ConcurrentMarkThread(G1ConcurrentMark* cm);
 
   static void makeSurrogateLockerThread(TRAPS);
   static SurrogateLockerThread* slt() { return _slt; }
@@ -75,7 +75,7 @@
   // Marking virtual time so far this thread and concurrent marking tasks.
   double vtime_mark_accum();
 
-  ConcurrentMark* cm()     { return _cm; }
+  G1ConcurrentMark* cm()   { return _cm; }
 
   void set_idle()          { assert(_state != Started, "must not be starting a new cycle"); _state = Idle; }
   bool idle()              { return _state == Idle; }
--- a/hotspot/src/share/vm/gc/g1/concurrentMarkThread.inline.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/concurrentMarkThread.inline.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -25,8 +25,8 @@
 #ifndef SHARE_VM_GC_G1_CONCURRENTMARKTHREAD_INLINE_HPP
 #define SHARE_VM_GC_G1_CONCURRENTMARKTHREAD_INLINE_HPP
 
-#include "gc/g1/concurrentMark.hpp"
 #include "gc/g1/concurrentMarkThread.hpp"
+#include "gc/g1/g1ConcurrentMark.hpp"
 
   // Total virtual time so far.
 inline double ConcurrentMarkThread::vtime_accum() {
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Fri Feb 05 18:37:18 2016 +0100
@@ -1417,13 +1417,13 @@
 
       // Clear the previous marking bitmap, if needed for bitmap verification.
       // Note we cannot do this when we clear the next marking bitmap in
-      // ConcurrentMark::abort() above since VerifyDuringGC verifies the
+      // G1ConcurrentMark::abort() above since VerifyDuringGC verifies the
       // objects marked during a full GC against the previous bitmap.
       // But we need to clear it before calling check_bitmaps below since
       // the full GC has compacted objects and updated TAMS but not updated
       // the prev bitmap.
       if (G1VerifyBitmaps) {
-        ((CMBitMap*) concurrent_mark()->prevMarkBitMap())->clearAll();
+        ((G1CMBitMap*) concurrent_mark()->prevMarkBitMap())->clearAll();
       }
       _verifier->check_bitmaps("Full GC End");
 
@@ -1924,11 +1924,11 @@
                              G1CardCounts::compute_size(g1_rs.size() / HeapWordSize),
                              G1CardCounts::heap_map_factor());
 
-  size_t bitmap_size = CMBitMap::compute_size(g1_rs.size());
+  size_t bitmap_size = G1CMBitMap::compute_size(g1_rs.size());
   G1RegionToSpaceMapper* prev_bitmap_storage =
-    create_aux_memory_mapper("Prev Bitmap", bitmap_size, CMBitMap::heap_map_factor());
+    create_aux_memory_mapper("Prev Bitmap", bitmap_size, G1CMBitMap::heap_map_factor());
   G1RegionToSpaceMapper* next_bitmap_storage =
-    create_aux_memory_mapper("Next Bitmap", bitmap_size, CMBitMap::heap_map_factor());
+    create_aux_memory_mapper("Next Bitmap", bitmap_size, G1CMBitMap::heap_map_factor());
 
   _hrm.initialize(heap_storage, prev_bitmap_storage, next_bitmap_storage, bot_storage, cardtable_storage, card_counts_storage);
   g1_barrier_set()->initialize(cardtable_storage);
@@ -1960,11 +1960,11 @@
     _humongous_reclaim_candidates.initialize(start, end, granularity);
   }
 
-  // Create the ConcurrentMark data structure and thread.
+  // Create the G1ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
-  _cm = new ConcurrentMark(this, prev_bitmap_storage, next_bitmap_storage);
+  _cm = new G1ConcurrentMark(this, prev_bitmap_storage, next_bitmap_storage);
   if (_cm == NULL || !_cm->completed_initialization()) {
-    vm_shutdown_during_initialization("Could not create/initialize ConcurrentMark");
+    vm_shutdown_during_initialization("Could not create/initialize G1ConcurrentMark");
     return JNI_ENOMEM;
   }
   _cmThread = _cm->cmThread();
@@ -4992,7 +4992,7 @@
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
     oop obj = (oop)r->bottom();
-    CMBitMap* next_bitmap = g1h->concurrent_mark()->nextMarkBitMap();
+    G1CMBitMap* next_bitmap = g1h->concurrent_mark()->nextMarkBitMap();
 
     // The following checks whether the humongous object is live are sufficient.
     // The main additional check (in addition to having a reference from the roots
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,11 +25,11 @@
 #ifndef SHARE_VM_GC_G1_G1COLLECTEDHEAP_HPP
 #define SHARE_VM_GC_G1_G1COLLECTEDHEAP_HPP
 
-#include "gc/g1/concurrentMark.hpp"
 #include "gc/g1/evacuationInfo.hpp"
 #include "gc/g1/g1AllocationContext.hpp"
 #include "gc/g1/g1BiasedArray.hpp"
 #include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1ConcurrentMark.hpp"
 #include "gc/g1/g1HRPrinter.hpp"
 #include "gc/g1/g1InCSetState.hpp"
 #include "gc/g1/g1MonitoringSupport.hpp"
@@ -68,7 +68,7 @@
 class G1CollectorPolicy;
 class G1RemSet;
 class HeapRegionRemSetIterator;
-class ConcurrentMark;
+class G1ConcurrentMark;
 class ConcurrentMarkThread;
 class ConcurrentG1Refine;
 class ConcurrentGCTimer;
@@ -771,7 +771,7 @@
   void abandon_collection_set(HeapRegion* cs_head);
 
   // The concurrent marker (and the thread it runs in.)
-  ConcurrentMark* _cm;
+  G1ConcurrentMark* _cm;
   ConcurrentMarkThread* _cmThread;
 
   // The concurrent refiner.
@@ -1380,7 +1380,7 @@
 
   inline bool is_obj_ill(const oop obj) const;
 
-  ConcurrentMark* concurrent_mark() const { return _cm; }
+  G1ConcurrentMark* concurrent_mark() const { return _cm; }
 
   // Refinement
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,10 @@
 #ifndef SHARE_VM_GC_G1_G1COLLECTEDHEAP_INLINE_HPP
 #define SHARE_VM_GC_G1_G1COLLECTEDHEAP_INLINE_HPP
 
-#include "gc/g1/concurrentMark.hpp"
 #include "gc/g1/g1CollectedHeap.hpp"
 #include "gc/g1/g1CollectorPolicy.hpp"
 #include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1ConcurrentMark.hpp"
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/heapRegionManager.inline.hpp"
 #include "gc/g1/heapRegionSet.inline.hpp"
--- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Fri Feb 05 18:37:18 2016 +0100
@@ -24,10 +24,10 @@
 
 #include "precompiled.hpp"
 #include "gc/g1/concurrentG1Refine.hpp"
-#include "gc/g1/concurrentMark.hpp"
 #include "gc/g1/concurrentMarkThread.inline.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectorPolicy.hpp"
+#include "gc/g1/g1ConcurrentMark.hpp"
 #include "gc/g1/g1IHOPControl.hpp"
 #include "gc/g1/g1GCPhaseTimes.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Fri Feb 05 18:37:18 2016 +0100
@@ -0,0 +1,3682 @@
+/*
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/metadataOnStackMark.hpp"
+#include "classfile/symbolTable.hpp"
+#include "code/codeCache.hpp"
+#include "gc/g1/concurrentMarkThread.inline.hpp"
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1CollectorPolicy.hpp"
+#include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
+#include "gc/g1/g1HeapVerifier.hpp"
+#include "gc/g1/g1OopClosures.inline.hpp"
+#include "gc/g1/g1StringDedup.hpp"
+#include "gc/g1/heapRegion.inline.hpp"
+#include "gc/g1/heapRegionRemSet.hpp"
+#include "gc/g1/heapRegionSet.inline.hpp"
+#include "gc/g1/suspendibleThreadSet.hpp"
+#include "gc/shared/gcId.hpp"
+#include "gc/shared/gcTimer.hpp"
+#include "gc/shared/gcTrace.hpp"
+#include "gc/shared/gcTraceTime.inline.hpp"
+#include "gc/shared/genOopClosures.inline.hpp"
+#include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/strongRootsScope.hpp"
+#include "gc/shared/taskqueue.inline.hpp"
+#include "gc/shared/vmGCOperations.hpp"
+#include "logging/log.hpp"
+#include "memory/allocation.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/atomic.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/java.hpp"
+#include "runtime/prefetch.inline.hpp"
+#include "services/memTracker.hpp"
+
+// Concurrent marking bit map wrapper
+
+G1CMBitMapRO::G1CMBitMapRO(int shifter) :
+  _bm(),
+  _shifter(shifter) {
+  _bmStartWord = 0;
+  _bmWordSize = 0;
+}
+
+HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
+                                                 const HeapWord* limit) const {
+  // First we must round addr *up* to a possible object boundary.
+  addr = (HeapWord*)align_size_up((intptr_t)addr,
+                                  HeapWordSize << _shifter);
+  size_t addrOffset = heapWordToOffset(addr);
+  assert(limit != NULL, "limit must not be NULL");
+  size_t limitOffset = heapWordToOffset(limit);
+  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
+  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
+  assert(nextAddr >= addr, "get_next_one postcondition");
+  assert(nextAddr == limit || isMarked(nextAddr),
+         "get_next_one postcondition");
+  return nextAddr;
+}
+
+#ifndef PRODUCT
+bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
+  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
+  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
+         "size inconsistency");
+  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
+         _bmWordSize  == heap_rs.word_size();
+}
+#endif
+
+void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
+  _bm.print_on_error(st, prefix);
+}
+
+size_t G1CMBitMap::compute_size(size_t heap_size) {
+  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
+}
+
+size_t G1CMBitMap::mark_distance() {
+  return MinObjAlignmentInBytes * BitsPerByte;
+}
+
+void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
+  _bmStartWord = heap.start();
+  _bmWordSize = heap.word_size();
+
+  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
+  _bm.set_size(_bmWordSize >> _shifter);
+
+  storage->set_mapping_changed_listener(&_listener);
+}
+
+void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
+  if (zero_filled) {
+    return;
+  }
+  // We need to clear the bitmap on commit, removing any existing information.
+  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
+  _bm->clearRange(mr);
+}
+
+// Closure used for clearing the given mark bitmap.
+class ClearBitmapHRClosure : public HeapRegionClosure {
+ private:
+  G1ConcurrentMark* _cm;
+  G1CMBitMap* _bitmap;
+  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
+ public:
+  ClearBitmapHRClosure(G1ConcurrentMark* cm, G1CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
+    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    size_t const chunk_size_in_words = M / HeapWordSize;
+
+    HeapWord* cur = r->bottom();
+    HeapWord* const end = r->end();
+
+    while (cur < end) {
+      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
+      _bitmap->clearRange(mr);
+
+      cur += chunk_size_in_words;
+
+      // Abort iteration if after yielding the marking has been aborted.
+      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
+        return true;
+      }
+      // Repeat the asserts from before the start of the closure. We will do them
+      // as asserts here to minimize their overhead on the product. However, we
+      // will have them as guarantees at the beginning / end of the bitmap
+      // clearing to get some checking in the product.
+      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
+      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
+    }
+
+    return false;
+  }
+};
+
+class ParClearNextMarkBitmapTask : public AbstractGangTask {
+  ClearBitmapHRClosure* _cl;
+  HeapRegionClaimer     _hrclaimer;
+  bool                  _suspendible; // If the task is suspendible, workers must join the STS.
+
+public:
+  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
+      _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}
+
+  void work(uint worker_id) {
+    SuspendibleThreadSetJoiner sts_join(_suspendible);
+    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
+  }
+};
+
+void G1CMBitMap::clearAll() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
+  uint n_workers = g1h->workers()->active_workers();
+  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
+  g1h->workers()->run_task(&task);
+  guarantee(cl.complete(), "Must have completed iteration.");
+  return;
+}
+
+void G1CMBitMap::clearRange(MemRegion mr) {
+  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
+  assert(!mr.is_empty(), "unexpected empty region");
+  // convert address range into offset range
+  _bm.at_put_range(heapWordToOffset(mr.start()),
+                   heapWordToOffset(mr.end()), false);
+}
+
+G1CMMarkStack::G1CMMarkStack(G1ConcurrentMark* cm) :
+  _base(NULL), _cm(cm)
+{}
+
+bool G1CMMarkStack::allocate(size_t capacity) {
+  // allocate a stack of the requisite depth
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
+  if (!rs.is_reserved()) {
+    warning("ConcurrentMark MarkStack allocation failure");
+    return false;
+  }
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+  if (!_virtual_space.initialize(rs, rs.size())) {
+    warning("ConcurrentMark MarkStack backing store failure");
+    // Release the virtual memory reserved for the marking stack
+    rs.release();
+    return false;
+  }
+  assert(_virtual_space.committed_size() == rs.size(),
+         "Didn't reserve backing store for all of G1ConcurrentMark stack?");
+  _base = (oop*) _virtual_space.low();
+  setEmpty();
+  _capacity = (jint) capacity;
+  _saved_index = -1;
+  _should_expand = false;
+  return true;
+}
+
+void G1CMMarkStack::expand() {
+  // Called, during remark, if we've overflown the marking stack during marking.
+  assert(isEmpty(), "stack should been emptied while handling overflow");
+  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
+  // Clear expansion flag
+  _should_expand = false;
+  if (_capacity == (jint) MarkStackSizeMax) {
+    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
+    return;
+  }
+  // Double capacity if possible
+  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
+  // Do not give up existing stack until we have managed to
+  // get the double capacity that we desired.
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
+                                                           sizeof(oop)));
+  if (rs.is_reserved()) {
+    // Release the backing store associated with old stack
+    _virtual_space.release();
+    // Reinitialize virtual space for new stack
+    if (!_virtual_space.initialize(rs, rs.size())) {
+      fatal("Not enough swap for expanded marking stack capacity");
+    }
+    _base = (oop*)(_virtual_space.low());
+    _index = 0;
+    _capacity = new_capacity;
+  } else {
+    // Failed to double capacity, continue;
+    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
+                  _capacity / K, new_capacity / K);
+  }
+}
+
+void G1CMMarkStack::set_should_expand() {
+  // If we're resetting the marking state because of an
+  // marking stack overflow, record that we should, if
+  // possible, expand the stack.
+  _should_expand = _cm->has_overflown();
+}
+
+G1CMMarkStack::~G1CMMarkStack() {
+  if (_base != NULL) {
+    _base = NULL;
+    _virtual_space.release();
+  }
+}
+
+void G1CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  jint start = _index;
+  jint next_index = start + n;
+  if (next_index > _capacity) {
+    _overflow = true;
+    return;
+  }
+  // Otherwise.
+  _index = next_index;
+  for (int i = 0; i < n; i++) {
+    int ind = start + i;
+    assert(ind < _capacity, "By overflow test above.");
+    _base[ind] = ptr_arr[i];
+  }
+}
+
+bool G1CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
+  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+  jint index = _index;
+  if (index == 0) {
+    *n = 0;
+    return false;
+  } else {
+    int k = MIN2(max, index);
+    jint  new_ind = index - k;
+    for (int j = 0; j < k; j++) {
+      ptr_arr[j] = _base[new_ind + j];
+    }
+    _index = new_ind;
+    *n = k;
+    return true;
+  }
+}
+
+void G1CMMarkStack::note_start_of_gc() {
+  assert(_saved_index == -1,
+         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
+  _saved_index = _index;
+}
+
+void G1CMMarkStack::note_end_of_gc() {
+  // This is intentionally a guarantee, instead of an assert. If we
+  // accidentally add something to the mark stack during GC, it
+  // will be a correctness issue so it's better if we crash. we'll
+  // only check this once per GC anyway, so it won't be a performance
+  // issue in any way.
+  guarantee(_saved_index == _index,
+            "saved index: %d index: %d", _saved_index, _index);
+  _saved_index = -1;
+}
+
+G1CMRootRegions::G1CMRootRegions() :
+  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
+  _should_abort(false),  _next_survivor(NULL) { }
+
+void G1CMRootRegions::init(G1CollectedHeap* g1h, G1ConcurrentMark* cm) {
+  _young_list = g1h->young_list();
+  _cm = cm;
+}
+
+void G1CMRootRegions::prepare_for_scan() {
+  assert(!scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  assert(_next_survivor == NULL, "pre-condition");
+  _next_survivor = _young_list->first_survivor_region();
+  _scan_in_progress = (_next_survivor != NULL);
+  _should_abort = false;
+}
+
+HeapRegion* G1CMRootRegions::claim_next() {
+  if (_should_abort) {
+    // If someone has set the should_abort flag, we return NULL to
+    // force the caller to bail out of their loop.
+    return NULL;
+  }
+
+  // Currently, only survivors can be root regions.
+  HeapRegion* res = _next_survivor;
+  if (res != NULL) {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    // Read it again in case it changed while we were waiting for the lock.
+    res = _next_survivor;
+    if (res != NULL) {
+      if (res == _young_list->last_survivor_region()) {
+        // We just claimed the last survivor so store NULL to indicate
+        // that we're done.
+        _next_survivor = NULL;
+      } else {
+        _next_survivor = res->get_next_young_region();
+      }
+    } else {
+      // Someone else claimed the last survivor while we were trying
+      // to take the lock so nothing else to do.
+    }
+  }
+  assert(res == NULL || res->is_survivor(), "post-condition");
+
+  return res;
+}
+
+void G1CMRootRegions::scan_finished() {
+  assert(scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  if (!_should_abort) {
+    assert(_next_survivor == NULL, "we should have claimed all survivors");
+  }
+  _next_survivor = NULL;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    _scan_in_progress = false;
+    RootRegionScan_lock->notify_all();
+  }
+}
+
+bool G1CMRootRegions::wait_until_scan_finished() {
+  if (!scan_in_progress()) return false;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    while (scan_in_progress()) {
+      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
+    }
+  }
+  return true;
+}
+
+uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
+  return MAX2((n_par_threads + 2) / 4, 1U);
+}
+
+G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
+  _g1h(g1h),
+  _markBitMap1(),
+  _markBitMap2(),
+  _parallel_marking_threads(0),
+  _max_parallel_marking_threads(0),
+  _sleep_factor(0.0),
+  _marking_task_overhead(1.0),
+  _cleanup_list("Cleanup List"),
+  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
+  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
+            CardTableModRefBS::card_shift,
+            false /* in_resource_area*/),
+
+  _prevMarkBitMap(&_markBitMap1),
+  _nextMarkBitMap(&_markBitMap2),
+
+  _markStack(this),
+  // _finger set in set_non_marking_state
+
+  _max_worker_id(ParallelGCThreads),
+  // _active_tasks set in set_non_marking_state
+  // _tasks set inside the constructor
+  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
+  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
+
+  _has_overflown(false),
+  _concurrent(false),
+  _has_aborted(false),
+  _restart_for_overflow(false),
+  _concurrent_marking_in_progress(false),
+  _concurrent_phase_started(false),
+
+  // _verbose_level set below
+
+  _init_times(),
+  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
+  _cleanup_times(),
+  _total_counting_time(0.0),
+  _total_rs_scrub_time(0.0),
+
+  _parallel_workers(NULL),
+
+  _count_card_bitmaps(NULL),
+  _count_marked_bytes(NULL),
+  _completed_initialization(false) {
+
+  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
+  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
+
+  // Create & start a ConcurrentMark thread.
+  _cmThread = new ConcurrentMarkThread(this);
+  assert(cmThread() != NULL, "CM Thread should have been created");
+  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
+  if (_cmThread->osthread() == NULL) {
+      vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
+  }
+
+  assert(CGC_lock != NULL, "Where's the CGC_lock?");
+  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
+  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+  satb_qs.set_buffer_size(G1SATBBufferSize);
+
+  _root_regions.init(_g1h, this);
+
+  if (ConcGCThreads > ParallelGCThreads) {
+    warning("Can't have more ConcGCThreads (%u) "
+            "than ParallelGCThreads (%u).",
+            ConcGCThreads, ParallelGCThreads);
+    return;
+  }
+  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
+    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
+    // if both are set
+    _sleep_factor             = 0.0;
+    _marking_task_overhead    = 1.0;
+  } else if (G1MarkingOverheadPercent > 0) {
+    // We will calculate the number of parallel marking threads based
+    // on a target overhead with respect to the soft real-time goal
+    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
+    double overall_cm_overhead =
+      (double) MaxGCPauseMillis * marking_overhead /
+      (double) GCPauseIntervalMillis;
+    double cpu_ratio = 1.0 / (double) os::processor_count();
+    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
+    double marking_task_overhead =
+      overall_cm_overhead / marking_thread_num *
+                                              (double) os::processor_count();
+    double sleep_factor =
+                       (1.0 - marking_task_overhead) / marking_task_overhead;
+
+    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
+    _sleep_factor             = sleep_factor;
+    _marking_task_overhead    = marking_task_overhead;
+  } else {
+    // Calculate the number of parallel marking threads by scaling
+    // the number of parallel GC threads.
+    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
+    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
+    _sleep_factor             = 0.0;
+    _marking_task_overhead    = 1.0;
+  }
+
+  assert(ConcGCThreads > 0, "Should have been set");
+  _parallel_marking_threads = ConcGCThreads;
+  _max_parallel_marking_threads = _parallel_marking_threads;
+
+  _parallel_workers = new WorkGang("G1 Marker",
+       _max_parallel_marking_threads, false, true);
+  if (_parallel_workers == NULL) {
+    vm_exit_during_initialization("Failed necessary allocation.");
+  } else {
+    _parallel_workers->initialize_workers();
+  }
+
+  if (FLAG_IS_DEFAULT(MarkStackSize)) {
+    size_t mark_stack_size =
+      MIN2(MarkStackSizeMax,
+          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
+    // Verify that the calculated value for MarkStackSize is in range.
+    // It would be nice to use the private utility routine from Arguments.
+    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
+      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
+              "must be between 1 and " SIZE_FORMAT,
+              mark_stack_size, MarkStackSizeMax);
+      return;
+    }
+    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
+  } else {
+    // Verify MarkStackSize is in range.
+    if (FLAG_IS_CMDLINE(MarkStackSize)) {
+      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
+                  "must be between 1 and " SIZE_FORMAT,
+                  MarkStackSize, MarkStackSizeMax);
+          return;
+        }
+      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
+                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
+                  MarkStackSize, MarkStackSizeMax);
+          return;
+        }
+      }
+    }
+  }
+
+  if (!_markStack.allocate(MarkStackSize)) {
+    warning("Failed to allocate CM marking stack");
+    return;
+  }
+
+  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
+
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
+
+  BitMap::idx_t card_bm_size = _card_bm.size();
+
+  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
+  _active_tasks = _max_worker_id;
+
+  uint max_regions = _g1h->max_regions();
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
+    task_queue->initialize();
+    _task_queues->register_queue(i, task_queue);
+
+    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
+
+    _tasks[i] = new G1CMTask(i, this,
+                             _count_marked_bytes[i],
+                             &_count_card_bitmaps[i],
+                             task_queue, _task_queues);
+
+    _accum_task_vtime[i] = 0.0;
+  }
+
+  // Calculate the card number for the bottom of the heap. Used
+  // in biasing indexes into the accounting card bitmaps.
+  _heap_bottom_card_num =
+    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
+                                CardTableModRefBS::card_shift);
+
+  // Clear all the liveness counting data
+  clear_all_count_data();
+
+  // so that the call below can read a sensible value
+  _heap_start = g1h->reserved_region().start();
+  set_non_marking_state();
+  _completed_initialization = true;
+}
+
+void G1ConcurrentMark::reset() {
+  // Starting values for these two. This should be called in a STW
+  // phase.
+  MemRegion reserved = _g1h->g1_reserved();
+  _heap_start = reserved.start();
+  _heap_end   = reserved.end();
+
+  // Separated the asserts so that we know which one fires.
+  assert(_heap_start != NULL, "heap bounds should look ok");
+  assert(_heap_end != NULL, "heap bounds should look ok");
+  assert(_heap_start < _heap_end, "heap bounds should look ok");
+
+  // Reset all the marking data structures and any necessary flags
+  reset_marking_state();
+
+  // We do reset all of them, since different phases will use
+  // different number of active threads. So, it's easiest to have all
+  // of them ready.
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    _tasks[i]->reset(_nextMarkBitMap);
+  }
+
+  // we need this to make sure that the flag is on during the evac
+  // pause with initial mark piggy-backed
+  set_concurrent_marking_in_progress();
+}
+
+
+void G1ConcurrentMark::reset_marking_state(bool clear_overflow) {
+  _markStack.set_should_expand();
+  _markStack.setEmpty();        // Also clears the _markStack overflow flag
+  if (clear_overflow) {
+    clear_has_overflown();
+  } else {
+    assert(has_overflown(), "pre-condition");
+  }
+  _finger = _heap_start;
+
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    G1CMTaskQueue* queue = _task_queues->queue(i);
+    queue->set_empty();
+  }
+}
+
+void G1ConcurrentMark::set_concurrency(uint active_tasks) {
+  assert(active_tasks <= _max_worker_id, "we should not have more");
+
+  _active_tasks = active_tasks;
+  // Need to update the three data structures below according to the
+  // number of active threads for this phase.
+  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
+  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
+  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
+}
+
+void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
+  set_concurrency(active_tasks);
+
+  _concurrent = concurrent;
+  // We propagate this to all tasks, not just the active ones.
+  for (uint i = 0; i < _max_worker_id; ++i)
+    _tasks[i]->set_concurrent(concurrent);
+
+  if (concurrent) {
+    set_concurrent_marking_in_progress();
+  } else {
+    // We currently assume that the concurrent flag has been set to
+    // false before we start remark. At this point we should also be
+    // in a STW phase.
+    assert(!concurrent_marking_in_progress(), "invariant");
+    assert(out_of_regions(),
+           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
+           p2i(_finger), p2i(_heap_end));
+  }
+}
+
+void G1ConcurrentMark::set_non_marking_state() {
+  // We set the global marking state to some default values when we're
+  // not doing marking.
+  reset_marking_state();
+  _active_tasks = 0;
+  clear_concurrent_marking_in_progress();
+}
+
+G1ConcurrentMark::~G1ConcurrentMark() {
+  // The G1ConcurrentMark instance is never freed.
+  ShouldNotReachHere();
+}
+
+void G1ConcurrentMark::clearNextBitmap() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // Make sure that the concurrent mark thread looks to still be in
+  // the current cycle.
+  guarantee(cmThread()->during_cycle(), "invariant");
+
+  // We are finishing up the current cycle by clearing the next
+  // marking bitmap and getting it ready for the next cycle. During
+  // this time no other cycle can start. So, let's make sure that this
+  // is the case.
+  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
+
+  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
+  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
+  _parallel_workers->run_task(&task);
+
+  // Clear the liveness counting data. If the marking has been aborted, the abort()
+  // call already did that.
+  if (cl.complete()) {
+    clear_all_count_data();
+  }
+
+  // Repeat the asserts from above.
+  guarantee(cmThread()->during_cycle(), "invariant");
+  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
+}
+
+class CheckBitmapClearHRClosure : public HeapRegionClosure {
+  G1CMBitMap* _bitmap;
+  bool _error;
+ public:
+  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    // This closure can be called concurrently to the mutator, so we must make sure
+    // that the result of the getNextMarkedWordAddress() call is compared to the
+    // value passed to it as limit to detect any found bits.
+    // end never changes in G1.
+    HeapWord* end = r->end();
+    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
+  }
+};
+
+bool G1ConcurrentMark::nextMarkBitmapIsClear() {
+  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
+  _g1h->heap_region_iterate(&cl);
+  return cl.complete();
+}
+
+class NoteStartOfMarkHRClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion* r) {
+    r->note_start_of_marking();
+    return false;
+  }
+};
+
+void G1ConcurrentMark::checkpointRootsInitialPre() {
+  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+
+  _has_aborted = false;
+
+  // Initialize marking structures. This has to be done in a STW phase.
+  reset();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
+}
+
+
+void G1ConcurrentMark::checkpointRootsInitialPost() {
+  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
+
+  // Start Concurrent Marking weak-reference discovery.
+  ReferenceProcessor* rp = g1h->ref_processor_cm();
+  // enable ("weak") refs discovery
+  rp->enable_discovery();
+  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
+
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  // This is the start of  the marking cycle, we're expected all
+  // threads to have SATB queues with active set to false.
+  satb_mq_set.set_active_all_threads(true, /* new active value */
+                                     false /* expected_active */);
+
+  _root_regions.prepare_for_scan();
+
+  // update_g1_committed() will be called at the end of an evac pause
+  // when marking is on. So, it's also called at the end of the
+  // initial-mark pause to update the heap end, if the heap expands
+  // during it. No need to call it here.
+}
+
+/*
+ * Notice that in the next two methods, we actually leave the STS
+ * during the barrier sync and join it immediately afterwards. If we
+ * do not do this, the following deadlock can occur: one thread could
+ * be in the barrier sync code, waiting for the other thread to also
+ * sync up, whereas another one could be trying to yield, while also
+ * waiting for the other threads to sync up too.
+ *
+ * Note, however, that this code is also used during remark and in
+ * this case we should not attempt to leave / enter the STS, otherwise
+ * we'll either hit an assert (debug / fastdebug) or deadlock
+ * (product). So we should only leave / enter the STS if we are
+ * operating concurrently.
+ *
+ * Because the thread that does the sync barrier has left the STS, it
+ * is possible to be suspended for a Full GC or an evacuation pause
+ * could occur. This is actually safe, since the entering the sync
+ * barrier is one of the last things do_marking_step() does, and it
+ * doesn't manipulate any data structures afterwards.
+ */
+
+void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
+  bool barrier_aborted;
+  {
+    SuspendibleThreadSetLeaver sts_leave(concurrent());
+    barrier_aborted = !_first_overflow_barrier_sync.enter();
+  }
+
+  // at this point everyone should have synced up and not be doing any
+  // more work
+
+  if (barrier_aborted) {
+    // If the barrier aborted we ignore the overflow condition and
+    // just abort the whole marking phase as quickly as possible.
+    return;
+  }
+
+  // If we're executing the concurrent phase of marking, reset the marking
+  // state; otherwise the marking state is reset after reference processing,
+  // during the remark pause.
+  // If we reset here as a result of an overflow during the remark we will
+  // see assertion failures from any subsequent set_concurrency_and_phase()
+  // calls.
+  if (concurrent()) {
+    // let the task associated with with worker 0 do this
+    if (worker_id == 0) {
+      // task 0 is responsible for clearing the global data structures
+      // We should be here because of an overflow. During STW we should
+      // not clear the overflow flag since we rely on it being true when
+      // we exit this method to abort the pause and restart concurrent
+      // marking.
+      reset_marking_state(true /* clear_overflow */);
+
+      log_info(gc)("Concurrent Mark reset for overflow");
+    }
+  }
+
+  // after this, each task should reset its own data structures then
+  // then go into the second barrier
+}
+
+void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
+  SuspendibleThreadSetLeaver sts_leave(concurrent());
+  _second_overflow_barrier_sync.enter();
+
+  // at this point everything should be re-initialized and ready to go
+}
+
+class G1CMConcurrentMarkingTask: public AbstractGangTask {
+private:
+  G1ConcurrentMark*     _cm;
+  ConcurrentMarkThread* _cmt;
+
+public:
+  void work(uint worker_id) {
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");
+    ResourceMark rm;
+
+    double start_vtime = os::elapsedVTime();
+
+    {
+      SuspendibleThreadSetJoiner sts_join;
+
+      assert(worker_id < _cm->active_tasks(), "invariant");
+      G1CMTask* the_task = _cm->task(worker_id);
+      the_task->record_start_time();
+      if (!_cm->has_aborted()) {
+        do {
+          double start_vtime_sec = os::elapsedVTime();
+          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
+
+          the_task->do_marking_step(mark_step_duration_ms,
+                                    true  /* do_termination */,
+                                    false /* is_serial*/);
+
+          double end_vtime_sec = os::elapsedVTime();
+          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+          _cm->clear_has_overflown();
+
+          _cm->do_yield_check(worker_id);
+
+          jlong sleep_time_ms;
+          if (!_cm->has_aborted() && the_task->has_aborted()) {
+            sleep_time_ms =
+              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
+            {
+              SuspendibleThreadSetLeaver sts_leave;
+              os::sleep(Thread::current(), sleep_time_ms, false);
+            }
+          }
+        } while (!_cm->has_aborted() && the_task->has_aborted());
+      }
+      the_task->record_end_time();
+      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
+    }
+
+    double end_vtime = os::elapsedVTime();
+    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
+  }
+
+  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
+                            ConcurrentMarkThread* cmt) :
+      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
+
+  ~G1CMConcurrentMarkingTask() { }
+};
+
+// Calculates the number of active workers for a concurrent
+// phase.
+uint G1ConcurrentMark::calc_parallel_marking_threads() {
+  uint n_conc_workers = 0;
+  if (!UseDynamicNumberOfGCThreads ||
+      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
+       !ForceDynamicNumberOfGCThreads)) {
+    n_conc_workers = max_parallel_marking_threads();
+  } else {
+    n_conc_workers =
+      AdaptiveSizePolicy::calc_default_active_workers(
+                                   max_parallel_marking_threads(),
+                                   1, /* Minimum workers */
+                                   parallel_marking_threads(),
+                                   Threads::number_of_non_daemon_threads());
+    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
+    // that scaling has already gone into "_max_parallel_marking_threads".
+  }
+  assert(n_conc_workers > 0, "Always need at least 1");
+  return n_conc_workers;
+}
+
+void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
+  // Currently, only survivors can be root regions.
+  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
+  G1RootRegionScanClosure cl(_g1h, this, worker_id);
+
+  const uintx interval = PrefetchScanIntervalInBytes;
+  HeapWord* curr = hr->bottom();
+  const HeapWord* end = hr->top();
+  while (curr < end) {
+    Prefetch::read(curr, interval);
+    oop obj = oop(curr);
+    int size = obj->oop_iterate_size(&cl);
+    assert(size == obj->size(), "sanity");
+    curr += size;
+  }
+}
+
+class G1CMRootRegionScanTask : public AbstractGangTask {
+private:
+  G1ConcurrentMark* _cm;
+
+public:
+  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
+    AbstractGangTask("Root Region Scan"), _cm(cm) { }
+
+  void work(uint worker_id) {
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");
+
+    G1CMRootRegions* root_regions = _cm->root_regions();
+    HeapRegion* hr = root_regions->claim_next();
+    while (hr != NULL) {
+      _cm->scanRootRegion(hr, worker_id);
+      hr = root_regions->claim_next();
+    }
+  }
+};
+
+void G1ConcurrentMark::scanRootRegions() {
+  // Start of concurrent marking.
+  ClassLoaderDataGraph::clear_claimed_marks();
+
+  // scan_in_progress() will have been set to true only if there was
+  // at least one root region to scan. So, if it's false, we
+  // should not attempt to do any further work.
+  if (root_regions()->scan_in_progress()) {
+    GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
+
+    _parallel_marking_threads = calc_parallel_marking_threads();
+    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+           "Maximum number of marking threads exceeded");
+    uint active_workers = MAX2(1U, parallel_marking_threads());
+
+    G1CMRootRegionScanTask task(this);
+    _parallel_workers->set_active_workers(active_workers);
+    _parallel_workers->run_task(&task);
+
+    // It's possible that has_aborted() is true here without actually
+    // aborting the survivor scan earlier. This is OK as it's
+    // mainly used for sanity checking.
+    root_regions()->scan_finished();
+  }
+}
+
+void G1ConcurrentMark::register_concurrent_phase_start(const char* title) {
+  assert(!_concurrent_phase_started, "Sanity");
+  _concurrent_phase_started = true;
+  _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
+}
+
+void G1ConcurrentMark::register_concurrent_phase_end() {
+  if (_concurrent_phase_started) {
+    _concurrent_phase_started = false;
+    _g1h->gc_timer_cm()->register_gc_concurrent_end();
+  }
+}
+
+void G1ConcurrentMark::markFromRoots() {
+  // we might be tempted to assert that:
+  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
+  //        "inconsistent argument?");
+  // However that wouldn't be right, because it's possible that
+  // a safepoint is indeed in progress as a younger generation
+  // stop-the-world GC happens even as we mark in this generation.
+
+  _restart_for_overflow = false;
+
+  // _g1h has _n_par_threads
+  _parallel_marking_threads = calc_parallel_marking_threads();
+  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+    "Maximum number of marking threads exceeded");
+
+  uint active_workers = MAX2(1U, parallel_marking_threads());
+  assert(active_workers > 0, "Should have been set");
+
+  // Parallel task terminator is set in "set_concurrency_and_phase()"
+  set_concurrency_and_phase(active_workers, true /* concurrent */);
+
+  G1CMConcurrentMarkingTask markingTask(this, cmThread());
+  _parallel_workers->set_active_workers(active_workers);
+  _parallel_workers->run_task(&markingTask);
+  print_stats();
+}
+
+void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
+  // world is stopped at this checkpoint
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "world should be stopped");
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If a full collection has happened, we shouldn't do this.
+  if (has_aborted()) {
+    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
+    return;
+  }
+
+  SvcGCMarker sgcm(SvcGCMarker::OTHER);
+
+  if (VerifyDuringGC) {
+    HandleMark hm;  // handle scope
+    g1h->prepare_for_verify();
+    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
+  }
+  g1h->verifier()->check_bitmaps("Remark Start");
+
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  g1p->record_concurrent_mark_remark_start();
+
+  double start = os::elapsedTime();
+
+  checkpointRootsFinalWork();
+
+  double mark_work_end = os::elapsedTime();
+
+  weakRefsWork(clear_all_soft_refs);
+
+  if (has_overflown()) {
+    // Oops.  We overflowed.  Restart concurrent marking.
+    _restart_for_overflow = true;
+    log_develop_trace(gc)("Remark led to restart for overflow.");
+
+    // Verify the heap w.r.t. the previous marking bitmap.
+    if (VerifyDuringGC) {
+      HandleMark hm;  // handle scope
+      g1h->prepare_for_verify();
+      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
+    }
+
+    // Clear the marking state because we will be restarting
+    // marking due to overflowing the global mark stack.
+    reset_marking_state();
+  } else {
+    {
+      GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());
+
+      // Aggregate the per-task counting data that we have accumulated
+      // while marking.
+      aggregate_count_data();
+    }
+
+    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+    // We're done with marking.
+    // This is the end of  the marking cycle, we're expected all
+    // threads to have SATB queues with active set to true.
+    satb_mq_set.set_active_all_threads(false, /* new active value */
+                                       true /* expected_active */);
+
+    if (VerifyDuringGC) {
+      HandleMark hm;  // handle scope
+      g1h->prepare_for_verify();
+      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
+    }
+    g1h->verifier()->check_bitmaps("Remark End");
+    assert(!restart_for_overflow(), "sanity");
+    // Completely reset the marking state since marking completed
+    set_non_marking_state();
+  }
+
+  // Expand the marking stack, if we have to and if we can.
+  if (_markStack.should_expand()) {
+    _markStack.expand();
+  }
+
+  // Statistics
+  double now = os::elapsedTime();
+  _remark_mark_times.add((mark_work_end - start) * 1000.0);
+  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
+  _remark_times.add((now - start) * 1000.0);
+
+  g1p->record_concurrent_mark_remark_end();
+
+  G1CMIsAliveClosure is_alive(g1h);
+  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
+}
+
+// Base class of the closures that finalize and verify the
+// liveness counting data.
+class G1CMCountDataClosureBase: public HeapRegionClosure {
+protected:
+  G1CollectedHeap* _g1h;
+  G1ConcurrentMark* _cm;
+  CardTableModRefBS* _ct_bs;
+
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+
+  // Takes a region that's not empty (i.e., it has at least one
+  // live object in it and sets its corresponding bit on the region
+  // bitmap to 1.
+  void set_bit_for_region(HeapRegion* hr) {
+    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
+    _region_bm->par_at_put(index, true);
+  }
+
+public:
+  G1CMCountDataClosureBase(G1CollectedHeap* g1h,
+                           BitMap* region_bm, BitMap* card_bm):
+    _g1h(g1h), _cm(g1h->concurrent_mark()),
+    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
+    _region_bm(region_bm), _card_bm(card_bm) { }
+};
+
+// Closure that calculates the # live objects per region. Used
+// for verification purposes during the cleanup pause.
+class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
+  G1CMBitMapRO* _bm;
+  size_t _region_marked_bytes;
+
+public:
+  CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
+                         BitMap* region_bm, BitMap* card_bm) :
+    G1CMCountDataClosureBase(g1h, region_bm, card_bm),
+    _bm(bm), _region_marked_bytes(0) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* start = hr->bottom();
+
+    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
+           "Preconditions not met - "
+           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
+           p2i(start), p2i(ntams), p2i(hr->end()));
+
+    // Find the first marked object at or after "start".
+    start = _bm->getNextMarkedWordAddress(start, ntams);
+
+    size_t marked_bytes = 0;
+
+    while (start < ntams) {
+      oop obj = oop(start);
+      int obj_sz = obj->size();
+      HeapWord* obj_end = start + obj_sz;
+
+      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
+
+      // Note: if we're looking at the last region in heap - obj_end
+      // could be actually just beyond the end of the heap; end_idx
+      // will then correspond to a (non-existent) card that is also
+      // just beyond the heap.
+      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
+        // end of object is not card aligned - increment to cover
+        // all the cards spanned by the object
+        end_idx += 1;
+      }
+
+      // Set the bits in the card BM for the cards spanned by this object.
+      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
+
+      // Add the size of this object to the number of marked bytes.
+      marked_bytes += (size_t)obj_sz * HeapWordSize;
+
+      // This will happen if we are handling a humongous object that spans
+      // several heap regions.
+      if (obj_end > hr->end()) {
+        break;
+      }
+      // Find the next marked object after this one.
+      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
+    }
+
+    // Mark the allocated-since-marking portion...
+    HeapWord* top = hr->top();
+    if (ntams < top) {
+      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
+      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
+
+      // Note: if we're looking at the last region in heap - top
+      // could be actually just beyond the end of the heap; end_idx
+      // will then correspond to a (non-existent) card that is also
+      // just beyond the heap.
+      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
+        // end of object is not card aligned - increment to cover
+        // all the cards spanned by the object
+        end_idx += 1;
+      }
+      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
+
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
+    }
+
+    // Update the live region bitmap.
+    if (marked_bytes > 0) {
+      set_bit_for_region(hr);
+    }
+
+    // Set the marked bytes for the current region so that
+    // it can be queried by a calling verification routine
+    _region_marked_bytes = marked_bytes;
+
+    return false;
+  }
+
+  size_t region_marked_bytes() const { return _region_marked_bytes; }
+};
+
+// Heap region closure used for verifying the counting data
+// that was accumulated concurrently and aggregated during
+// the remark pause. This closure is applied to the heap
+// regions during the STW cleanup pause.
+
+class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  G1ConcurrentMark* _cm;
+  CalcLiveObjectsClosure _calc_cl;
+  BitMap* _region_bm;   // Region BM to be verified
+  BitMap* _card_bm;     // Card BM to be verified
+
+  BitMap* _exp_region_bm; // Expected Region BM values
+  BitMap* _exp_card_bm;   // Expected card BM values
+
+  int _failures;
+
+public:
+  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
+                                BitMap* region_bm,
+                                BitMap* card_bm,
+                                BitMap* exp_region_bm,
+                                BitMap* exp_card_bm) :
+    _g1h(g1h), _cm(g1h->concurrent_mark()),
+    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
+    _region_bm(region_bm), _card_bm(card_bm),
+    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
+    _failures(0) { }
+
+  int failures() const { return _failures; }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    int failures = 0;
+
+    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
+    // this region and set the corresponding bits in the expected region
+    // and card bitmaps.
+    bool res = _calc_cl.doHeapRegion(hr);
+    assert(res == false, "should be continuing");
+
+    // Verify the marked bytes for this region.
+    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
+    size_t act_marked_bytes = hr->next_marked_bytes();
+
+    if (exp_marked_bytes > act_marked_bytes) {
+      if (hr->is_starts_humongous()) {
+        // For start_humongous regions, the size of the whole object will be
+        // in exp_marked_bytes.
+        HeapRegion* region = hr;
+        int num_regions;
+        for (num_regions = 0; region != NULL; num_regions++) {
+          region = _g1h->next_region_in_humongous(region);
+        }
+        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
+          failures += 1;
+        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
+          failures += 1;
+        }
+      } else {
+        // We're not OK if expected marked bytes > actual marked bytes. It means
+        // we have missed accounting some objects during the actual marking.
+        failures += 1;
+      }
+    }
+
+    // Verify the bit, for this region, in the actual and expected
+    // (which was just calculated) region bit maps.
+    // We're not OK if the bit in the calculated expected region
+    // bitmap is set and the bit in the actual region bitmap is not.
+    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
+
+    bool expected = _exp_region_bm->at(index);
+    bool actual = _region_bm->at(index);
+    if (expected && !actual) {
+      failures += 1;
+    }
+
+    // Verify that the card bit maps for the cards spanned by the current
+    // region match. We have an error if we have a set bit in the expected
+    // bit map and the corresponding bit in the actual bitmap is not set.
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
+
+    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
+      expected = _exp_card_bm->at(i);
+      actual = _card_bm->at(i);
+
+      if (expected && !actual) {
+        failures += 1;
+      }
+    }
+
+    _failures += failures;
+
+    // We could stop iteration over the heap when we
+    // find the first violating region by returning true.
+    return false;
+  }
+};
+
+class G1ParVerifyFinalCountTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  G1ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
+  uint    _n_workers;
+
+  BitMap* _expected_region_bm;
+  BitMap* _expected_card_bm;
+
+  int  _failures;
+
+  HeapRegionClaimer _hrclaimer;
+
+public:
+  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
+                            BitMap* region_bm, BitMap* card_bm,
+                            BitMap* expected_region_bm, BitMap* expected_card_bm)
+    : AbstractGangTask("G1 verify final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
+      _failures(0),
+      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
+    assert(VerifyDuringGC, "don't call this otherwise");
+    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
+    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
+  }
+
+  void work(uint worker_id) {
+    assert(worker_id < _n_workers, "invariant");
+
+    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
+                                            _actual_region_bm, _actual_card_bm,
+                                            _expected_region_bm,
+                                            _expected_card_bm);
+
+    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
+
+    Atomic::add(verify_cl.failures(), &_failures);
+  }
+
+  int failures() const { return _failures; }
+};
+
+// Closure that finalizes the liveness counting data.
+// Used during the cleanup pause.
+// Sets the bits corresponding to the interval [NTAMS, top]
+// (which contains the implicitly live objects) in the
+// card liveness bitmap. Also sets the bit for each region,
+// containing live data, in the region liveness bitmap.
+
+class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase {
+ public:
+  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
+                              BitMap* region_bm,
+                              BitMap* card_bm) :
+    G1CMCountDataClosureBase(g1h, region_bm, card_bm) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* top   = hr->top();
+
+    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
+
+    // Mark the allocated-since-marking portion...
+    if (ntams < top) {
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
+
+      // Now set the bits in the card bitmap for [ntams, top)
+      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
+      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
+
+      // Note: if we're looking at the last region in heap - top
+      // could be actually just beyond the end of the heap; end_idx
+      // will then correspond to a (non-existent) card that is also
+      // just beyond the heap.
+      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
+        // end of object is not card aligned - increment to cover
+        // all the cards spanned by the object
+        end_idx += 1;
+      }
+
+      assert(end_idx <= _card_bm->size(),
+             "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
+             end_idx, _card_bm->size());
+      assert(start_idx < _card_bm->size(),
+             "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
+             start_idx, _card_bm->size());
+
+      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
+    }
+
+    // Set the bit for the region if it contains live data
+    if (hr->next_marked_bytes() > 0) {
+      set_bit_for_region(hr);
+    }
+
+    return false;
+  }
+};
+
+class G1ParFinalCountTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  G1ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
+  uint    _n_workers;
+  HeapRegionClaimer _hrclaimer;
+
+public:
+  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
+    : AbstractGangTask("G1 final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
+  }
+
+  void work(uint worker_id) {
+    assert(worker_id < _n_workers, "invariant");
+
+    FinalCountDataUpdateClosure final_update_cl(_g1h,
+                                                _actual_region_bm,
+                                                _actual_card_bm);
+
+    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
+  }
+};
+
+class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
+  G1CollectedHeap* _g1;
+  size_t _freed_bytes;
+  FreeRegionList* _local_cleanup_list;
+  uint _old_regions_removed;
+  uint _humongous_regions_removed;
+  HRRSCleanupTask* _hrrs_cleanup_task;
+
+public:
+  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
+                             FreeRegionList* local_cleanup_list,
+                             HRRSCleanupTask* hrrs_cleanup_task) :
+    _g1(g1),
+    _freed_bytes(0),
+    _local_cleanup_list(local_cleanup_list),
+    _old_regions_removed(0),
+    _humongous_regions_removed(0),
+    _hrrs_cleanup_task(hrrs_cleanup_task) { }
+
+  size_t freed_bytes() { return _freed_bytes; }
+  const uint old_regions_removed() { return _old_regions_removed; }
+  const uint humongous_regions_removed() { return _humongous_regions_removed; }
+
+  bool doHeapRegion(HeapRegion *hr) {
+    if (hr->is_archive()) {
+      return false;
+    }
+    // We use a claim value of zero here because all regions
+    // were claimed with value 1 in the FinalCount task.
+    _g1->reset_gc_time_stamps(hr);
+    hr->note_end_of_marking();
+
+    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
+      _freed_bytes += hr->used();
+      hr->set_containing_set(NULL);
+      if (hr->is_humongous()) {
+        _humongous_regions_removed++;
+        _g1->free_humongous_region(hr, _local_cleanup_list, true);
+      } else {
+        _old_regions_removed++;
+        _g1->free_region(hr, _local_cleanup_list, true);
+      }
+    } else {
+      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
+    }
+
+    return false;
+  }
+};
+
+class G1ParNoteEndTask: public AbstractGangTask {
+  friend class G1NoteEndOfConcMarkClosure;
+
+protected:
+  G1CollectedHeap* _g1h;
+  FreeRegionList* _cleanup_list;
+  HeapRegionClaimer _hrclaimer;
+
+public:
+  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
+      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
+  }
+
+  void work(uint worker_id) {
+    FreeRegionList local_cleanup_list("Local Cleanup List");
+    HRRSCleanupTask hrrs_cleanup_task;
+    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
+                                           &hrrs_cleanup_task);
+    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
+    assert(g1_note_end.complete(), "Shouldn't have yielded!");
+
+    // Now update the lists
+    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
+    {
+      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
+
+      // If we iterate over the global cleanup list at the end of
+      // cleanup to do this printing we will not guarantee to only
+      // generate output for the newly-reclaimed regions (the list
+      // might not be empty at the beginning of cleanup; we might
+      // still be working on its previous contents). So we do the
+      // printing here, before we append the new regions to the global
+      // cleanup list.
+
+      G1HRPrinter* hr_printer = _g1h->hr_printer();
+      if (hr_printer->is_active()) {
+        FreeRegionListIterator iter(&local_cleanup_list);
+        while (iter.more_available()) {
+          HeapRegion* hr = iter.get_next();
+          hr_printer->cleanup(hr);
+        }
+      }
+
+      _cleanup_list->add_ordered(&local_cleanup_list);
+      assert(local_cleanup_list.is_empty(), "post-condition");
+
+      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
+    }
+  }
+};
+
+void G1ConcurrentMark::cleanup() {
+  // world is stopped at this checkpoint
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "world should be stopped");
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // If a full collection has happened, we shouldn't do this.
+  if (has_aborted()) {
+    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
+    return;
+  }
+
+  g1h->verifier()->verify_region_sets_optional();
+
+  if (VerifyDuringGC) {
+    HandleMark hm;  // handle scope
+    g1h->prepare_for_verify();
+    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
+  }
+  g1h->verifier()->check_bitmaps("Cleanup Start");
+
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  g1p->record_concurrent_mark_cleanup_start();
+
+  double start = os::elapsedTime();
+
+  HeapRegionRemSet::reset_for_cleanup_tasks();
+
+  // Do counting once more with the world stopped for good measure.
+  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
+
+  g1h->workers()->run_task(&g1_par_count_task);
+
+  if (VerifyDuringGC) {
+    // Verify that the counting data accumulated during marking matches
+    // that calculated by walking the marking bitmap.
+
+    // Bitmaps to hold expected values
+    BitMap expected_region_bm(_region_bm.size(), true);
+    BitMap expected_card_bm(_card_bm.size(), true);
+
+    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
+                                                 &_region_bm,
+                                                 &_card_bm,
+                                                 &expected_region_bm,
+                                                 &expected_card_bm);
+
+    g1h->workers()->run_task(&g1_par_verify_task);
+
+    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
+  }
+
+  size_t start_used_bytes = g1h->used();
+  g1h->collector_state()->set_mark_in_progress(false);
+
+  double count_end = os::elapsedTime();
+  double this_final_counting_time = (count_end - start);
+  _total_counting_time += this_final_counting_time;
+
+  if (log_is_enabled(Trace, gc, liveness)) {
+    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
+    _g1h->heap_region_iterate(&cl);
+  }
+
+  // Install newly created mark bitMap as "prev".
+  swapMarkBitMaps();
+
+  g1h->reset_gc_time_stamp();
+
+  uint n_workers = _g1h->workers()->active_workers();
+
+  // Note end of marking in all heap regions.
+  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
+  g1h->workers()->run_task(&g1_par_note_end_task);
+  g1h->check_gc_time_stamps();
+
+  if (!cleanup_list_is_empty()) {
+    // The cleanup list is not empty, so we'll have to process it
+    // concurrently. Notify anyone else that might be wanting free
+    // regions that there will be more free regions coming soon.
+    g1h->set_free_regions_coming();
+  }
+
+  // call below, since it affects the metric by which we sort the heap
+  // regions.
+  if (G1ScrubRemSets) {
+    double rs_scrub_start = os::elapsedTime();
+    g1h->scrub_rem_set(&_region_bm, &_card_bm);
+    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
+  }
+
+  // this will also free any regions totally full of garbage objects,
+  // and sort the regions.
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
+
+  // Statistics.
+  double end = os::elapsedTime();
+  _cleanup_times.add((end - start) * 1000.0);
+
+  // Clean up will have freed any regions completely full of garbage.
+  // Update the soft reference policy with the new heap occupancy.
+  Universe::update_heap_info_at_gc();
+
+  if (VerifyDuringGC) {
+    HandleMark hm;  // handle scope
+    g1h->prepare_for_verify();
+    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
+  }
+
+  g1h->verifier()->check_bitmaps("Cleanup End");
+
+  g1h->verifier()->verify_region_sets_optional();
+
+  // We need to make this be a "collection" so any collection pause that
+  // races with it goes around and waits for completeCleanup to finish.
+  g1h->increment_total_collections();
+
+  // Clean out dead classes and update Metaspace sizes.
+  if (ClassUnloadingWithConcurrentMark) {
+    ClassLoaderDataGraph::purge();
+  }
+  MetaspaceGC::compute_new_size();
+
+  // We reclaimed old regions so we should calculate the sizes to make
+  // sure we update the old gen/space data.
+  g1h->g1mm()->update_sizes();
+  g1h->allocation_context_stats().update_after_mark();
+
+  g1h->trace_heap_after_concurrent_cycle();
+}
+
+void G1ConcurrentMark::completeCleanup() {
+  if (has_aborted()) return;
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  _cleanup_list.verify_optional();
+  FreeRegionList tmp_free_list("Tmp Free List");
+
+  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
+                                  "cleanup list has %u entries",
+                                  _cleanup_list.length());
+
+  // No one else should be accessing the _cleanup_list at this point,
+  // so it is not necessary to take any locks
+  while (!_cleanup_list.is_empty()) {
+    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
+    assert(hr != NULL, "Got NULL from a non-empty list");
+    hr->par_clear();
+    tmp_free_list.add_ordered(hr);
+
+    // Instead of adding one region at a time to the secondary_free_list,
+    // we accumulate them in the local list and move them a few at a
+    // time. This also cuts down on the number of notify_all() calls
+    // we do during this process. We'll also append the local list when
+    // _cleanup_list is empty (which means we just removed the last
+    // region from the _cleanup_list).
+    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
+        _cleanup_list.is_empty()) {
+      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
+                                      "appending %u entries to the secondary_free_list, "
+                                      "cleanup list still has %u entries",
+                                      tmp_free_list.length(),
+                                      _cleanup_list.length());
+
+      {
+        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
+        g1h->secondary_free_list_add(&tmp_free_list);
+        SecondaryFreeList_lock->notify_all();
+      }
+#ifndef PRODUCT
+      if (G1StressConcRegionFreeing) {
+        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
+          os::sleep(Thread::current(), (jlong) 1, false);
+        }
+      }
+#endif
+    }
+  }
+  assert(tmp_free_list.is_empty(), "post-condition");
+}
+
+// Supporting Object and Oop closures for reference discovery
+// and processing in during marking
+
+bool G1CMIsAliveClosure::do_object_b(oop obj) {
+  HeapWord* addr = (HeapWord*)obj;
+  return addr != NULL &&
+         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
+}
+
+// 'Keep Alive' oop closure used by both serial parallel reference processing.
+// Uses the G1CMTask associated with a worker thread (for serial reference
+// processing the G1CMTask for worker 0 is used) to preserve (mark) and
+// trace referent objects.
+//
+// Using the G1CMTask and embedded local queues avoids having the worker
+// threads operating on the global mark stack. This reduces the risk
+// of overflowing the stack - which we would rather avoid at this late
+// state. Also using the tasks' local queues removes the potential
+// of the workers interfering with each other that could occur if
+// operating on the global stack.
+
+class G1CMKeepAliveAndDrainClosure: public OopClosure {
+  G1ConcurrentMark* _cm;
+  G1CMTask*         _task;
+  int               _ref_counter_limit;
+  int               _ref_counter;
+  bool              _is_serial;
+ public:
+  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
+    _cm(cm), _task(task), _is_serial(is_serial),
+    _ref_counter_limit(G1RefProcDrainInterval) {
+    assert(_ref_counter_limit > 0, "sanity");
+    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
+    _ref_counter = _ref_counter_limit;
+  }
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T* p) {
+    if (!_cm->has_overflown()) {
+      oop obj = oopDesc::load_decode_heap_oop(p);
+      _task->deal_with_reference(obj);
+      _ref_counter--;
+
+      if (_ref_counter == 0) {
+        // We have dealt with _ref_counter_limit references, pushing them
+        // and objects reachable from them on to the local stack (and
+        // possibly the global stack). Call G1CMTask::do_marking_step() to
+        // process these entries.
+        //
+        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
+        // there's nothing more to do (i.e. we're done with the entries that
+        // were pushed as a result of the G1CMTask::deal_with_reference() calls
+        // above) or we overflow.
+        //
+        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
+        // flag while there may still be some work to do. (See the comment at
+        // the beginning of G1CMTask::do_marking_step() for those conditions -
+        // one of which is reaching the specified time target.) It is only
+        // when G1CMTask::do_marking_step() returns without setting the
+        // has_aborted() flag that the marking step has completed.
+        do {
+          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
+          _task->do_marking_step(mark_step_duration_ms,
+                                 false      /* do_termination */,
+                                 _is_serial);
+        } while (_task->has_aborted() && !_cm->has_overflown());
+        _ref_counter = _ref_counter_limit;
+      }
+    }
+  }
+};
+
+// 'Drain' oop closure used by both serial and parallel reference processing.
+// Uses the G1CMTask associated with a given worker thread (for serial
+// reference processing the G1CMtask for worker 0 is used). Calls the
+// do_marking_step routine, with an unbelievably large timeout value,
+// to drain the marking data structures of the remaining entries
+// added by the 'keep alive' oop closure above.
+
+class G1CMDrainMarkingStackClosure: public VoidClosure {
+  G1ConcurrentMark* _cm;
+  G1CMTask*         _task;
+  bool              _is_serial;
+ public:
+  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
+    _cm(cm), _task(task), _is_serial(is_serial) {
+    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
+  }
+
+  void do_void() {
+    do {
+      // We call G1CMTask::do_marking_step() to completely drain the local
+      // and global marking stacks of entries pushed by the 'keep alive'
+      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
+      //
+      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
+      // if there's nothing more to do (i.e. we've completely drained the
+      // entries that were pushed as a a result of applying the 'keep alive'
+      // closure to the entries on the discovered ref lists) or we overflow
+      // the global marking stack.
+      //
+      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
+      // flag while there may still be some work to do. (See the comment at
+      // the beginning of G1CMTask::do_marking_step() for those conditions -
+      // one of which is reaching the specified time target.) It is only
+      // when G1CMTask::do_marking_step() returns without setting the
+      // has_aborted() flag that the marking step has completed.
+
+      _task->do_marking_step(1000000000.0 /* something very large */,
+                             true         /* do_termination */,
+                             _is_serial);
+    } while (_task->has_aborted() && !_cm->has_overflown());
+  }
+};
+
+// Implementation of AbstractRefProcTaskExecutor for parallel
+// reference processing at the end of G1 concurrent marking
+
+class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
+private:
+  G1CollectedHeap*  _g1h;
+  G1ConcurrentMark* _cm;
+  WorkGang*         _workers;
+  uint              _active_workers;
+
+public:
+  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
+                          G1ConcurrentMark* cm,
+                          WorkGang* workers,
+                          uint n_workers) :
+    _g1h(g1h), _cm(cm),
+    _workers(workers), _active_workers(n_workers) { }
+
+  // Executes the given task using concurrent marking worker threads.
+  virtual void execute(ProcessTask& task);
+  virtual void execute(EnqueueTask& task);
+};
+
+class G1CMRefProcTaskProxy: public AbstractGangTask {
+  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
+  ProcessTask&      _proc_task;
+  G1CollectedHeap*  _g1h;
+  G1ConcurrentMark* _cm;
+
+public:
+  G1CMRefProcTaskProxy(ProcessTask& proc_task,
+                       G1CollectedHeap* g1h,
+                       G1ConcurrentMark* cm) :
+    AbstractGangTask("Process reference objects in parallel"),
+    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
+    ReferenceProcessor* rp = _g1h->ref_processor_cm();
+    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
+  }
+
+  virtual void work(uint worker_id) {
+    ResourceMark rm;
+    HandleMark hm;
+    G1CMTask* task = _cm->task(worker_id);
+    G1CMIsAliveClosure g1_is_alive(_g1h);
+    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
+    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
+
+    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
+  }
+};
+
+void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
+  assert(_workers != NULL, "Need parallel worker threads.");
+  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
+
+  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
+
+  // We need to reset the concurrency level before each
+  // proxy task execution, so that the termination protocol
+  // and overflow handling in G1CMTask::do_marking_step() knows
+  // how many workers to wait for.
+  _cm->set_concurrency(_active_workers);
+  _workers->run_task(&proc_task_proxy);
+}
+
+class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
+  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
+  EnqueueTask& _enq_task;
+
+public:
+  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
+    AbstractGangTask("Enqueue reference objects in parallel"),
+    _enq_task(enq_task) { }
+
+  virtual void work(uint worker_id) {
+    _enq_task.work(worker_id);
+  }
+};
+
+void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
+  assert(_workers != NULL, "Need parallel worker threads.");
+  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
+
+  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
+
+  // Not strictly necessary but...
+  //
+  // We need to reset the concurrency level before each
+  // proxy task execution, so that the termination protocol
+  // and overflow handling in G1CMTask::do_marking_step() knows
+  // how many workers to wait for.
+  _cm->set_concurrency(_active_workers);
+  _workers->run_task(&enq_task_proxy);
+}
+
+void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
+  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
+}
+
+void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
+  if (has_overflown()) {
+    // Skip processing the discovered references if we have
+    // overflown the global marking stack. Reference objects
+    // only get discovered once so it is OK to not
+    // de-populate the discovered reference lists. We could have,
+    // but the only benefit would be that, when marking restarts,
+    // less reference objects are discovered.
+    return;
+  }
+
+  ResourceMark rm;
+  HandleMark   hm;
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  // Is alive closure.
+  G1CMIsAliveClosure g1_is_alive(g1h);
+
+  // Inner scope to exclude the cleaning of the string and symbol
+  // tables from the displayed time.
+  {
+    GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
+
+    ReferenceProcessor* rp = g1h->ref_processor_cm();
+
+    // See the comment in G1CollectedHeap::ref_processing_init()
+    // about how reference processing currently works in G1.
+
+    // Set the soft reference policy
+    rp->setup_policy(clear_all_soft_refs);
+    assert(_markStack.isEmpty(), "mark stack should be empty");
+
+    // Instances of the 'Keep Alive' and 'Complete GC' closures used
+    // in serial reference processing. Note these closures are also
+    // used for serially processing (by the the current thread) the
+    // JNI references during parallel reference processing.
+    //
+    // These closures do not need to synchronize with the worker
+    // threads involved in parallel reference processing as these
+    // instances are executed serially by the current thread (e.g.
+    // reference processing is not multi-threaded and is thus
+    // performed by the current thread instead of a gang worker).
+    //
+    // The gang tasks involved in parallel reference processing create
+    // their own instances of these closures, which do their own
+    // synchronization among themselves.
+    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
+    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
+
+    // We need at least one active thread. If reference processing
+    // is not multi-threaded we use the current (VMThread) thread,
+    // otherwise we use the work gang from the G1CollectedHeap and
+    // we utilize all the worker threads we can.
+    bool processing_is_mt = rp->processing_is_mt();
+    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
+    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
+
+    // Parallel processing task executor.
+    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
+                                              g1h->workers(), active_workers);
+    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
+
+    // Set the concurrency level. The phase was already set prior to
+    // executing the remark task.
+    set_concurrency(active_workers);
+
+    // Set the degree of MT processing here.  If the discovery was done MT,
+    // the number of threads involved during discovery could differ from
+    // the number of active workers.  This is OK as long as the discovered
+    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
+    rp->set_active_mt_degree(active_workers);
+
+    // Process the weak references.
+    const ReferenceProcessorStats& stats =
+        rp->process_discovered_references(&g1_is_alive,
+                                          &g1_keep_alive,
+                                          &g1_drain_mark_stack,
+                                          executor,
+                                          g1h->gc_timer_cm());
+    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
+
+    // The do_oop work routines of the keep_alive and drain_marking_stack
+    // oop closures will set the has_overflown flag if we overflow the
+    // global marking stack.
+
+    assert(_markStack.overflow() || _markStack.isEmpty(),
+            "mark stack should be empty (unless it overflowed)");
+
+    if (_markStack.overflow()) {
+      // This should have been done already when we tried to push an
+      // entry on to the global mark stack. But let's do it again.
+      set_has_overflown();
+    }
+
+    assert(rp->num_q() == active_workers, "why not");
+
+    rp->enqueue_discovered_references(executor);
+
+    rp->verify_no_references_recorded();
+    assert(!rp->discovery_enabled(), "Post condition");
+  }
+
+  if (has_overflown()) {
+    // We can not trust g1_is_alive if the marking stack overflowed
+    return;
+  }
+
+  assert(_markStack.isEmpty(), "Marking should have completed");
+
+  // Unload Klasses, String, Symbols, Code Cache, etc.
+  {
+    GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
+
+    if (ClassUnloadingWithConcurrentMark) {
+      bool purged_classes;
+
+      {
+        GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
+        purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
+      }
+
+      {
+        GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
+        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
+      }
+    }
+
+    if (G1StringDedup::is_enabled()) {
+      GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
+      G1StringDedup::unlink(&g1_is_alive);
+    }
+  }
+}
+
+void G1ConcurrentMark::swapMarkBitMaps() {
+  G1CMBitMapRO* temp = _prevMarkBitMap;
+  _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
+  _nextMarkBitMap    = (G1CMBitMap*)  temp;
+}
+
+// Closure for marking entries in SATB buffers.
+class G1CMSATBBufferClosure : public SATBBufferClosure {
+private:
+  G1CMTask* _task;
+  G1CollectedHeap* _g1h;
+
+  // This is very similar to G1CMTask::deal_with_reference, but with
+  // more relaxed requirements for the argument, so this must be more
+  // circumspect about treating the argument as an object.
+  void do_entry(void* entry) const {
+    _task->increment_refs_reached();
+    HeapRegion* hr = _g1h->heap_region_containing(entry);
+    if (entry < hr->next_top_at_mark_start()) {
+      // Until we get here, we don't know whether entry refers to a valid
+      // object; it could instead have been a stale reference.
+      oop obj = static_cast<oop>(entry);
+      assert(obj->is_oop(true /* ignore mark word */),
+             "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
+      _task->make_reference_grey(obj, hr);
+    }
+  }
+
+public:
+  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
+    : _task(task), _g1h(g1h) { }
+
+  virtual void do_buffer(void** buffer, size_t size) {
+    for (size_t i = 0; i < size; ++i) {
+      do_entry(buffer[i]);
+    }
+  }
+};
+
+class G1RemarkThreadsClosure : public ThreadClosure {
+  G1CMSATBBufferClosure _cm_satb_cl;
+  G1CMOopClosure _cm_cl;
+  MarkingCodeBlobClosure _code_cl;
+  int _thread_parity;
+
+ public:
+  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
+    _cm_satb_cl(task, g1h),
+    _cm_cl(g1h, g1h->concurrent_mark(), task),
+    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
+    _thread_parity(Threads::thread_claim_parity()) {}
+
+  void do_thread(Thread* thread) {
+    if (thread->is_Java_thread()) {
+      if (thread->claim_oops_do(true, _thread_parity)) {
+        JavaThread* jt = (JavaThread*)thread;
+
+        // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
+        // however the liveness of oops reachable from nmethods have very complex lifecycles:
+        // * Alive if on the stack of an executing method
+        // * Weakly reachable otherwise
+        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
+        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
+        jt->nmethods_do(&_code_cl);
+
+        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
+      }
+    } else if (thread->is_VM_thread()) {
+      if (thread->claim_oops_do(true, _thread_parity)) {
+        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
+      }
+    }
+  }
+};
+
+class G1CMRemarkTask: public AbstractGangTask {
+private:
+  G1ConcurrentMark* _cm;
+public:
+  void work(uint worker_id) {
+    // Since all available tasks are actually started, we should
+    // only proceed if we're supposed to be active.
+    if (worker_id < _cm->active_tasks()) {
+      G1CMTask* task = _cm->task(worker_id);
+      task->record_start_time();
+      {
+        ResourceMark rm;
+        HandleMark hm;
+
+        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
+        Threads::threads_do(&threads_f);
+      }
+
+      do {
+        task->do_marking_step(1000000000.0 /* something very large */,
+                              true         /* do_termination       */,
+                              false        /* is_serial            */);
+      } while (task->has_aborted() && !_cm->has_overflown());
+      // If we overflow, then we do not want to restart. We instead
+      // want to abort remark and do concurrent marking again.
+      task->record_end_time();
+    }
+  }
+
+  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
+    AbstractGangTask("Par Remark"), _cm(cm) {
+    _cm->terminator()->reset_for_reuse(active_workers);
+  }
+};
+
+void G1ConcurrentMark::checkpointRootsFinalWork() {
+  ResourceMark rm;
+  HandleMark   hm;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
+
+  g1h->ensure_parsability(false);
+
+  // this is remark, so we'll use up all active threads
+  uint active_workers = g1h->workers()->active_workers();
+  set_concurrency_and_phase(active_workers, false /* concurrent */);
+  // Leave _parallel_marking_threads at it's
+  // value originally calculated in the G1ConcurrentMark
+  // constructor and pass values of the active workers
+  // through the gang in the task.
+
+  {
+    StrongRootsScope srs(active_workers);
+
+    G1CMRemarkTask remarkTask(this, active_workers);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
+    g1h->workers()->run_task(&remarkTask);
+  }
+
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  guarantee(has_overflown() ||
+            satb_mq_set.completed_buffers_num() == 0,
+            "Invariant: has_overflown = %s, num buffers = %d",
+            BOOL_TO_STR(has_overflown()),
+            satb_mq_set.completed_buffers_num());
+
+  print_stats();
+}
+
+void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((G1CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+}
+
+HeapRegion*
+G1ConcurrentMark::claim_region(uint worker_id) {
+  // "checkpoint" the finger
+  HeapWord* finger = _finger;
+
+  // _heap_end will not change underneath our feet; it only changes at
+  // yield points.
+  while (finger < _heap_end) {
+    assert(_g1h->is_in_g1_reserved(finger), "invariant");
+
+    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
+
+    // Above heap_region_containing may return NULL as we always scan claim
+    // until the end of the heap. In this case, just jump to the next region.
+    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
+
+    // Is the gap between reading the finger and doing the CAS too long?
+    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
+    if (res == finger && curr_region != NULL) {
+      // we succeeded
+      HeapWord*   bottom        = curr_region->bottom();
+      HeapWord*   limit         = curr_region->next_top_at_mark_start();
+
+      // notice that _finger == end cannot be guaranteed here since,
+      // someone else might have moved the finger even further
+      assert(_finger >= end, "the finger should have moved forward");
+
+      if (limit > bottom) {
+        return curr_region;
+      } else {
+        assert(limit == bottom,
+               "the region limit should be at bottom");
+        // we return NULL and the caller should try calling
+        // claim_region() again.
+        return NULL;
+      }
+    } else {
+      assert(_finger > finger, "the finger should have moved forward");
+      // read it again
+      finger = _finger;
+    }
+  }
+
+  return NULL;
+}
+
+#ifndef PRODUCT
+class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
+private:
+  G1CollectedHeap* _g1h;
+  const char* _phase;
+  int _info;
+
+public:
+  VerifyNoCSetOops(const char* phase, int info = -1) :
+    _g1h(G1CollectedHeap::heap()),
+    _phase(phase),
+    _info(info)
+  { }
+
+  void operator()(oop obj) const {
+    guarantee(obj->is_oop(),
+              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
+              p2i(obj), _phase, _info);
+    guarantee(!_g1h->obj_in_cs(obj),
+              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
+              p2i(obj), _phase, _info);
+  }
+};
+
+void G1ConcurrentMark::verify_no_cset_oops() {
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+  if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
+    return;
+  }
+
+  // Verify entries on the global mark stack
+  _markStack.iterate(VerifyNoCSetOops("Stack"));
+
+  // Verify entries on the task queues
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    G1CMTaskQueue* queue = _task_queues->queue(i);
+    queue->iterate(VerifyNoCSetOops("Queue", i));
+  }
+
+  // Verify the global finger
+  HeapWord* global_finger = finger();
+  if (global_finger != NULL && global_finger < _heap_end) {
+    // Since we always iterate over all regions, we might get a NULL HeapRegion
+    // here.
+    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
+    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
+              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
+              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
+  }
+
+  // Verify the task fingers
+  assert(parallel_marking_threads() <= _max_worker_id, "sanity");
+  for (uint i = 0; i < parallel_marking_threads(); ++i) {
+    G1CMTask* task = _tasks[i];
+    HeapWord* task_finger = task->finger();
+    if (task_finger != NULL && task_finger < _heap_end) {
+      // See above note on the global finger verification.
+      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
+      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
+                !task_hr->in_collection_set(),
+                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
+                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
+    }
+  }
+}
+#endif // PRODUCT
+
+// Aggregate the counting data that was constructed concurrently
+// with marking.
+class AggregateCountDataHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  G1ConcurrentMark* _cm;
+  CardTableModRefBS* _ct_bs;
+  BitMap* _cm_card_bm;
+  uint _max_worker_id;
+
+ public:
+  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
+                              BitMap* cm_card_bm,
+                              uint max_worker_id) :
+    _g1h(g1h), _cm(g1h->concurrent_mark()),
+    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
+    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    HeapWord* start = hr->bottom();
+    HeapWord* limit = hr->next_top_at_mark_start();
+    HeapWord* end = hr->end();
+
+    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
+           "Preconditions not met - "
+           "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
+           "top: " PTR_FORMAT ", end: " PTR_FORMAT,
+           p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
+
+    assert(hr->next_marked_bytes() == 0, "Precondition");
+
+    if (start == limit) {
+      // NTAMS of this region has not been set so nothing to do.
+      return false;
+    }
+
+    // 'start' should be in the heap.
+    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
+    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
+    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
+
+    // If ntams is not card aligned then we bump card bitmap index
+    // for limit so that we get the all the cards spanned by
+    // the object ending at ntams.
+    // Note: if this is the last region in the heap then ntams
+    // could be actually just beyond the end of the the heap;
+    // limit_idx will then  correspond to a (non-existent) card
+    // that is also outside the heap.
+    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
+      limit_idx += 1;
+    }
+
+    assert(limit_idx <= end_idx, "or else use atomics");
+
+    // Aggregate the "stripe" in the count data associated with hr.
+    uint hrm_index = hr->hrm_index();
+    size_t marked_bytes = 0;
+
+    for (uint i = 0; i < _max_worker_id; i += 1) {
+      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
+      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
+
+      // Fetch the marked_bytes in this region for task i and
+      // add it to the running total for this region.
+      marked_bytes += marked_bytes_array[hrm_index];
+
+      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
+      // into the global card bitmap.
+      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
+
+      while (scan_idx < limit_idx) {
+        assert(task_card_bm->at(scan_idx) == true, "should be");
+        _cm_card_bm->set_bit(scan_idx);
+        assert(_cm_card_bm->at(scan_idx) == true, "should be");
+
+        // BitMap::get_next_one_offset() can handle the case when
+        // its left_offset parameter is greater than its right_offset
+        // parameter. It does, however, have an early exit if
+        // left_offset == right_offset. So let's limit the value
+        // passed in for left offset here.
+        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
+        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
+      }
+    }
+
+    // Update the marked bytes for this region.
+    hr->add_to_marked_bytes(marked_bytes);
+
+    // Next heap region
+    return false;
+  }
+};
+
+class G1AggregateCountDataTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  G1ConcurrentMark* _cm;
+  BitMap* _cm_card_bm;
+  uint _max_worker_id;
+  uint _active_workers;
+  HeapRegionClaimer _hrclaimer;
+
+public:
+  G1AggregateCountDataTask(G1CollectedHeap* g1h,
+                           G1ConcurrentMark* cm,
+                           BitMap* cm_card_bm,
+                           uint max_worker_id,
+                           uint n_workers) :
+      AbstractGangTask("Count Aggregation"),
+      _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
+      _max_worker_id(max_worker_id),
+      _active_workers(n_workers),
+      _hrclaimer(_active_workers) {
+  }
+
+  void work(uint worker_id) {
+    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
+
+    _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
+  }
+};
+
+
+void G1ConcurrentMark::aggregate_count_data() {
+  uint n_workers = _g1h->workers()->active_workers();
+
+  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
+                                           _max_worker_id, n_workers);
+
+  _g1h->workers()->run_task(&g1_par_agg_task);
+}
+
+// Clear the per-worker arrays used to store the per-region counting data
+void G1ConcurrentMark::clear_all_count_data() {
+  // Clear the global card bitmap - it will be filled during
+  // liveness count aggregation (during remark) and the
+  // final counting task.
+  _card_bm.clear();
+
+  // Clear the global region bitmap - it will be filled as part
+  // of the final counting task.
+  _region_bm.clear();
+
+  uint max_regions = _g1h->max_regions();
+  assert(_max_worker_id > 0, "uninitialized");
+
+  for (uint i = 0; i < _max_worker_id; i += 1) {
+    BitMap* task_card_bm = count_card_bitmap_for(i);
+    size_t* marked_bytes_array = count_marked_bytes_array_for(i);
+
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    assert(marked_bytes_array != NULL, "uninitialized");
+
+    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
+    task_card_bm->clear();
+  }
+}
+
+void G1ConcurrentMark::print_stats() {
+  if (!log_is_enabled(Debug, gc, stats)) {
+    return;
+  }
+  log_debug(gc, stats)("---------------------------------------------------------------------");
+  for (size_t i = 0; i < _active_tasks; ++i) {
+    _tasks[i]->print_stats();
+    log_debug(gc, stats)("---------------------------------------------------------------------");
+  }
+}
+
+// abandon current marking iteration due to a Full GC
+void G1ConcurrentMark::abort() {
+  if (!cmThread()->during_cycle() || _has_aborted) {
+    // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
+    return;
+  }
+
+  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
+  // concurrent bitmap clearing.
+  _nextMarkBitMap->clearAll();
+
+  // Note we cannot clear the previous marking bitmap here
+  // since VerifyDuringGC verifies the objects marked during
+  // a full GC against the previous bitmap.
+
+  // Clear the liveness counting data
+  clear_all_count_data();
+  // Empty mark stack
+  reset_marking_state();
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    _tasks[i]->clear_region_fields();
+  }
+  _first_overflow_barrier_sync.abort();
+  _second_overflow_barrier_sync.abort();
+  _has_aborted = true;
+
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.abandon_partial_marking();
+  // This can be called either during or outside marking, we'll read
+  // the expected_active value from the SATB queue set.
+  satb_mq_set.set_active_all_threads(
+                                 false, /* new active value */
+                                 satb_mq_set.is_active() /* expected_active */);
+
+  _g1h->trace_heap_after_concurrent_cycle();
+
+  // Close any open concurrent phase timing
+  register_concurrent_phase_end();
+
+  _g1h->register_concurrent_cycle_end();
+}
+
+static void print_ms_time_info(const char* prefix, const char* name,
+                               NumberSeq& ns) {
+  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
+                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
+  if (ns.num() > 0) {
+    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
+                           prefix, ns.sd(), ns.maximum());
+  }
+}
+
+void G1ConcurrentMark::print_summary_info() {
+  LogHandle(gc, marking) log;
+  if (!log.is_trace()) {
+    return;
+  }
+
+  log.trace(" Concurrent marking:");
+  print_ms_time_info("  ", "init marks", _init_times);
+  print_ms_time_info("  ", "remarks", _remark_times);
+  {
+    print_ms_time_info("     ", "final marks", _remark_mark_times);
+    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
+
+  }
+  print_ms_time_info("  ", "cleanups", _cleanup_times);
+  log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
+            _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
+  if (G1ScrubRemSets) {
+    log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
+              _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
+  }
+  log.trace("  Total stop_world time = %8.2f s.",
+            (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
+  log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
+            cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
+}
+
+void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
+  _parallel_workers->print_worker_threads_on(st);
+}
+
+void G1ConcurrentMark::print_on_error(outputStream* st) const {
+  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
+      p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
+  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
+  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
+}
+
+// We take a break if someone is trying to stop the world.
+bool G1ConcurrentMark::do_yield_check(uint worker_id) {
+  if (SuspendibleThreadSet::should_yield()) {
+    if (worker_id == 0) {
+      _g1h->g1_policy()->record_concurrent_pause();
+    }
+    SuspendibleThreadSet::yield();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Closure for iteration over bitmaps
+class G1CMBitMapClosure : public BitMapClosure {
+private:
+  // the bitmap that is being iterated over
+  G1CMBitMap*                 _nextMarkBitMap;
+  G1ConcurrentMark*           _cm;
+  G1CMTask*                   _task;
+
+public:
+  G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
+    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
+
+  bool do_bit(size_t offset) {
+    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
+    assert(_nextMarkBitMap->isMarked(addr), "invariant");
+    assert( addr < _cm->finger(), "invariant");
+    assert(addr >= _task->finger(), "invariant");
+
+    // We move that task's local finger along.
+    _task->move_finger_to(addr);
+
+    _task->scan_object(oop(addr));
+    // we only partially drain the local queue and global stack
+    _task->drain_local_queue(true);
+    _task->drain_global_stack(true);
+
+    // if the has_aborted flag has been raised, we need to bail out of
+    // the iteration
+    return !_task->has_aborted();
+  }
+};
+
+static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
+  ReferenceProcessor* result = NULL;
+  if (G1UseConcMarkReferenceProcessing) {
+    result = g1h->ref_processor_cm();
+    assert(result != NULL, "should not be NULL");
+  }
+  return result;
+}
+
+G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
+                               G1ConcurrentMark* cm,
+                               G1CMTask* task)
+  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
+    _g1h(g1h), _cm(cm), _task(task)
+{ }
+
+void G1CMTask::setup_for_region(HeapRegion* hr) {
+  assert(hr != NULL,
+        "claim_region() should have filtered out NULL regions");
+  _curr_region  = hr;
+  _finger       = hr->bottom();
+  update_region_limit();
+}
+
+void G1CMTask::update_region_limit() {
+  HeapRegion* hr            = _curr_region;
+  HeapWord* bottom          = hr->bottom();
+  HeapWord* limit           = hr->next_top_at_mark_start();
+
+  if (limit == bottom) {
+    // The region was collected underneath our feet.
+    // We set the finger to bottom to ensure that the bitmap
+    // iteration that will follow this will not do anything.
+    // (this is not a condition that holds when we set the region up,
+    // as the region is not supposed to be empty in the first place)
+    _finger = bottom;
+  } else if (limit >= _region_limit) {
+    assert(limit >= _finger, "peace of mind");
+  } else {
+    assert(limit < _region_limit, "only way to get here");
+    // This can happen under some pretty unusual circumstances.  An
+    // evacuation pause empties the region underneath our feet (NTAMS
+    // at bottom). We then do some allocation in the region (NTAMS
+    // stays at bottom), followed by the region being used as a GC
+    // alloc region (NTAMS will move to top() and the objects
+    // originally below it will be grayed). All objects now marked in
+    // the region are explicitly grayed, if below the global finger,
+    // and we do not need in fact to scan anything else. So, we simply
+    // set _finger to be limit to ensure that the bitmap iteration
+    // doesn't do anything.
+    _finger = limit;
+  }
+
+  _region_limit = limit;
+}
+
+void G1CMTask::giveup_current_region() {
+  assert(_curr_region != NULL, "invariant");
+  clear_region_fields();
+}
+
+void G1CMTask::clear_region_fields() {
+  // Values for these three fields that indicate that we're not
+  // holding on to a region.
+  _curr_region   = NULL;
+  _finger        = NULL;
+  _region_limit  = NULL;
+}
+
+void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
+  if (cm_oop_closure == NULL) {
+    assert(_cm_oop_closure != NULL, "invariant");
+  } else {
+    assert(_cm_oop_closure == NULL, "invariant");
+  }
+  _cm_oop_closure = cm_oop_closure;
+}
+
+void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
+  guarantee(nextMarkBitMap != NULL, "invariant");
+  _nextMarkBitMap                = nextMarkBitMap;
+  clear_region_fields();
+
+  _calls                         = 0;
+  _elapsed_time_ms               = 0.0;
+  _termination_time_ms           = 0.0;
+  _termination_start_time_ms     = 0.0;
+}
+
+bool G1CMTask::should_exit_termination() {
+  regular_clock_call();
+  // This is called when we are in the termination protocol. We should
+  // quit if, for some reason, this task wants to abort or the global
+  // stack is not empty (this means that we can get work from it).
+  return !_cm->mark_stack_empty() || has_aborted();
+}
+
+void G1CMTask::reached_limit() {
+  assert(_words_scanned >= _words_scanned_limit ||
+         _refs_reached >= _refs_reached_limit ,
+         "shouldn't have been called otherwise");
+  regular_clock_call();
+}
+
+void G1CMTask::regular_clock_call() {
+  if (has_aborted()) return;
+
+  // First, we need to recalculate the words scanned and refs reached
+  // limits for the next clock call.
+  recalculate_limits();
+
+  // During the regular clock call we do the following
+
+  // (1) If an overflow has been flagged, then we abort.
+  if (_cm->has_overflown()) {
+    set_has_aborted();
+    return;
+  }
+
+  // If we are not concurrent (i.e. we're doing remark) we don't need
+  // to check anything else. The other steps are only needed during
+  // the concurrent marking phase.
+  if (!concurrent()) return;
+
+  // (2) If marking has been aborted for Full GC, then we also abort.
+  if (_cm->has_aborted()) {
+    set_has_aborted();
+    return;
+  }
+
+  double curr_time_ms = os::elapsedVTime() * 1000.0;
+
+  // (4) We check whether we should yield. If we have to, then we abort.
+  if (SuspendibleThreadSet::should_yield()) {
+    // We should yield. To do this we abort the task. The caller is
+    // responsible for yielding.
+    set_has_aborted();
+    return;
+  }
+
+  // (5) We check whether we've reached our time quota. If we have,
+  // then we abort.
+  double elapsed_time_ms = curr_time_ms - _start_time_ms;
+  if (elapsed_time_ms > _time_target_ms) {
+    set_has_aborted();
+    _has_timed_out = true;
+    return;
+  }
+
+  // (6) Finally, we check whether there are enough completed STAB
+  // buffers available for processing. If there are, we abort.
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
+    // we do need to process SATB buffers, we'll abort and restart
+    // the marking task to do so
+    set_has_aborted();
+    return;
+  }
+}
+
+void G1CMTask::recalculate_limits() {
+  _real_words_scanned_limit = _words_scanned + words_scanned_period;
+  _words_scanned_limit      = _real_words_scanned_limit;
+
+  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
+  _refs_reached_limit       = _real_refs_reached_limit;
+}
+
+void G1CMTask::decrease_limits() {
+  // This is called when we believe that we're going to do an infrequent
+  // operation which will increase the per byte scanned cost (i.e. move
+  // entries to/from the global stack). It basically tries to decrease the
+  // scanning limit so that the clock is called earlier.
+
+  _words_scanned_limit = _real_words_scanned_limit -
+    3 * words_scanned_period / 4;
+  _refs_reached_limit  = _real_refs_reached_limit -
+    3 * refs_reached_period / 4;
+}
+
+void G1CMTask::move_entries_to_global_stack() {
+  // local array where we'll store the entries that will be popped
+  // from the local queue
+  oop buffer[global_stack_transfer_size];
+
+  int n = 0;
+  oop obj;
+  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
+    buffer[n] = obj;
+    ++n;
+  }
+
+  if (n > 0) {
+    // we popped at least one entry from the local queue
+
+    if (!_cm->mark_stack_push(buffer, n)) {
+      set_has_aborted();
+    }
+  }
+
+  // this operation was quite expensive, so decrease the limits
+  decrease_limits();
+}
+
+void G1CMTask::get_entries_from_global_stack() {
+  // local array where we'll store the entries that will be popped
+  // from the global stack.
+  oop buffer[global_stack_transfer_size];
+  int n;
+  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
+  assert(n <= global_stack_transfer_size,
+         "we should not pop more than the given limit");
+  if (n > 0) {
+    // yes, we did actually pop at least one entry
+    for (int i = 0; i < n; ++i) {
+      bool success = _task_queue->push(buffer[i]);
+      // We only call this when the local queue is empty or under a
+      // given target limit. So, we do not expect this push to fail.
+      assert(success, "invariant");
+    }
+  }
+
+  // this operation was quite expensive, so decrease the limits
+  decrease_limits();
+}
+
+void G1CMTask::drain_local_queue(bool partially) {
+  if (has_aborted()) return;
+
+  // Decide what the target size is, depending whether we're going to
+  // drain it partially (so that other tasks can steal if they run out
+  // of things to do) or totally (at the very end).
+  size_t target_size;
+  if (partially) {
+    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
+  } else {
+    target_size = 0;
+  }
+
+  if (_task_queue->size() > target_size) {
+    oop obj;
+    bool ret = _task_queue->pop_local(obj);
+    while (ret) {
+      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
+      assert(!_g1h->is_on_master_free_list(
+                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
+
+      scan_object(obj);
+
+      if (_task_queue->size() <= target_size || has_aborted()) {
+        ret = false;
+      } else {
+        ret = _task_queue->pop_local(obj);
+      }
+    }
+  }
+}
+
+void G1CMTask::drain_global_stack(bool partially) {
+  if (has_aborted()) return;
+
+  // We have a policy to drain the local queue before we attempt to
+  // drain the global stack.
+  assert(partially || _task_queue->size() == 0, "invariant");
+
+  // Decide what the target size is, depending whether we're going to
+  // drain it partially (so that other tasks can steal if they run out
+  // of things to do) or totally (at the very end).  Notice that,
+  // because we move entries from the global stack in chunks or
+  // because another task might be doing the same, we might in fact
+  // drop below the target. But, this is not a problem.
+  size_t target_size;
+  if (partially) {
+    target_size = _cm->partial_mark_stack_size_target();
+  } else {
+    target_size = 0;
+  }
+
+  if (_cm->mark_stack_size() > target_size) {
+    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
+      get_entries_from_global_stack();
+      drain_local_queue(partially);
+    }
+  }
+}
+
+// SATB Queue has several assumptions on whether to call the par or
+// non-par versions of the methods. this is why some of the code is
+// replicated. We should really get rid of the single-threaded version
+// of the code to simplify things.
+void G1CMTask::drain_satb_buffers() {
+  if (has_aborted()) return;
+
+  // We set this so that the regular clock knows that we're in the
+  // middle of draining buffers and doesn't set the abort flag when it
+  // notices that SATB buffers are available for draining. It'd be
+  // very counter productive if it did that. :-)
+  _draining_satb_buffers = true;
+
+  G1CMSATBBufferClosure satb_cl(this, _g1h);
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+
+  // This keeps claiming and applying the closure to completed buffers
+  // until we run out of buffers or we need to abort.
+  while (!has_aborted() &&
+         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
+    regular_clock_call();
+  }
+
+  _draining_satb_buffers = false;
+
+  assert(has_aborted() ||
+         concurrent() ||
+         satb_mq_set.completed_buffers_num() == 0, "invariant");
+
+  // again, this was a potentially expensive operation, decrease the
+  // limits to get the regular clock call early
+  decrease_limits();
+}
+
+void G1CMTask::print_stats() {
+  log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
+                       _worker_id, _calls);
+  log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
+                       _elapsed_time_ms, _termination_time_ms);
+  log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
+                       _step_times_ms.num(), _step_times_ms.avg(),
+                       _step_times_ms.sd());
+  log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
+                       _step_times_ms.maximum(), _step_times_ms.sum());
+}
+
+bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
+  return _task_queues->steal(worker_id, hash_seed, obj);
+}
+
+/*****************************************************************************
+
+    The do_marking_step(time_target_ms, ...) method is the building
+    block of the parallel marking framework. It can be called in parallel
+    with other invocations of do_marking_step() on different tasks
+    (but only one per task, obviously) and concurrently with the
+    mutator threads, or during remark, hence it eliminates the need
+    for two versions of the code. When called during remark, it will
+    pick up from where the task left off during the concurrent marking
+    phase. Interestingly, tasks are also claimable during evacuation
+    pauses too, since do_marking_step() ensures that it aborts before
+    it needs to yield.
+
+    The data structures that it uses to do marking work are the
+    following:
+
+      (1) Marking Bitmap. If there are gray objects that appear only
+      on the bitmap (this happens either when dealing with an overflow
+      or when the initial marking phase has simply marked the roots
+      and didn't push them on the stack), then tasks claim heap
+      regions whose bitmap they then scan to find gray objects. A
+      global finger indicates where the end of the last claimed region
+      is. A local finger indicates how far into the region a task has
+      scanned. The two fingers are used to determine how to gray an
+      object (i.e. whether simply marking it is OK, as it will be
+      visited by a task in the future, or whether it needs to be also
+      pushed on a stack).
+
+      (2) Local Queue. The local queue of the task which is accessed
+      reasonably efficiently by the task. Other tasks can steal from
+      it when they run out of work. Throughout the marking phase, a
+      task attempts to keep its local queue short but not totally
+      empty, so that entries are available for stealing by other
+      tasks. Only when there is no more work, a task will totally
+      drain its local queue.
+
+      (3) Global Mark Stack. This handles local queue overflow. During
+      marking only sets of entries are moved between it and the local
+      queues, as access to it requires a mutex and more fine-grain
+      interaction with it which might cause contention. If it
+      overflows, then the marking phase should restart and iterate
+      over the bitmap to identify gray objects. Throughout the marking
+      phase, tasks attempt to keep the global mark stack at a small
+      length but not totally empty, so that entries are available for
+      popping by other tasks. Only when there is no more work, tasks
+      will totally drain the global mark stack.
+
+      (4) SATB Buffer Queue. This is where completed SATB buffers are
+      made available. Buffers are regularly removed from this queue
+      and scanned for roots, so that the queue doesn't get too
+      long. During remark, all completed buffers are processed, as
+      well as the filled in parts of any uncompleted buffers.
+
+    The do_marking_step() method tries to abort when the time target
+    has been reached. There are a few other cases when the
+    do_marking_step() method also aborts:
+
+      (1) When the marking phase has been aborted (after a Full GC).
+
+      (2) When a global overflow (on the global stack) has been
+      triggered. Before the task aborts, it will actually sync up with
+      the other tasks to ensure that all the marking data structures
+      (local queues, stacks, fingers etc.)  are re-initialized so that
+      when do_marking_step() completes, the marking phase can
+      immediately restart.
+
+      (3) When enough completed SATB buffers are available. The
+      do_marking_step() method only tries to drain SATB buffers right
+      at the beginning. So, if enough buffers are available, the
+      marking step aborts and the SATB buffers are processed at
+      the beginning of the next invocation.
+
+      (4) To yield. when we have to yield then we abort and yield
+      right at the end of do_marking_step(). This saves us from a lot
+      of hassle as, by yielding we might allow a Full GC. If this
+      happens then objects will be compacted underneath our feet, the
+      heap might shrink, etc. We save checking for this by just
+      aborting and doing the yield right at the end.
+
+    From the above it follows that the do_marking_step() method should
+    be called in a loop (or, otherwise, regularly) until it completes.
+
+    If a marking step completes without its has_aborted() flag being
+    true, it means it has completed the current marking phase (and
+    also all other marking tasks have done so and have all synced up).
+
+    A method called regular_clock_call() is invoked "regularly" (in
+    sub ms intervals) throughout marking. It is this clock method that
+    checks all the abort conditions which were mentioned above and
+    decides when the task should abort. A work-based scheme is used to
+    trigger this clock method: when the number of object words the
+    marking phase has scanned or the number of references the marking
+    phase has visited reach a given limit. Additional invocations to
+    the method clock have been planted in a few other strategic places
+    too. The initial reason for the clock method was to avoid calling
+    vtime too regularly, as it is quite expensive. So, once it was in
+    place, it was natural to piggy-back all the other conditions on it
+    too and not constantly check them throughout the code.
+
+    If do_termination is true then do_marking_step will enter its
+    termination protocol.
+
+    The value of is_serial must be true when do_marking_step is being
+    called serially (i.e. by the VMThread) and do_marking_step should
+    skip any synchronization in the termination and overflow code.
+    Examples include the serial remark code and the serial reference
+    processing closures.
+
+    The value of is_serial must be false when do_marking_step is
+    being called by any of the worker threads in a work gang.
+    Examples include the concurrent marking code (CMMarkingTask),
+    the MT remark code, and the MT reference processing closures.
+
+ *****************************************************************************/
+
+void G1CMTask::do_marking_step(double time_target_ms,
+                               bool do_termination,
+                               bool is_serial) {
+  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
+  assert(concurrent() == _cm->concurrent(), "they should be the same");
+
+  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
+  assert(_task_queues != NULL, "invariant");
+  assert(_task_queue != NULL, "invariant");
+  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
+
+  assert(!_claimed,
+         "only one thread should claim this task at any one time");
+
+  // OK, this doesn't safeguard again all possible scenarios, as it is
+  // possible for two threads to set the _claimed flag at the same
+  // time. But it is only for debugging purposes anyway and it will
+  // catch most problems.
+  _claimed = true;
+
+  _start_time_ms = os::elapsedVTime() * 1000.0;
+
+  // If do_stealing is true then do_marking_step will attempt to
+  // steal work from the other G1CMTasks. It only makes sense to
+  // enable stealing when the termination protocol is enabled
+  // and do_marking_step() is not being called serially.
+  bool do_stealing = do_termination && !is_serial;
+
+  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
+  _time_target_ms = time_target_ms - diff_prediction_ms;
+
+  // set up the variables that are used in the work-based scheme to
+  // call the regular clock method
+  _words_scanned = 0;
+  _refs_reached  = 0;
+  recalculate_limits();
+
+  // clear all flags
+  clear_has_aborted();
+  _has_timed_out = false;
+  _draining_satb_buffers = false;
+
+  ++_calls;
+
+  // Set up the bitmap and oop closures. Anything that uses them is
+  // eventually called from this method, so it is OK to allocate these
+  // statically.
+  G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
+  G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
+  set_cm_oop_closure(&cm_oop_closure);
+
+  if (_cm->has_overflown()) {
+    // This can happen if the mark stack overflows during a GC pause
+    // and this task, after a yield point, restarts. We have to abort
+    // as we need to get into the overflow protocol which happens
+    // right at the end of this task.
+    set_has_aborted();
+  }
+
+  // First drain any available SATB buffers. After this, we will not
+  // look at SATB buffers before the next invocation of this method.
+  // If enough completed SATB buffers are queued up, the regular clock
+  // will abort this task so that it restarts.
+  drain_satb_buffers();
+  // ...then partially drain the local queue and the global stack
+  drain_local_queue(true);
+  drain_global_stack(true);
+
+  do {
+    if (!has_aborted() && _curr_region != NULL) {
+      // This means that we're already holding on to a region.
+      assert(_finger != NULL, "if region is not NULL, then the finger "
+             "should not be NULL either");
+
+      // We might have restarted this task after an evacuation pause
+      // which might have evacuated the region we're holding on to
+      // underneath our feet. Let's read its limit again to make sure
+      // that we do not iterate over a region of the heap that
+      // contains garbage (update_region_limit() will also move
+      // _finger to the start of the region if it is found empty).
+      update_region_limit();
+      // We will start from _finger not from the start of the region,
+      // as we might be restarting this task after aborting half-way
+      // through scanning this region. In this case, _finger points to
+      // the address where we last found a marked object. If this is a
+      // fresh region, _finger points to start().
+      MemRegion mr = MemRegion(_finger, _region_limit);
+
+      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
+             "humongous regions should go around loop once only");
+
+      // Some special cases:
+      // If the memory region is empty, we can just give up the region.
+      // If the current region is humongous then we only need to check
+      // the bitmap for the bit associated with the start of the object,
+      // scan the object if it's live, and give up the region.
+      // Otherwise, let's iterate over the bitmap of the part of the region
+      // that is left.
+      // If the iteration is successful, give up the region.
+      if (mr.is_empty()) {
+        giveup_current_region();
+        regular_clock_call();
+      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
+        if (_nextMarkBitMap->isMarked(mr.start())) {
+          // The object is marked - apply the closure
+          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
+          bitmap_closure.do_bit(offset);
+        }
+        // Even if this task aborted while scanning the humongous object
+        // we can (and should) give up the current region.
+        giveup_current_region();
+        regular_clock_call();
+      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
+        giveup_current_region();
+        regular_clock_call();
+      } else {
+        assert(has_aborted(), "currently the only way to do so");
+        // The only way to abort the bitmap iteration is to return
+        // false from the do_bit() method. However, inside the
+        // do_bit() method we move the _finger to point to the
+        // object currently being looked at. So, if we bail out, we
+        // have definitely set _finger to something non-null.
+        assert(_finger != NULL, "invariant");
+
+        // Region iteration was actually aborted. So now _finger
+        // points to the address of the object we last scanned. If we
+        // leave it there, when we restart this task, we will rescan
+        // the object. It is easy to avoid this. We move the finger by
+        // enough to point to the next possible object header (the
+        // bitmap knows by how much we need to move it as it knows its
+        // granularity).
+        assert(_finger < _region_limit, "invariant");
+        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
+        // Check if bitmap iteration was aborted while scanning the last object
+        if (new_finger >= _region_limit) {
+          giveup_current_region();
+        } else {
+          move_finger_to(new_finger);
+        }
+      }
+    }
+    // At this point we have either completed iterating over the
+    // region we were holding on to, or we have aborted.
+
+    // We then partially drain the local queue and the global stack.
+    // (Do we really need this?)
+    drain_local_queue(true);
+    drain_global_stack(true);
+
+    // Read the note on the claim_region() method on why it might
+    // return NULL with potentially more regions available for
+    // claiming and why we have to check out_of_regions() to determine
+    // whether we're done or not.
+    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
+      // We are going to try to claim a new region. We should have
+      // given up on the previous one.
+      // Separated the asserts so that we know which one fires.
+      assert(_curr_region  == NULL, "invariant");
+      assert(_finger       == NULL, "invariant");
+      assert(_region_limit == NULL, "invariant");
+      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
+      if (claimed_region != NULL) {
+        // Yes, we managed to claim one
+        setup_for_region(claimed_region);
+        assert(_curr_region == claimed_region, "invariant");
+      }
+      // It is important to call the regular clock here. It might take
+      // a while to claim a region if, for example, we hit a large
+      // block of empty regions. So we need to call the regular clock
+      // method once round the loop to make sure it's called
+      // frequently enough.
+      regular_clock_call();
+    }
+
+    if (!has_aborted() && _curr_region == NULL) {
+      assert(_cm->out_of_regions(),
+             "at this point we should be out of regions");
+    }
+  } while ( _curr_region != NULL && !has_aborted());
+
+  if (!has_aborted()) {
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be pushing objects to it concurrently.
+    assert(_cm->out_of_regions(),
+           "at this point we should be out of regions");
+    // Try to reduce the number of available SATB buffers so that
+    // remark has less work to do.
+    drain_satb_buffers();
+  }
+
+  // Since we've done everything else, we can now totally drain the
+  // local queue and global stack.
+  drain_local_queue(false);
+  drain_global_stack(false);
+
+  // Attempt at work stealing from other task's queues.
+  if (do_stealing && !has_aborted()) {
+    // We have not aborted. This means that we have finished all that
+    // we could. Let's try to do some stealing...
+
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be pushing objects to it concurrently.
+    assert(_cm->out_of_regions() && _task_queue->size() == 0,
+           "only way to reach here");
+    while (!has_aborted()) {
+      oop obj;
+      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
+        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
+               "any stolen object should be marked");
+        scan_object(obj);
+
+        // And since we're towards the end, let's totally drain the
+        // local queue and global stack.
+        drain_local_queue(false);
+        drain_global_stack(false);
+      } else {
+        break;
+      }
+    }
+  }
+
+  // We still haven't aborted. Now, let's try to get into the
+  // termination protocol.
+  if (do_termination && !has_aborted()) {
+    // We cannot check whether the global stack is empty, since other
+    // tasks might be concurrently pushing objects on it.
+    // Separated the asserts so that we know which one fires.
+    assert(_cm->out_of_regions(), "only way to reach here");
+    assert(_task_queue->size() == 0, "only way to reach here");
+    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
+
+    // The G1CMTask class also extends the TerminatorTerminator class,
+    // hence its should_exit_termination() method will also decide
+    // whether to exit the termination protocol or not.
+    bool finished = (is_serial ||
+                     _cm->terminator()->offer_termination(this));
+    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
+    _termination_time_ms +=
+      termination_end_time_ms - _termination_start_time_ms;
+
+    if (finished) {
+      // We're all done.
+
+      if (_worker_id == 0) {
+        // let's allow task 0 to do this
+        if (concurrent()) {
+          assert(_cm->concurrent_marking_in_progress(), "invariant");
+          // we need to set this to false before the next
+          // safepoint. This way we ensure that the marking phase
+          // doesn't observe any more heap expansions.
+          _cm->clear_concurrent_marking_in_progress();
+        }
+      }
+
+      // We can now guarantee that the global stack is empty, since
+      // all other tasks have finished. We separated the guarantees so
+      // that, if a condition is false, we can immediately find out
+      // which one.
+      guarantee(_cm->out_of_regions(), "only way to reach here");
+      guarantee(_cm->mark_stack_empty(), "only way to reach here");
+      guarantee(_task_queue->size() == 0, "only way to reach here");
+      guarantee(!_cm->has_overflown(), "only way to reach here");
+      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
+    } else {
+      // Apparently there's more work to do. Let's abort this task. It
+      // will restart it and we can hopefully find more things to do.
+      set_has_aborted();
+    }
+  }
+
+  // Mainly for debugging purposes to make sure that a pointer to the
+  // closure which was statically allocated in this frame doesn't
+  // escape it by accident.
+  set_cm_oop_closure(NULL);
+  double end_time_ms = os::elapsedVTime() * 1000.0;
+  double elapsed_time_ms = end_time_ms - _start_time_ms;
+  // Update the step history.
+  _step_times_ms.add(elapsed_time_ms);
+
+  if (has_aborted()) {
+    // The task was aborted for some reason.
+    if (_has_timed_out) {
+      double diff_ms = elapsed_time_ms - _time_target_ms;
+      // Keep statistics of how well we did with respect to hitting
+      // our target only if we actually timed out (if we aborted for
+      // other reasons, then the results might get skewed).
+      _marking_step_diffs_ms.add(diff_ms);
+    }
+
+    if (_cm->has_overflown()) {
+      // This is the interesting one. We aborted because a global
+      // overflow was raised. This means we have to restart the
+      // marking phase and start iterating over regions. However, in
+      // order to do this we have to make sure that all tasks stop
+      // what they are doing and re-initialize in a safe manner. We
+      // will achieve this with the use of two barrier sync points.
+
+      if (!is_serial) {
+        // We only need to enter the sync barrier if being called
+        // from a parallel context
+        _cm->enter_first_sync_barrier(_worker_id);
+
+        // When we exit this sync barrier we know that all tasks have
+        // stopped doing marking work. So, it's now safe to
+        // re-initialize our data structures. At the end of this method,
+        // task 0 will clear the global data structures.
+      }
+
+      // We clear the local state of this task...
+      clear_region_fields();
+
+      if (!is_serial) {
+        // ...and enter the second barrier.
+        _cm->enter_second_sync_barrier(_worker_id);
+      }
+      // At this point, if we're during the concurrent phase of
+      // marking, everything has been re-initialized and we're
+      // ready to restart.
+    }
+  }
+
+  _claimed = false;
+}
+
+G1CMTask::G1CMTask(uint worker_id,
+                   G1ConcurrentMark* cm,
+                   size_t* marked_bytes,
+                   BitMap* card_bm,
+                   G1CMTaskQueue* task_queue,
+                   G1CMTaskQueueSet* task_queues)
+  : _g1h(G1CollectedHeap::heap()),
+    _worker_id(worker_id), _cm(cm),
+    _claimed(false),
+    _nextMarkBitMap(NULL), _hash_seed(17),
+    _task_queue(task_queue),
+    _task_queues(task_queues),
+    _cm_oop_closure(NULL),
+    _marked_bytes_array(marked_bytes),
+    _card_bm(card_bm) {
+  guarantee(task_queue != NULL, "invariant");
+  guarantee(task_queues != NULL, "invariant");
+
+  _marking_step_diffs_ms.add(0.5);
+}
+
+// These are formatting macros that are used below to ensure
+// consistent formatting. The *_H_* versions are used to format the
+// header for a particular value and they should be kept consistent
+// with the corresponding macro. Also note that most of the macros add
+// the necessary white space (as a prefix) which makes them a bit
+// easier to compose.
+
+// All the output lines are prefixed with this string to be able to
+// identify them easily in a large log file.
+#define G1PPRL_LINE_PREFIX            "###"
+
+#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
+#ifdef _LP64
+#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
+#else // _LP64
+#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
+#endif // _LP64
+
+// For per-region info
+#define G1PPRL_TYPE_FORMAT            "   %-4s"
+#define G1PPRL_TYPE_H_FORMAT          "   %4s"
+#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
+#define G1PPRL_BYTE_H_FORMAT          "  %9s"
+#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
+#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
+
+// For summary info
+#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
+#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
+#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
+#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
+
+G1PrintRegionLivenessInfoClosure::
+G1PrintRegionLivenessInfoClosure(const char* phase_name)
+  : _total_used_bytes(0), _total_capacity_bytes(0),
+    _total_prev_live_bytes(0), _total_next_live_bytes(0),
+    _hum_used_bytes(0), _hum_capacity_bytes(0),
+    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
+    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  MemRegion g1_reserved = g1h->g1_reserved();
+  double now = os::elapsedTime();
+
+  // Print the header of the output.
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
+                          G1PPRL_SUM_ADDR_FORMAT("reserved")
+                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
+                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
+                          HeapRegion::GrainBytes);
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
+                          G1PPRL_TYPE_H_FORMAT
+                          G1PPRL_ADDR_BASE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_DOUBLE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT,
+                          "type", "address-range",
+                          "used", "prev-live", "next-live", "gc-eff",
+                          "remset", "code-roots");
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
+                          G1PPRL_TYPE_H_FORMAT
+                          G1PPRL_ADDR_BASE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_DOUBLE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT
+                          G1PPRL_BYTE_H_FORMAT,
+                          "", "",
+                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
+                          "(bytes)", "(bytes)");
+}
+
+// It takes as a parameter a reference to one of the _hum_* fields, it
+// deduces the corresponding value for a region in a humongous region
+// series (either the region size, or what's left if the _hum_* field
+// is < the region size), and updates the _hum_* field accordingly.
+size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
+  size_t bytes = 0;
+  // The > 0 check is to deal with the prev and next live bytes which
+  // could be 0.
+  if (*hum_bytes > 0) {
+    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
+    *hum_bytes -= bytes;
+  }
+  return bytes;
+}
+
+// It deduces the values for a region in a humongous region series
+// from the _hum_* fields and updates those accordingly. It assumes
+// that that _hum_* fields have already been set up from the "starts
+// humongous" region and we visit the regions in address order.
+void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
+                                                     size_t* capacity_bytes,
+                                                     size_t* prev_live_bytes,
+                                                     size_t* next_live_bytes) {
+  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
+  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
+  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
+  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
+  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
+}
+
+bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
+  const char* type       = r->get_type_str();
+  HeapWord* bottom       = r->bottom();
+  HeapWord* end          = r->end();
+  size_t capacity_bytes  = r->capacity();
+  size_t used_bytes      = r->used();
+  size_t prev_live_bytes = r->live_bytes();
+  size_t next_live_bytes = r->next_live_bytes();
+  double gc_eff          = r->gc_efficiency();
+  size_t remset_bytes    = r->rem_set()->mem_size();
+  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
+
+  if (r->is_starts_humongous()) {
+    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
+           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
+           "they should have been zeroed after the last time we used them");
+    // Set up the _hum_* fields.
+    _hum_capacity_bytes  = capacity_bytes;
+    _hum_used_bytes      = used_bytes;
+    _hum_prev_live_bytes = prev_live_bytes;
+    _hum_next_live_bytes = next_live_bytes;
+    get_hum_bytes(&used_bytes, &capacity_bytes,
+                  &prev_live_bytes, &next_live_bytes);
+    end = bottom + HeapRegion::GrainWords;
+  } else if (r->is_continues_humongous()) {
+    get_hum_bytes(&used_bytes, &capacity_bytes,
+                  &prev_live_bytes, &next_live_bytes);
+    assert(end == bottom + HeapRegion::GrainWords, "invariant");
+  }
+
+  _total_used_bytes      += used_bytes;
+  _total_capacity_bytes  += capacity_bytes;
+  _total_prev_live_bytes += prev_live_bytes;
+  _total_next_live_bytes += next_live_bytes;
+  _total_remset_bytes    += remset_bytes;
+  _total_strong_code_roots_bytes += strong_code_roots_bytes;
+
+  // Print a line for this particular region.
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
+                          G1PPRL_TYPE_FORMAT
+                          G1PPRL_ADDR_BASE_FORMAT
+                          G1PPRL_BYTE_FORMAT
+                          G1PPRL_BYTE_FORMAT
+                          G1PPRL_BYTE_FORMAT
+                          G1PPRL_DOUBLE_FORMAT
+                          G1PPRL_BYTE_FORMAT
+                          G1PPRL_BYTE_FORMAT,
+                          type, p2i(bottom), p2i(end),
+                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
+                          remset_bytes, strong_code_roots_bytes);
+
+  return false;
+}
+
+G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
+  // add static memory usages to remembered set sizes
+  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
+  // Print the footer of the output.
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
+  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
+                         " SUMMARY"
+                         G1PPRL_SUM_MB_FORMAT("capacity")
+                         G1PPRL_SUM_MB_PERC_FORMAT("used")
+                         G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
+                         G1PPRL_SUM_MB_PERC_FORMAT("next-live")
+                         G1PPRL_SUM_MB_FORMAT("remset")
+                         G1PPRL_SUM_MB_FORMAT("code-roots"),
+                         bytes_to_mb(_total_capacity_bytes),
+                         bytes_to_mb(_total_used_bytes),
+                         perc(_total_used_bytes, _total_capacity_bytes),
+                         bytes_to_mb(_total_prev_live_bytes),
+                         perc(_total_prev_live_bytes, _total_capacity_bytes),
+                         bytes_to_mb(_total_next_live_bytes),
+                         perc(_total_next_live_bytes, _total_capacity_bytes),
+                         bytes_to_mb(_total_remset_bytes),
+                         bytes_to_mb(_total_strong_code_roots_bytes));
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -0,0 +1,1024 @@
+/*
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
+#define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
+
+#include "classfile/javaClasses.hpp"
+#include "gc/g1/g1RegionToSpaceMapper.hpp"
+#include "gc/g1/heapRegionSet.hpp"
+#include "gc/shared/taskqueue.hpp"
+
+class G1CollectedHeap;
+class G1CMBitMap;
+class G1CMTask;
+class G1ConcurrentMark;
+typedef GenericTaskQueue<oop, mtGC>              G1CMTaskQueue;
+typedef GenericTaskQueueSet<G1CMTaskQueue, mtGC> G1CMTaskQueueSet;
+
+// Closure used by CM during concurrent reference discovery
+// and reference processing (during remarking) to determine
+// if a particular object is alive. It is primarily used
+// to determine if referents of discovered reference objects
+// are alive. An instance is also embedded into the
+// reference processor as the _is_alive_non_header field
+class G1CMIsAliveClosure: public BoolObjectClosure {
+  G1CollectedHeap* _g1;
+ public:
+  G1CMIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }
+
+  bool do_object_b(oop obj);
+};
+
+// A generic CM bit map.  This is essentially a wrapper around the BitMap
+// class, with one bit per (1<<_shifter) HeapWords.
+
+class G1CMBitMapRO VALUE_OBJ_CLASS_SPEC {
+ protected:
+  HeapWord* _bmStartWord;      // base address of range covered by map
+  size_t    _bmWordSize;       // map size (in #HeapWords covered)
+  const int _shifter;          // map to char or bit
+  BitMap    _bm;               // the bit map itself
+
+ public:
+  // constructor
+  G1CMBitMapRO(int shifter);
+
+  // inquiries
+  HeapWord* startWord()   const { return _bmStartWord; }
+  // the following is one past the last word in space
+  HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }
+
+  // read marks
+
+  bool isMarked(HeapWord* addr) const {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.at(heapWordToOffset(addr));
+  }
+
+  // iteration
+  inline bool iterate(BitMapClosure* cl, MemRegion mr);
+
+  // Return the address corresponding to the next marked bit at or after
+  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
+  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
+  HeapWord* getNextMarkedWordAddress(const HeapWord* addr,
+                                     const HeapWord* limit = NULL) const;
+
+  // conversion utilities
+  HeapWord* offsetToHeapWord(size_t offset) const {
+    return _bmStartWord + (offset << _shifter);
+  }
+  size_t heapWordToOffset(const HeapWord* addr) const {
+    return pointer_delta(addr, _bmStartWord) >> _shifter;
+  }
+
+  // The argument addr should be the start address of a valid object
+  inline HeapWord* nextObject(HeapWord* addr);
+
+  void print_on_error(outputStream* st, const char* prefix) const;
+
+  // debugging
+  NOT_PRODUCT(bool covers(MemRegion rs) const;)
+};
+
+class G1CMBitMapMappingChangedListener : public G1MappingChangedListener {
+ private:
+  G1CMBitMap* _bm;
+ public:
+  G1CMBitMapMappingChangedListener() : _bm(NULL) {}
+
+  void set_bitmap(G1CMBitMap* bm) { _bm = bm; }
+
+  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
+};
+
+class G1CMBitMap : public G1CMBitMapRO {
+ private:
+  G1CMBitMapMappingChangedListener _listener;
+
+ public:
+  static size_t compute_size(size_t heap_size);
+  // Returns the amount of bytes on the heap between two marks in the bitmap.
+  static size_t mark_distance();
+  // Returns how many bytes (or bits) of the heap a single byte (or bit) of the
+  // mark bitmap corresponds to. This is the same as the mark distance above.
+  static size_t heap_map_factor() {
+    return mark_distance();
+  }
+
+  G1CMBitMap() : G1CMBitMapRO(LogMinObjAlignment), _listener() { _listener.set_bitmap(this); }
+
+  // Initializes the underlying BitMap to cover the given area.
+  void initialize(MemRegion heap, G1RegionToSpaceMapper* storage);
+
+  // Write marks.
+  inline void mark(HeapWord* addr);
+  inline void clear(HeapWord* addr);
+  inline bool parMark(HeapWord* addr);
+
+  void clearRange(MemRegion mr);
+
+  // Clear the whole mark bitmap.
+  void clearAll();
+};
+
+// Represents a marking stack used by ConcurrentMarking in the G1 collector.
+class G1CMMarkStack VALUE_OBJ_CLASS_SPEC {
+  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
+  G1ConcurrentMark* _cm;
+  oop* _base;        // bottom of stack
+  jint _index;       // one more than last occupied index
+  jint _capacity;    // max #elements
+  jint _saved_index; // value of _index saved at start of GC
+
+  bool  _overflow;
+  bool  _should_expand;
+
+ public:
+  G1CMMarkStack(G1ConcurrentMark* cm);
+  ~G1CMMarkStack();
+
+  bool allocate(size_t capacity);
+
+  // Pushes the first "n" elements of "ptr_arr" on the stack.
+  // Locking impl: concurrency is allowed only with
+  // "par_push_arr" and/or "par_pop_arr" operations, which use the same
+  // locking strategy.
+  void par_push_arr(oop* ptr_arr, int n);
+
+  // If returns false, the array was empty.  Otherwise, removes up to "max"
+  // elements from the stack, and transfers them to "ptr_arr" in an
+  // unspecified order.  The actual number transferred is given in "n" ("n
+  // == 0" is deliberately redundant with the return value.)  Locking impl:
+  // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
+  // operations, which use the same locking strategy.
+  bool par_pop_arr(oop* ptr_arr, int max, int* n);
+
+  bool isEmpty()    { return _index == 0; }
+  int  maxElems()   { return _capacity; }
+
+  bool overflow() { return _overflow; }
+  void clear_overflow() { _overflow = false; }
+
+  bool should_expand() const { return _should_expand; }
+  void set_should_expand();
+
+  // Expand the stack, typically in response to an overflow condition
+  void expand();
+
+  int  size() { return _index; }
+
+  void setEmpty()   { _index = 0; clear_overflow(); }
+
+  // Record the current index.
+  void note_start_of_gc();
+
+  // Make sure that we have not added any entries to the stack during GC.
+  void note_end_of_gc();
+
+  // Apply fn to each oop in the mark stack, up to the bound recorded
+  // via one of the above "note" functions.  The mark stack must not
+  // be modified while iterating.
+  template<typename Fn> void iterate(Fn fn);
+};
+
+class YoungList;
+
+// Root Regions are regions that are not empty at the beginning of a
+// marking cycle and which we might collect during an evacuation pause
+// while the cycle is active. Given that, during evacuation pauses, we
+// do not copy objects that are explicitly marked, what we have to do
+// for the root regions is to scan them and mark all objects reachable
+// from them. According to the SATB assumptions, we only need to visit
+// each object once during marking. So, as long as we finish this scan
+// before the next evacuation pause, we can copy the objects from the
+// root regions without having to mark them or do anything else to them.
+//
+// Currently, we only support root region scanning once (at the start
+// of the marking cycle) and the root regions are all the survivor
+// regions populated during the initial-mark pause.
+class G1CMRootRegions VALUE_OBJ_CLASS_SPEC {
+private:
+  YoungList*           _young_list;
+  G1ConcurrentMark*    _cm;
+
+  volatile bool        _scan_in_progress;
+  volatile bool        _should_abort;
+  HeapRegion* volatile _next_survivor;
+
+public:
+  G1CMRootRegions();
+  // We actually do most of the initialization in this method.
+  void init(G1CollectedHeap* g1h, G1ConcurrentMark* cm);
+
+  // Reset the claiming / scanning of the root regions.
+  void prepare_for_scan();
+
+  // Forces get_next() to return NULL so that the iteration aborts early.
+  void abort() { _should_abort = true; }
+
+  // Return true if the CM thread are actively scanning root regions,
+  // false otherwise.
+  bool scan_in_progress() { return _scan_in_progress; }
+
+  // Claim the next root region to scan atomically, or return NULL if
+  // all have been claimed.
+  HeapRegion* claim_next();
+
+  // Flag that we're done with root region scanning and notify anyone
+  // who's waiting on it. If aborted is false, assume that all regions
+  // have been claimed.
+  void scan_finished();
+
+  // If CM threads are still scanning root regions, wait until they
+  // are done. Return true if we had to wait, false otherwise.
+  bool wait_until_scan_finished();
+};
+
+class ConcurrentMarkThread;
+
+class G1ConcurrentMark: public CHeapObj<mtGC> {
+  friend class ConcurrentMarkThread;
+  friend class G1ParNoteEndTask;
+  friend class CalcLiveObjectsClosure;
+  friend class G1CMRefProcTaskProxy;
+  friend class G1CMRefProcTaskExecutor;
+  friend class G1CMKeepAliveAndDrainClosure;
+  friend class G1CMDrainMarkingStackClosure;
+  friend class G1CMBitMapClosure;
+  friend class G1CMConcurrentMarkingTask;
+  friend class G1CMMarkStack;
+  friend class G1CMRemarkTask;
+  friend class G1CMTask;
+
+protected:
+  ConcurrentMarkThread* _cmThread;   // The thread doing the work
+  G1CollectedHeap*      _g1h;        // The heap
+  uint                  _parallel_marking_threads; // The number of marking
+                                                   // threads we're using
+  uint                  _max_parallel_marking_threads; // Max number of marking
+                                                       // threads we'll ever use
+  double                _sleep_factor; // How much we have to sleep, with
+                                       // respect to the work we just did, to
+                                       // meet the marking overhead goal
+  double                _marking_task_overhead; // Marking target overhead for
+                                                // a single task
+
+  FreeRegionList        _cleanup_list;
+
+  // Concurrent marking support structures
+  G1CMBitMap              _markBitMap1;
+  G1CMBitMap              _markBitMap2;
+  G1CMBitMapRO*           _prevMarkBitMap; // Completed mark bitmap
+  G1CMBitMap*             _nextMarkBitMap; // Under-construction mark bitmap
+
+  BitMap                  _region_bm;
+  BitMap                  _card_bm;
+
+  // Heap bounds
+  HeapWord*               _heap_start;
+  HeapWord*               _heap_end;
+
+  // Root region tracking and claiming
+  G1CMRootRegions         _root_regions;
+
+  // For gray objects
+  G1CMMarkStack           _markStack; // Grey objects behind global finger
+  HeapWord* volatile      _finger;  // The global finger, region aligned,
+                                    // always points to the end of the
+                                    // last claimed region
+
+  // Marking tasks
+  uint                    _max_worker_id;// Maximum worker id
+  uint                    _active_tasks; // Task num currently active
+  G1CMTask**              _tasks;        // Task queue array (max_worker_id len)
+  G1CMTaskQueueSet*       _task_queues;  // Task queue set
+  ParallelTaskTerminator  _terminator;   // For termination
+
+  // Two sync barriers that are used to synchronize tasks when an
+  // overflow occurs. The algorithm is the following. All tasks enter
+  // the first one to ensure that they have all stopped manipulating
+  // the global data structures. After they exit it, they re-initialize
+  // their data structures and task 0 re-initializes the global data
+  // structures. Then, they enter the second sync barrier. This
+  // ensure, that no task starts doing work before all data
+  // structures (local and global) have been re-initialized. When they
+  // exit it, they are free to start working again.
+  WorkGangBarrierSync     _first_overflow_barrier_sync;
+  WorkGangBarrierSync     _second_overflow_barrier_sync;
+
+  // This is set by any task, when an overflow on the global data
+  // structures is detected
+  volatile bool           _has_overflown;
+  // True: marking is concurrent, false: we're in remark
+  volatile bool           _concurrent;
+  // Set at the end of a Full GC so that marking aborts
+  volatile bool           _has_aborted;
+
+  // Used when remark aborts due to an overflow to indicate that
+  // another concurrent marking phase should start
+  volatile bool           _restart_for_overflow;
+
+  // This is true from the very start of concurrent marking until the
+  // point when all the tasks complete their work. It is really used
+  // to determine the points between the end of concurrent marking and
+  // time of remark.
+  volatile bool           _concurrent_marking_in_progress;
+
+  // Keep track of whether we have started concurrent phase or not.
+  bool                    _concurrent_phase_started;
+
+  // All of these times are in ms
+  NumberSeq _init_times;
+  NumberSeq _remark_times;
+  NumberSeq _remark_mark_times;
+  NumberSeq _remark_weak_ref_times;
+  NumberSeq _cleanup_times;
+  double    _total_counting_time;
+  double    _total_rs_scrub_time;
+
+  double*   _accum_task_vtime;   // Accumulated task vtime
+
+  WorkGang* _parallel_workers;
+
+  void weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes);
+  void weakRefsWork(bool clear_all_soft_refs);
+
+  void swapMarkBitMaps();
+
+  // It resets the global marking data structures, as well as the
+  // task local ones; should be called during initial mark.
+  void reset();
+
+  // Resets all the marking data structures. Called when we have to restart
+  // marking or when marking completes (via set_non_marking_state below).
+  void reset_marking_state(bool clear_overflow = true);
+
+  // We do this after we're done with marking so that the marking data
+  // structures are initialized to a sensible and predictable state.
+  void set_non_marking_state();
+
+  // Called to indicate how many threads are currently active.
+  void set_concurrency(uint active_tasks);
+
+  // It should be called to indicate which phase we're in (concurrent
+  // mark or remark) and how many threads are currently active.
+  void set_concurrency_and_phase(uint active_tasks, bool concurrent);
+
+  // Prints all gathered CM-related statistics
+  void print_stats();
+
+  bool cleanup_list_is_empty() {
+    return _cleanup_list.is_empty();
+  }
+
+  // Accessor methods
+  uint parallel_marking_threads() const     { return _parallel_marking_threads; }
+  uint max_parallel_marking_threads() const { return _max_parallel_marking_threads;}
+  double sleep_factor()                     { return _sleep_factor; }
+  double marking_task_overhead()            { return _marking_task_overhead;}
+
+  HeapWord*               finger()          { return _finger;   }
+  bool                    concurrent()      { return _concurrent; }
+  uint                    active_tasks()    { return _active_tasks; }
+  ParallelTaskTerminator* terminator()      { return &_terminator; }
+
+  // It claims the next available region to be scanned by a marking
+  // task/thread. It might return NULL if the next region is empty or
+  // we have run out of regions. In the latter case, out_of_regions()
+  // determines whether we've really run out of regions or the task
+  // should call claim_region() again. This might seem a bit
+  // awkward. Originally, the code was written so that claim_region()
+  // either successfully returned with a non-empty region or there
+  // were no more regions to be claimed. The problem with this was
+  // that, in certain circumstances, it iterated over large chunks of
+  // the heap finding only empty regions and, while it was working, it
+  // was preventing the calling task to call its regular clock
+  // method. So, this way, each task will spend very little time in
+  // claim_region() and is allowed to call the regular clock method
+  // frequently.
+  HeapRegion* claim_region(uint worker_id);
+
+  // It determines whether we've run out of regions to scan. Note that
+  // the finger can point past the heap end in case the heap was expanded
+  // to satisfy an allocation without doing a GC. This is fine, because all
+  // objects in those regions will be considered live anyway because of
+  // SATB guarantees (i.e. their TAMS will be equal to bottom).
+  bool        out_of_regions() { return _finger >= _heap_end; }
+
+  // Returns the task with the given id
+  G1CMTask* task(int id) {
+    assert(0 <= id && id < (int) _active_tasks,
+           "task id not within active bounds");
+    return _tasks[id];
+  }
+
+  // Returns the task queue with the given id
+  G1CMTaskQueue* task_queue(int id) {
+    assert(0 <= id && id < (int) _active_tasks,
+           "task queue id not within active bounds");
+    return (G1CMTaskQueue*) _task_queues->queue(id);
+  }
+
+  // Returns the task queue set
+  G1CMTaskQueueSet* task_queues()  { return _task_queues; }
+
+  // Access / manipulation of the overflow flag which is set to
+  // indicate that the global stack has overflown
+  bool has_overflown()           { return _has_overflown; }
+  void set_has_overflown()       { _has_overflown = true; }
+  void clear_has_overflown()     { _has_overflown = false; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
+
+  // Methods to enter the two overflow sync barriers
+  void enter_first_sync_barrier(uint worker_id);
+  void enter_second_sync_barrier(uint worker_id);
+
+  // Live Data Counting data structures...
+  // These data structures are initialized at the start of
+  // marking. They are written to while marking is active.
+  // They are aggregated during remark; the aggregated values
+  // are then used to populate the _region_bm, _card_bm, and
+  // the total live bytes, which are then subsequently updated
+  // during cleanup.
+
+  // An array of bitmaps (one bit map per task). Each bitmap
+  // is used to record the cards spanned by the live objects
+  // marked by that task/worker.
+  BitMap*  _count_card_bitmaps;
+
+  // Used to record the number of marked live bytes
+  // (for each region, by worker thread).
+  size_t** _count_marked_bytes;
+
+  // Card index of the bottom of the G1 heap. Used for biasing indices into
+  // the card bitmaps.
+  intptr_t _heap_bottom_card_num;
+
+  // Set to true when initialization is complete
+  bool _completed_initialization;
+
+public:
+  // Manipulation of the global mark stack.
+  // The push and pop operations are used by tasks for transfers
+  // between task-local queues and the global mark stack, and use
+  // locking for concurrency safety.
+  bool mark_stack_push(oop* arr, int n) {
+    _markStack.par_push_arr(arr, n);
+    if (_markStack.overflow()) {
+      set_has_overflown();
+      return false;
+    }
+    return true;
+  }
+  void mark_stack_pop(oop* arr, int max, int* n) {
+    _markStack.par_pop_arr(arr, max, n);
+  }
+  size_t mark_stack_size()                { return _markStack.size(); }
+  size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
+  bool mark_stack_overflow()              { return _markStack.overflow(); }
+  bool mark_stack_empty()                 { return _markStack.isEmpty(); }
+
+  G1CMRootRegions* root_regions() { return &_root_regions; }
+
+  bool concurrent_marking_in_progress() {
+    return _concurrent_marking_in_progress;
+  }
+  void set_concurrent_marking_in_progress() {
+    _concurrent_marking_in_progress = true;
+  }
+  void clear_concurrent_marking_in_progress() {
+    _concurrent_marking_in_progress = false;
+  }
+
+  void register_concurrent_phase_start(const char* title);
+  void register_concurrent_phase_end();
+
+  void update_accum_task_vtime(int i, double vtime) {
+    _accum_task_vtime[i] += vtime;
+  }
+
+  double all_task_accum_vtime() {
+    double ret = 0.0;
+    for (uint i = 0; i < _max_worker_id; ++i)
+      ret += _accum_task_vtime[i];
+    return ret;
+  }
+
+  // Attempts to steal an object from the task queues of other tasks
+  bool try_stealing(uint worker_id, int* hash_seed, oop& obj);
+
+  G1ConcurrentMark(G1CollectedHeap* g1h,
+                   G1RegionToSpaceMapper* prev_bitmap_storage,
+                   G1RegionToSpaceMapper* next_bitmap_storage);
+  ~G1ConcurrentMark();
+
+  ConcurrentMarkThread* cmThread() { return _cmThread; }
+
+  G1CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
+  G1CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
+
+  // Returns the number of GC threads to be used in a concurrent
+  // phase based on the number of GC threads being used in a STW
+  // phase.
+  uint scale_parallel_threads(uint n_par_threads);
+
+  // Calculates the number of GC threads to be used in a concurrent phase.
+  uint calc_parallel_marking_threads();
+
+  // The following three are interaction between CM and
+  // G1CollectedHeap
+
+  // This notifies CM that a root during initial-mark needs to be
+  // grayed. It is MT-safe. word_size is the size of the object in
+  // words. It is passed explicitly as sometimes we cannot calculate
+  // it from the given object because it might be in an inconsistent
+  // state (e.g., in to-space and being copied). So the caller is
+  // responsible for dealing with this issue (e.g., get the size from
+  // the from-space image when the to-space image might be
+  // inconsistent) and always passing the size. hr is the region that
+  // contains the object and it's passed optionally from callers who
+  // might already have it (no point in recalculating it).
+  inline void grayRoot(oop obj,
+                       size_t word_size,
+                       uint worker_id,
+                       HeapRegion* hr = NULL);
+
+  // Clear the next marking bitmap (will be called concurrently).
+  void clearNextBitmap();
+
+  // Return whether the next mark bitmap has no marks set. To be used for assertions
+  // only. Will not yield to pause requests.
+  bool nextMarkBitmapIsClear();
+
+  // These two do the work that needs to be done before and after the
+  // initial root checkpoint. Since this checkpoint can be done at two
+  // different points (i.e. an explicit pause or piggy-backed on a
+  // young collection), then it's nice to be able to easily share the
+  // pre/post code. It might be the case that we can put everything in
+  // the post method. TP
+  void checkpointRootsInitialPre();
+  void checkpointRootsInitialPost();
+
+  // Scan all the root regions and mark everything reachable from
+  // them.
+  void scanRootRegions();
+
+  // Scan a single root region and mark everything reachable from it.
+  void scanRootRegion(HeapRegion* hr, uint worker_id);
+
+  // Do concurrent phase of marking, to a tentative transitive closure.
+  void markFromRoots();
+
+  void checkpointRootsFinal(bool clear_all_soft_refs);
+  void checkpointRootsFinalWork();
+  void cleanup();
+  void completeCleanup();
+
+  // Mark in the previous bitmap.  NB: this is usually read-only, so use
+  // this carefully!
+  inline void markPrev(oop p);
+
+  // Clears marks for all objects in the given range, for the prev or
+  // next bitmaps.  NB: the previous bitmap is usually
+  // read-only, so use this carefully!
+  void clearRangePrevBitmap(MemRegion mr);
+
+  // Notify data structures that a GC has started.
+  void note_start_of_gc() {
+    _markStack.note_start_of_gc();
+  }
+
+  // Notify data structures that a GC is finished.
+  void note_end_of_gc() {
+    _markStack.note_end_of_gc();
+  }
+
+  // Verify that there are no CSet oops on the stacks (taskqueues /
+  // global mark stack) and fingers (global / per-task).
+  // If marking is not in progress, it's a no-op.
+  void verify_no_cset_oops() PRODUCT_RETURN;
+
+  inline bool isPrevMarked(oop p) const;
+
+  inline bool do_yield_check(uint worker_i = 0);
+
+  // Called to abort the marking cycle after a Full GC takes place.
+  void abort();
+
+  bool has_aborted()      { return _has_aborted; }
+
+  void print_summary_info();
+
+  void print_worker_threads_on(outputStream* st) const;
+
+  void print_on_error(outputStream* st) const;
+
+  // Liveness counting
+
+  // Utility routine to set an exclusive range of cards on the given
+  // card liveness bitmap
+  inline void set_card_bitmap_range(BitMap* card_bm,
+                                    BitMap::idx_t start_idx,
+                                    BitMap::idx_t end_idx,
+                                    bool is_par);
+
+  // Returns the card number of the bottom of the G1 heap.
+  // Used in biasing indices into accounting card bitmaps.
+  intptr_t heap_bottom_card_num() const {
+    return _heap_bottom_card_num;
+  }
+
+  // Returns the card bitmap for a given task or worker id.
+  BitMap* count_card_bitmap_for(uint worker_id) {
+    assert(worker_id < _max_worker_id, "oob");
+    assert(_count_card_bitmaps != NULL, "uninitialized");
+    BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    return task_card_bm;
+  }
+
+  // Returns the array containing the marked bytes for each region,
+  // for the given worker or task id.
+  size_t* count_marked_bytes_array_for(uint worker_id) {
+    assert(worker_id < _max_worker_id, "oob");
+    assert(_count_marked_bytes != NULL, "uninitialized");
+    size_t* marked_bytes_array = _count_marked_bytes[worker_id];
+    assert(marked_bytes_array != NULL, "uninitialized");
+    return marked_bytes_array;
+  }
+
+  // Returns the index in the liveness accounting card table bitmap
+  // for the given address
+  inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
+
+  // Counts the size of the given memory region in the the given
+  // marked_bytes array slot for the given HeapRegion.
+  // Sets the bits in the given card bitmap that are associated with the
+  // cards that are spanned by the memory region.
+  inline void count_region(MemRegion mr,
+                           HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+
+  // Counts the given object in the given task/worker counting
+  // data structures.
+  inline void count_object(oop obj,
+                           HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm,
+                           size_t word_size);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the given task/worker counting structures.
+  inline bool par_mark_and_count(oop obj,
+                                 HeapRegion* hr,
+                                 size_t* marked_bytes_array,
+                                 BitMap* task_card_bm);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj,
+                                 size_t word_size,
+                                 HeapRegion* hr,
+                                 uint worker_id);
+
+  // Returns true if initialization was successfully completed.
+  bool completed_initialization() const {
+    return _completed_initialization;
+  }
+
+protected:
+  // Clear all the per-task bitmaps and arrays used to store the
+  // counting data.
+  void clear_all_count_data();
+
+  // Aggregates the counting data for each worker/task
+  // that was constructed while marking. Also sets
+  // the amount of marked bytes for each region and
+  // the top at concurrent mark count.
+  void aggregate_count_data();
+
+  // Verification routine
+  void verify_count_data();
+};
+
+// A class representing a marking task.
+class G1CMTask : public TerminatorTerminator {
+private:
+  enum PrivateConstants {
+    // the regular clock call is called once the scanned words reaches
+    // this limit
+    words_scanned_period          = 12*1024,
+    // the regular clock call is called once the number of visited
+    // references reaches this limit
+    refs_reached_period           = 384,
+    // initial value for the hash seed, used in the work stealing code
+    init_hash_seed                = 17,
+    // how many entries will be transferred between global stack and
+    // local queues
+    global_stack_transfer_size    = 16
+  };
+
+  uint                        _worker_id;
+  G1CollectedHeap*            _g1h;
+  G1ConcurrentMark*           _cm;
+  G1CMBitMap*                 _nextMarkBitMap;
+  // the task queue of this task
+  G1CMTaskQueue*              _task_queue;
+private:
+  // the task queue set---needed for stealing
+  G1CMTaskQueueSet*           _task_queues;
+  // indicates whether the task has been claimed---this is only  for
+  // debugging purposes
+  bool                        _claimed;
+
+  // number of calls to this task
+  int                         _calls;
+
+  // when the virtual timer reaches this time, the marking step should
+  // exit
+  double                      _time_target_ms;
+  // the start time of the current marking step
+  double                      _start_time_ms;
+
+  // the oop closure used for iterations over oops
+  G1CMOopClosure*             _cm_oop_closure;
+
+  // the region this task is scanning, NULL if we're not scanning any
+  HeapRegion*                 _curr_region;
+  // the local finger of this task, NULL if we're not scanning a region
+  HeapWord*                   _finger;
+  // limit of the region this task is scanning, NULL if we're not scanning one
+  HeapWord*                   _region_limit;
+
+  // the number of words this task has scanned
+  size_t                      _words_scanned;
+  // When _words_scanned reaches this limit, the regular clock is
+  // called. Notice that this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t                      _words_scanned_limit;
+  // the initial value of _words_scanned_limit (i.e. what it was
+  // before it was decreased).
+  size_t                      _real_words_scanned_limit;
+
+  // the number of references this task has visited
+  size_t                      _refs_reached;
+  // When _refs_reached reaches this limit, the regular clock is
+  // called. Notice this this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t                      _refs_reached_limit;
+  // the initial value of _refs_reached_limit (i.e. what it was before
+  // it was decreased).
+  size_t                      _real_refs_reached_limit;
+
+  // used by the work stealing stuff
+  int                         _hash_seed;
+  // if this is true, then the task has aborted for some reason
+  bool                        _has_aborted;
+  // set when the task aborts because it has met its time quota
+  bool                        _has_timed_out;
+  // true when we're draining SATB buffers; this avoids the task
+  // aborting due to SATB buffers being available (as we're already
+  // dealing with them)
+  bool                        _draining_satb_buffers;
+
+  // number sequence of past step times
+  NumberSeq                   _step_times_ms;
+  // elapsed time of this task
+  double                      _elapsed_time_ms;
+  // termination time of this task
+  double                      _termination_time_ms;
+  // when this task got into the termination protocol
+  double                      _termination_start_time_ms;
+
+  // true when the task is during a concurrent phase, false when it is
+  // in the remark phase (so, in the latter case, we do not have to
+  // check all the things that we have to check during the concurrent
+  // phase, i.e. SATB buffer availability...)
+  bool                        _concurrent;
+
+  TruncatedSeq                _marking_step_diffs_ms;
+
+  // Counting data structures. Embedding the task's marked_bytes_array
+  // and card bitmap into the actual task saves having to go through
+  // the ConcurrentMark object.
+  size_t*                     _marked_bytes_array;
+  BitMap*                     _card_bm;
+
+  // it updates the local fields after this task has claimed
+  // a new region to scan
+  void setup_for_region(HeapRegion* hr);
+  // it brings up-to-date the limit of the region
+  void update_region_limit();
+
+  // called when either the words scanned or the refs visited limit
+  // has been reached
+  void reached_limit();
+  // recalculates the words scanned and refs visited limits
+  void recalculate_limits();
+  // decreases the words scanned and refs visited limits when we reach
+  // an expensive operation
+  void decrease_limits();
+  // it checks whether the words scanned or refs visited reached their
+  // respective limit and calls reached_limit() if they have
+  void check_limits() {
+    if (_words_scanned >= _words_scanned_limit ||
+        _refs_reached >= _refs_reached_limit) {
+      reached_limit();
+    }
+  }
+  // this is supposed to be called regularly during a marking step as
+  // it checks a bunch of conditions that might cause the marking step
+  // to abort
+  void regular_clock_call();
+  bool concurrent() { return _concurrent; }
+
+  // Test whether obj might have already been passed over by the
+  // mark bitmap scan, and so needs to be pushed onto the mark stack.
+  bool is_below_finger(oop obj, HeapWord* global_finger) const;
+
+  template<bool scan> void process_grey_object(oop obj);
+
+public:
+  // It resets the task; it should be called right at the beginning of
+  // a marking phase.
+  void reset(G1CMBitMap* _nextMarkBitMap);
+  // it clears all the fields that correspond to a claimed region.
+  void clear_region_fields();
+
+  void set_concurrent(bool concurrent) { _concurrent = concurrent; }
+
+  // The main method of this class which performs a marking step
+  // trying not to exceed the given duration. However, it might exit
+  // prematurely, according to some conditions (i.e. SATB buffers are
+  // available for processing).
+  void do_marking_step(double target_ms,
+                       bool do_termination,
+                       bool is_serial);
+
+  // These two calls start and stop the timer
+  void record_start_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0;
+  }
+  void record_end_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
+  }
+
+  // returns the worker ID associated with this task.
+  uint worker_id() { return _worker_id; }
+
+  // From TerminatorTerminator. It determines whether this task should
+  // exit the termination protocol after it's entered it.
+  virtual bool should_exit_termination();
+
+  // Resets the local region fields after a task has finished scanning a
+  // region; or when they have become stale as a result of the region
+  // being evacuated.
+  void giveup_current_region();
+
+  HeapWord* finger()            { return _finger; }
+
+  bool has_aborted()            { return _has_aborted; }
+  void set_has_aborted()        { _has_aborted = true; }
+  void clear_has_aborted()      { _has_aborted = false; }
+  bool has_timed_out()          { return _has_timed_out; }
+  bool claimed()                { return _claimed; }
+
+  void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
+
+  // Increment the number of references this task has visited.
+  void increment_refs_reached() { ++_refs_reached; }
+
+  // Grey the object by marking it.  If not already marked, push it on
+  // the local queue if below the finger.
+  // Precondition: obj is in region.
+  // Precondition: obj is below region's NTAMS.
+  inline void make_reference_grey(oop obj, HeapRegion* region);
+
+  // Grey the object (by calling make_grey_reference) if required,
+  // e.g. obj is below its containing region's NTAMS.
+  // Precondition: obj is a valid heap object.
+  inline void deal_with_reference(oop obj);
+
+  // It scans an object and visits its children.
+  inline void scan_object(oop obj);
+
+  // It pushes an object on the local queue.
+  inline void push(oop obj);
+
+  // These two move entries to/from the global stack.
+  void move_entries_to_global_stack();
+  void get_entries_from_global_stack();
+
+  // It pops and scans objects from the local queue. If partially is
+  // true, then it stops when the queue size is of a given limit. If
+  // partially is false, then it stops when the queue is empty.
+  void drain_local_queue(bool partially);
+  // It moves entries from the global stack to the local queue and
+  // drains the local queue. If partially is true, then it stops when
+  // both the global stack and the local queue reach a given size. If
+  // partially if false, it tries to empty them totally.
+  void drain_global_stack(bool partially);
+  // It keeps picking SATB buffers and processing them until no SATB
+  // buffers are available.
+  void drain_satb_buffers();
+
+  // moves the local finger to a new location
+  inline void move_finger_to(HeapWord* new_finger) {
+    assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
+    _finger = new_finger;
+  }
+
+  G1CMTask(uint worker_id,
+           G1ConcurrentMark *cm,
+           size_t* marked_bytes,
+           BitMap* card_bm,
+           G1CMTaskQueue* task_queue,
+           G1CMTaskQueueSet* task_queues);
+
+  // it prints statistics associated with this task
+  void print_stats();
+};
+
+// Class that's used to to print out per-region liveness
+// information. It's currently used at the end of marking and also
+// after we sort the old regions at the end of the cleanup operation.
+class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
+private:
+  // Accumulators for these values.
+  size_t _total_used_bytes;
+  size_t _total_capacity_bytes;
+  size_t _total_prev_live_bytes;
+  size_t _total_next_live_bytes;
+
+  // These are set up when we come across a "stars humongous" region
+  // (as this is where most of this information is stored, not in the
+  // subsequent "continues humongous" regions). After that, for every
+  // region in a given humongous region series we deduce the right
+  // values for it by simply subtracting the appropriate amount from
+  // these fields. All these values should reach 0 after we've visited
+  // the last region in the series.
+  size_t _hum_used_bytes;
+  size_t _hum_capacity_bytes;
+  size_t _hum_prev_live_bytes;
+  size_t _hum_next_live_bytes;
+
+  // Accumulator for the remembered set size
+  size_t _total_remset_bytes;
+
+  // Accumulator for strong code roots memory size
+  size_t _total_strong_code_roots_bytes;
+
+  static double perc(size_t val, size_t total) {
+    if (total == 0) {
+      return 0.0;
+    } else {
+      return 100.0 * ((double) val / (double) total);
+    }
+  }
+
+  static double bytes_to_mb(size_t val) {
+    return (double) val / (double) M;
+  }
+
+  // See the .cpp file.
+  size_t get_hum_bytes(size_t* hum_bytes);
+  void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
+                     size_t* prev_live_bytes, size_t* next_live_bytes);
+
+public:
+  // The header and footer are printed in the constructor and
+  // destructor respectively.
+  G1PrintRegionLivenessInfoClosure(const char* phase_name);
+  virtual bool doHeapRegion(HeapRegion* r);
+  ~G1PrintRegionLivenessInfoClosure();
+};
+
+#endif // SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARK_INLINE_HPP
+#define SHARE_VM_GC_G1_G1CONCURRENTMARK_INLINE_HPP
+
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1ConcurrentMark.hpp"
+#include "gc/shared/taskqueue.inline.hpp"
+
+// Utility routine to set an exclusive range of cards on the given
+// card liveness bitmap
+inline void G1ConcurrentMark::set_card_bitmap_range(BitMap* card_bm,
+                                                    BitMap::idx_t start_idx,
+                                                    BitMap::idx_t end_idx,
+                                                    bool is_par) {
+
+  // Set the exclusive bit range [start_idx, end_idx).
+  assert((end_idx - start_idx) > 0, "at least one card");
+  assert(end_idx <= card_bm->size(), "sanity");
+
+  // Silently clip the end index
+  end_idx = MIN2(end_idx, card_bm->size());
+
+  // For small ranges use a simple loop; otherwise use set_range or
+  // use par_at_put_range (if parallel). The range is made up of the
+  // cards that are spanned by an object/mem region so 8 cards will
+  // allow up to object sizes up to 4K to be handled using the loop.
+  if ((end_idx - start_idx) <= 8) {
+    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
+      if (is_par) {
+        card_bm->par_set_bit(i);
+      } else {
+        card_bm->set_bit(i);
+      }
+    }
+  } else {
+    // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive.
+    if (is_par) {
+      card_bm->par_at_put_range(start_idx, end_idx, true);
+    } else {
+      card_bm->set_range(start_idx, end_idx);
+    }
+  }
+}
+
+// Returns the index in the liveness accounting card bitmap
+// for the given address
+inline BitMap::idx_t G1ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
+  // Below, the term "card num" means the result of shifting an address
+  // by the card shift -- address 0 corresponds to card number 0.  One
+  // must subtract the card num of the bottom of the heap to obtain a
+  // card table index.
+  intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
+  return card_num - heap_bottom_card_num();
+}
+
+// Counts the given memory region in the given task/worker
+// counting data structures.
+inline void G1ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
+                                           size_t* marked_bytes_array,
+                                           BitMap* task_card_bm) {
+  G1CollectedHeap* g1h = _g1h;
+  CardTableModRefBS* ct_bs = g1h->g1_barrier_set();
+
+  HeapWord* start = mr.start();
+  HeapWord* end = mr.end();
+  size_t region_size_bytes = mr.byte_size();
+  uint index = hr->hrm_index();
+
+  assert(hr == g1h->heap_region_containing(start), "sanity");
+  assert(marked_bytes_array != NULL, "pre-condition");
+  assert(task_card_bm != NULL, "pre-condition");
+
+  // Add to the task local marked bytes for this region.
+  marked_bytes_array[index] += region_size_bytes;
+
+  BitMap::idx_t start_idx = card_bitmap_index_for(start);
+  BitMap::idx_t end_idx = card_bitmap_index_for(end);
+
+  // Note: if we're looking at the last region in heap - end
+  // could be actually just beyond the end of the heap; end_idx
+  // will then correspond to a (non-existent) card that is also
+  // just beyond the heap.
+  if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) {
+    // end of region is not card aligned - increment to cover
+    // all the cards spanned by the region.
+    end_idx += 1;
+  }
+  // The card bitmap is task/worker specific => no need to use
+  // the 'par' BitMap routines.
+  // Set bits in the exclusive bit range [start_idx, end_idx).
+  set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */);
+}
+
+// Counts the given object in the given task/worker counting data structures.
+inline void G1ConcurrentMark::count_object(oop obj,
+                                           HeapRegion* hr,
+                                           size_t* marked_bytes_array,
+                                           BitMap* task_card_bm,
+                                           size_t word_size) {
+  assert(!hr->is_continues_humongous(), "Cannot enter count_object with continues humongous");
+  if (!hr->is_starts_humongous()) {
+    MemRegion mr((HeapWord*)obj, word_size);
+    count_region(mr, hr, marked_bytes_array, task_card_bm);
+  } else {
+    do {
+      MemRegion mr(hr->bottom(), hr->top());
+      count_region(mr, hr, marked_bytes_array, task_card_bm);
+      hr = _g1h->next_region_in_humongous(hr);
+    } while (hr != NULL);
+  }
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the given task/worker counting structures.
+inline bool G1ConcurrentMark::par_mark_and_count(oop obj,
+                                                 HeapRegion* hr,
+                                                 size_t* marked_bytes_array,
+                                                 BitMap* task_card_bm) {
+  if (_nextMarkBitMap->parMark((HeapWord*)obj)) {
+    // Update the task specific count data for the object.
+    count_object(obj, hr, marked_bytes_array, task_card_bm, obj->size());
+    return true;
+  }
+  return false;
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the task/worker counting structures for the
+// given worker id.
+inline bool G1ConcurrentMark::par_mark_and_count(oop obj,
+                                                 size_t word_size,
+                                                 HeapRegion* hr,
+                                                 uint worker_id) {
+  if (_nextMarkBitMap->parMark((HeapWord*)obj)) {
+    size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+    BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+    count_object(obj, hr, marked_bytes_array, task_card_bm, word_size);
+    return true;
+  }
+  return false;
+}
+
+inline bool G1CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
+  HeapWord* start_addr = MAX2(startWord(), mr.start());
+  HeapWord* end_addr = MIN2(endWord(), mr.end());
+
+  if (end_addr > start_addr) {
+    // Right-open interval [start-offset, end-offset).
+    BitMap::idx_t start_offset = heapWordToOffset(start_addr);
+    BitMap::idx_t end_offset = heapWordToOffset(end_addr);
+
+    start_offset = _bm.get_next_one_offset(start_offset, end_offset);
+    while (start_offset < end_offset) {
+      if (!cl->do_bit(start_offset)) {
+        return false;
+      }
+      HeapWord* next_addr = MIN2(nextObject(offsetToHeapWord(start_offset)), end_addr);
+      BitMap::idx_t next_offset = heapWordToOffset(next_addr);
+      start_offset = _bm.get_next_one_offset(next_offset, end_offset);
+    }
+  }
+  return true;
+}
+
+// The argument addr should be the start address of a valid object
+HeapWord* G1CMBitMapRO::nextObject(HeapWord* addr) {
+  oop obj = (oop) addr;
+  HeapWord* res =  addr + obj->size();
+  assert(offsetToHeapWord(heapWordToOffset(res)) == res, "sanity");
+  return res;
+}
+
+#define check_mark(addr)                                                       \
+  assert(_bmStartWord <= (addr) && (addr) < (_bmStartWord + _bmWordSize),      \
+         "outside underlying space?");                                         \
+  assert(G1CollectedHeap::heap()->is_in_exact(addr),                           \
+         "Trying to access not available bitmap " PTR_FORMAT                   \
+         " corresponding to " PTR_FORMAT " (%u)",                              \
+         p2i(this), p2i(addr), G1CollectedHeap::heap()->addr_to_region(addr));
+
+inline void G1CMBitMap::mark(HeapWord* addr) {
+  check_mark(addr);
+  _bm.set_bit(heapWordToOffset(addr));
+}
+
+inline void G1CMBitMap::clear(HeapWord* addr) {
+  check_mark(addr);
+  _bm.clear_bit(heapWordToOffset(addr));
+}
+
+inline bool G1CMBitMap::parMark(HeapWord* addr) {
+  check_mark(addr);
+  return _bm.par_set_bit(heapWordToOffset(addr));
+}
+
+#undef check_mark
+
+template<typename Fn>
+inline void G1CMMarkStack::iterate(Fn fn) {
+  assert(_saved_index == _index, "saved index: %d index: %d", _saved_index, _index);
+  for (int i = 0; i < _index; ++i) {
+    fn(_base[i]);
+  }
+}
+
+// It scans an object and visits its children.
+inline void G1CMTask::scan_object(oop obj) { process_grey_object<true>(obj); }
+
+inline void G1CMTask::push(oop obj) {
+  HeapWord* objAddr = (HeapWord*) obj;
+  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
+  assert(!_g1h->is_on_master_free_list(
+              _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
+  assert(!_g1h->is_obj_ill(obj), "invariant");
+  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
+
+  if (!_task_queue->push(obj)) {
+    // The local task queue looks full. We need to push some entries
+    // to the global stack.
+    move_entries_to_global_stack();
+
+    // this should succeed since, even if we overflow the global
+    // stack, we should have definitely removed some entries from the
+    // local queue. So, there must be space on it.
+    bool success = _task_queue->push(obj);
+    assert(success, "invariant");
+  }
+}
+
+inline bool G1CMTask::is_below_finger(oop obj, HeapWord* global_finger) const {
+  // If obj is above the global finger, then the mark bitmap scan
+  // will find it later, and no push is needed.  Similarly, if we have
+  // a current region and obj is between the local finger and the
+  // end of the current region, then no push is needed.  The tradeoff
+  // of checking both vs only checking the global finger is that the
+  // local check will be more accurate and so result in fewer pushes,
+  // but may also be a little slower.
+  HeapWord* objAddr = (HeapWord*)obj;
+  if (_finger != NULL) {
+    // We have a current region.
+
+    // Finger and region values are all NULL or all non-NULL.  We
+    // use _finger to check since we immediately use its value.
+    assert(_curr_region != NULL, "invariant");
+    assert(_region_limit != NULL, "invariant");
+    assert(_region_limit <= global_finger, "invariant");
+
+    // True if obj is less than the local finger, or is between
+    // the region limit and the global finger.
+    if (objAddr < _finger) {
+      return true;
+    } else if (objAddr < _region_limit) {
+      return false;
+    } // Else check global finger.
+  }
+  // Check global finger.
+  return objAddr < global_finger;
+}
+
+template<bool scan>
+inline void G1CMTask::process_grey_object(oop obj) {
+  assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
+  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
+
+  size_t obj_size = obj->size();
+  _words_scanned += obj_size;
+
+  if (scan) {
+    obj->oop_iterate(_cm_oop_closure);
+  }
+  check_limits();
+}
+
+
+
+inline void G1CMTask::make_reference_grey(oop obj, HeapRegion* hr) {
+  if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
+    // No OrderAccess:store_load() is needed. It is implicit in the
+    // CAS done in G1CMBitMap::parMark() call in the routine above.
+    HeapWord* global_finger = _cm->finger();
+
+    // We only need to push a newly grey object on the mark
+    // stack if it is in a section of memory the mark bitmap
+    // scan has already examined.  Mark bitmap scanning
+    // maintains progress "fingers" for determining that.
+    //
+    // Notice that the global finger might be moving forward
+    // concurrently. This is not a problem. In the worst case, we
+    // mark the object while it is above the global finger and, by
+    // the time we read the global finger, it has moved forward
+    // past this object. In this case, the object will probably
+    // be visited when a task is scanning the region and will also
+    // be pushed on the stack. So, some duplicate work, but no
+    // correctness problems.
+    if (is_below_finger(obj, global_finger)) {
+      if (obj->is_typeArray()) {
+        // Immediately process arrays of primitive types, rather
+        // than pushing on the mark stack.  This keeps us from
+        // adding humongous objects to the mark stack that might
+        // be reclaimed before the entry is processed - see
+        // selection of candidates for eager reclaim of humongous
+        // objects.  The cost of the additional type test is
+        // mitigated by avoiding a trip through the mark stack,
+        // by only doing a bookkeeping update and avoiding the
+        // actual scan of the object - a typeArray contains no
+        // references, and the metadata is built-in.
+        process_grey_object<false>(obj);
+      } else {
+        push(obj);
+      }
+    }
+  }
+}
+
+inline void G1CMTask::deal_with_reference(oop obj) {
+  increment_refs_reached();
+
+  HeapWord* objAddr = (HeapWord*) obj;
+  assert(obj->is_oop_or_null(true /* ignore mark word */), "Expected an oop or NULL at " PTR_FORMAT, p2i(obj));
+  if (_g1h->is_in_g1_reserved(objAddr)) {
+    assert(obj != NULL, "null check is implicit");
+    if (!_nextMarkBitMap->isMarked(objAddr)) {
+      // Only get the containing region if the object is not marked on the
+      // bitmap (otherwise, it's a waste of time since we won't do
+      // anything with it).
+      HeapRegion* hr = _g1h->heap_region_containing(obj);
+      if (!hr->obj_allocated_since_next_marking(obj)) {
+        make_reference_grey(obj, hr);
+      }
+    }
+  }
+}
+
+inline void G1ConcurrentMark::markPrev(oop p) {
+  assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((G1CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
+}
+
+bool G1ConcurrentMark::isPrevMarked(oop p) const {
+  assert(p != NULL && p->is_oop(), "expected an oop");
+  HeapWord* addr = (HeapWord*)p;
+  assert(addr >= _prevMarkBitMap->startWord() ||
+         addr < _prevMarkBitMap->endWord(), "in a region");
+
+  return _prevMarkBitMap->isMarked(addr);
+}
+
+inline void G1ConcurrentMark::grayRoot(oop obj, size_t word_size,
+                                       uint worker_id, HeapRegion* hr) {
+  assert(obj != NULL, "pre-condition");
+  HeapWord* addr = (HeapWord*) obj;
+  if (hr == NULL) {
+    hr = _g1h->heap_region_containing(addr);
+  } else {
+    assert(hr->is_in(addr), "pre-condition");
+  }
+  assert(hr != NULL, "sanity");
+  // Given that we're looking for a region that contains an object
+  // header it's impossible to get back a HC region.
+  assert(!hr->is_continues_humongous(), "sanity");
+
+  if (addr < hr->next_top_at_mark_start()) {
+    if (!_nextMarkBitMap->isMarked(addr)) {
+      par_mark_and_count(obj, word_size, hr, worker_id);
+    }
+  }
+}
+
+#endif // SHARE_VM_GC_G1_G1CONCURRENTMARK_INLINE_HPP
--- a/hotspot/src/share/vm/gc/g1/g1EvacFailure.cpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1EvacFailure.cpp	Fri Feb 05 18:37:18 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,10 +23,10 @@
  */
 
 #include "precompiled.hpp"
-#include "gc/g1/concurrentMark.inline.hpp"
 #include "gc/g1/dirtyCardQueue.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
 #include "gc/g1/g1EvacFailure.hpp"
 #include "gc/g1/g1HeapVerifier.hpp"
 #include "gc/g1/g1OopClosures.inline.hpp"
@@ -62,7 +62,7 @@
 class RemoveSelfForwardPtrObjClosure: public ObjectClosure {
 private:
   G1CollectedHeap* _g1;
-  ConcurrentMark* _cm;
+  G1ConcurrentMark* _cm;
   HeapRegion* _hr;
   size_t _marked_bytes;
   OopsInHeapRegionClosure *_update_rset_cl;
--- a/hotspot/src/share/vm/gc/g1/g1EvacFailure.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1EvacFailure.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/hotspot/src/share/vm/gc/g1/g1HeapVerifier.cpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1HeapVerifier.cpp	Fri Feb 05 18:37:18 2016 +0100
@@ -591,7 +591,7 @@
   verify_dirty_young_list(_g1h->young_list()->first_region());
 }
 
-bool G1HeapVerifier::verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+bool G1HeapVerifier::verify_no_bits_over_tams(const char* bitmap_name, G1CMBitMapRO* bitmap,
                                                HeapWord* tams, HeapWord* end) {
   guarantee(tams <= end,
             "tams: " PTR_FORMAT " end: " PTR_FORMAT, p2i(tams), p2i(end));
@@ -605,8 +605,8 @@
 }
 
 bool G1HeapVerifier::verify_bitmaps(const char* caller, HeapRegion* hr) {
-  CMBitMapRO* prev_bitmap = _g1h->concurrent_mark()->prevMarkBitMap();
-  CMBitMapRO* next_bitmap = (CMBitMapRO*) _g1h->concurrent_mark()->nextMarkBitMap();
+  G1CMBitMapRO* prev_bitmap = _g1h->concurrent_mark()->prevMarkBitMap();
+  G1CMBitMapRO* next_bitmap = (G1CMBitMapRO*) _g1h->concurrent_mark()->nextMarkBitMap();
 
   HeapWord* bottom = hr->bottom();
   HeapWord* ptams  = hr->prev_top_at_mark_start();
--- a/hotspot/src/share/vm/gc/g1/g1HeapVerifier.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1HeapVerifier.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -82,7 +82,7 @@
   // range [from,limit). If it does, print an error message and return
   // false. Otherwise, just return true. bitmap_name should be "prev"
   // or "next".
-  bool verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+  bool verify_no_bits_over_tams(const char* bitmap_name, G1CMBitMapRO* bitmap,
                                 HeapWord* from, HeapWord* limit);
 
   // Verify that the prev / next bitmap range [tams,end) for the given
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -31,12 +31,12 @@
 class HeapRegion;
 class G1CollectedHeap;
 class G1RemSet;
-class ConcurrentMark;
+class G1ConcurrentMark;
 class DirtyCardToOopClosure;
-class CMBitMap;
-class CMMarkStack;
+class G1CMBitMap;
+class G1CMMarkStack;
 class G1ParScanThreadState;
-class CMTask;
+class G1CMTask;
 class ReferenceProcessor;
 
 // A class that scans oops in a given heap region (much as OopsInGenClosure
@@ -92,7 +92,7 @@
   G1ParScanThreadState* _par_scan_state;
   uint _worker_id;              // Cache value from par_scan_state.
   Klass* _scanned_klass;
-  ConcurrentMark* _cm;
+  G1ConcurrentMark* _cm;
 
   // Mark the object if it's not already marked. This is used to mark
   // objects pointed to by roots that are guaranteed not to move
@@ -170,12 +170,12 @@
 // Closure for iterating over object fields during concurrent marking
 class G1CMOopClosure : public MetadataAwareOopClosure {
 protected:
-  ConcurrentMark*    _cm;
+  G1ConcurrentMark*  _cm;
 private:
   G1CollectedHeap*   _g1h;
-  CMTask*            _task;
+  G1CMTask*          _task;
 public:
-  G1CMOopClosure(G1CollectedHeap* g1h, ConcurrentMark* cm, CMTask* task);
+  G1CMOopClosure(G1CollectedHeap* g1h, G1ConcurrentMark* cm, G1CMTask* task);
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(      oop* p) { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
@@ -185,10 +185,10 @@
 class G1RootRegionScanClosure : public MetadataAwareOopClosure {
 private:
   G1CollectedHeap* _g1h;
-  ConcurrentMark*  _cm;
+  G1ConcurrentMark* _cm;
   uint _worker_id;
 public:
-  G1RootRegionScanClosure(G1CollectedHeap* g1h, ConcurrentMark* cm,
+  G1RootRegionScanClosure(G1CollectedHeap* g1h, G1ConcurrentMark* cm,
                           uint worker_id) :
     _g1h(g1h), _cm(cm), _worker_id(worker_id) { }
   template <class T> void do_oop_nv(T* p);
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Fri Feb 05 18:37:18 2016 +0100
@@ -25,8 +25,8 @@
 #ifndef SHARE_VM_GC_G1_G1OOPCLOSURES_INLINE_HPP
 #define SHARE_VM_GC_G1_G1OOPCLOSURES_INLINE_HPP
 
-#include "gc/g1/concurrentMark.inline.hpp"
 #include "gc/g1/g1CollectedHeap.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
 #include "gc/g1/g1OopClosures.hpp"
 #include "gc/g1/g1ParScanThreadState.inline.hpp"
 #include "gc/g1/g1RemSet.hpp"
--- a/hotspot/src/share/vm/prims/whitebox.cpp	Fri Feb 05 16:19:31 2016 +0000
+++ b/hotspot/src/share/vm/prims/whitebox.cpp	Fri Feb 05 18:37:18 2016 +0100
@@ -51,9 +51,9 @@
 #include "utilities/exceptions.hpp"
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
-#include "gc/g1/concurrentMark.hpp"
 #include "gc/g1/concurrentMarkThread.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1ConcurrentMark.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
 #include "gc/parallel/parallelScavengeHeap.inline.hpp"
 #include "gc/parallel/adjoiningGenerations.hpp"