Merge
authorhseigel
Wed, 06 Apr 2016 16:03:02 +0200
changeset 37416 9f905b8b3718
parent 37414 2672ba9af0dc (diff)
parent 37415 35df58edb348 (current diff)
child 37417 84ec61d3b87e
child 37418 ebb041956080
Merge
--- a/hotspot/src/share/vm/gc/g1/concurrentMarkThread.cpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/concurrentMarkThread.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -28,6 +28,7 @@
 #include "gc/g1/g1Analytics.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectorPolicy.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
 #include "gc/g1/g1MMUTracker.hpp"
 #include "gc/g1/suspendibleThreadSet.hpp"
 #include "gc/g1/vm_operations_g1.hpp"
@@ -183,6 +184,11 @@
         }
       } while (cm()->restart_for_overflow());
 
+      if (!cm()->has_aborted()) {
+        G1ConcPhaseTimer t(_cm, "Concurrent Create Live Data");
+        cm()->create_live_data();
+      }
+
       double end_time = os::elapsedVTime();
       // Update the total virtual time before doing this, since it will try
       // to measure it to get the vtime for this marking.  We purposely
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1CardLiveData.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
+#include "gc/g1/g1CardLiveData.inline.hpp"
+#include "gc/g1/suspendibleThreadSet.hpp"
+#include "gc/shared/workgroup.hpp"
+#include "memory/universe.hpp"
+#include "runtime/atomic.inline.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/os.hpp"
+#include "utilities/bitMap.inline.hpp"
+#include "utilities/debug.hpp"
+
+G1CardLiveData::G1CardLiveData() :
+  _max_capacity(0),
+  _cards_per_region(0),
+  _live_regions(NULL),
+  _live_regions_size_in_bits(0),
+  _live_cards(NULL),
+  _live_cards_size_in_bits(0) {
+}
+
+G1CardLiveData::~G1CardLiveData()  {
+  free_large_bitmap(_live_cards, _live_cards_size_in_bits);
+  free_large_bitmap(_live_regions, _live_regions_size_in_bits);
+}
+
+G1CardLiveData::bm_word_t* G1CardLiveData::allocate_large_bitmap(size_t size_in_bits) {
+  size_t size_in_words = BitMap::calc_size_in_words(size_in_bits);
+
+  bm_word_t* map = MmapArrayAllocator<bm_word_t, mtGC>::allocate(size_in_words);
+
+  return map;
+}
+
+void G1CardLiveData::free_large_bitmap(bm_word_t* bitmap, size_t size_in_bits) {
+  MmapArrayAllocator<bm_word_t, mtGC>::free(bitmap, size_in_bits / BitsPerWord);
+}
+
+void G1CardLiveData::initialize(size_t max_capacity, uint num_max_regions) {
+  assert(max_capacity % num_max_regions == 0,
+         "Given capacity must be evenly divisible by region size.");
+  size_t region_size = max_capacity / num_max_regions;
+  assert(region_size % (G1SATBCardTableModRefBS::card_size * BitsPerWord) == 0,
+         "Region size must be evenly divisible by area covered by a single word.");
+  _max_capacity = max_capacity;
+  _cards_per_region = region_size / G1SATBCardTableModRefBS::card_size;
+
+  _live_regions_size_in_bits = live_region_bitmap_size_in_bits();
+  _live_regions = allocate_large_bitmap(_live_regions_size_in_bits);
+  _live_cards_size_in_bits = live_card_bitmap_size_in_bits();
+  _live_cards = allocate_large_bitmap(_live_cards_size_in_bits);
+}
+
+void G1CardLiveData::pretouch() {
+  live_cards_bm().pretouch();
+  live_regions_bm().pretouch();
+}
+
+size_t G1CardLiveData::live_region_bitmap_size_in_bits() const {
+  return _max_capacity / (_cards_per_region << G1SATBCardTableModRefBS::card_shift);
+}
+
+size_t G1CardLiveData::live_card_bitmap_size_in_bits() const {
+  return _max_capacity >> G1SATBCardTableModRefBS::card_shift;
+}
+
+// Helper class that provides functionality to generate the Live Data Count
+// information.
+class G1CardLiveDataHelper VALUE_OBJ_CLASS_SPEC {
+private:
+  BitMap _region_bm;
+  BitMap _card_bm;
+
+  // The card number of the bottom of the G1 heap.
+  // Used in biasing indices into accounting card bitmaps.
+  BitMap::idx_t _heap_card_bias;
+
+  // Utility routine to set an exclusive range of bits on the given
+  // bitmap, optimized for very small ranges.
+  // There must be at least one bit to set.
+  void set_card_bitmap_range(BitMap::idx_t start_idx,
+                             BitMap::idx_t end_idx) {
+
+    // Set the exclusive bit range [start_idx, end_idx).
+    assert((end_idx - start_idx) > 0, "at least one bit");
+
+    // For small ranges use a simple loop; otherwise use set_range.
+    // The range is made up of the cards that are spanned by an object/mem
+    // region so 8 cards will allow up to object sizes up to 4K to be handled
+    // using the loop.
+    if ((end_idx - start_idx) <= 8) {
+      for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
+        _card_bm.set_bit(i);
+      }
+    } else {
+      _card_bm.set_range(start_idx, end_idx);
+    }
+  }
+
+  // We cache the last mark set. This avoids setting the same bit multiple times.
+  // This is particularly interesting for dense bitmaps, as this avoids doing
+  // lots of work most of the time.
+  BitMap::idx_t _last_marked_bit_idx;
+
+  // Mark the card liveness bitmap for the object spanning from start to end.
+  void mark_card_bitmap_range(HeapWord* start, HeapWord* end) {
+    BitMap::idx_t start_idx = card_live_bitmap_index_for(start);
+    BitMap::idx_t end_idx = card_live_bitmap_index_for((HeapWord*)align_ptr_up(end, CardTableModRefBS::card_size));
+
+    assert((end_idx - start_idx) > 0, "Trying to mark zero sized range.");
+
+    if (start_idx == _last_marked_bit_idx) {
+      start_idx++;
+    }
+    if (start_idx == end_idx) {
+      return;
+    }
+
+    // Set the bits in the card bitmap for the cards spanned by this object.
+    set_card_bitmap_range(start_idx, end_idx);
+    _last_marked_bit_idx = end_idx - 1;
+  }
+
+  void reset_mark_cache() {
+    _last_marked_bit_idx = (BitMap::idx_t)-1;
+  }
+
+public:
+  // Returns the index in the per-card liveness count bitmap
+  // for the given address
+  inline BitMap::idx_t card_live_bitmap_index_for(HeapWord* addr) {
+    // Below, the term "card num" means the result of shifting an address
+    // by the card shift -- address 0 corresponds to card number 0.  One
+    // must subtract the card num of the bottom of the heap to obtain a
+    // card table index.
+    BitMap::idx_t card_num = uintptr_t(addr) >> CardTableModRefBS::card_shift;
+    return card_num - _heap_card_bias;
+  }
+
+  // Takes a region that's not empty (i.e., it has at least one
+  // live object in it and sets its corresponding bit on the region
+  // bitmap to 1.
+  void set_bit_for_region(HeapRegion* hr) {
+    _region_bm.par_set_bit(hr->hrm_index());
+  }
+
+  // Mark the range of bits covered by allocations done since the last marking
+  // in the given heap region, i.e. from NTAMS to top of the given region.
+  // Returns if there has been some allocation in this region since the last marking.
+  bool mark_allocated_since_marking(HeapRegion* hr) {
+    reset_mark_cache();
+
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* top   = hr->top();
+
+    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
+
+    // Mark the allocated-since-marking portion...
+    if (ntams < top) {
+      mark_card_bitmap_range(ntams, top);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Mark the range of bits covered by live objects on the mark bitmap between
+  // bottom and NTAMS of the given region.
+  // Returns the number of live bytes marked within that area for the given
+  // heap region.
+  size_t mark_marked_during_marking(G1CMBitMap* mark_bitmap, HeapRegion* hr) {
+    reset_mark_cache();
+
+    size_t marked_bytes = 0;
+
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* start = hr->bottom();
+
+    if (ntams <= start) {
+      // Skip empty regions.
+      return 0;
+    }
+    if (hr->is_humongous()) {
+      mark_card_bitmap_range(start, hr->top());
+      return pointer_delta(hr->top(), start, 1);
+    }
+
+    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
+           "Preconditions not met - "
+           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
+           p2i(start), p2i(ntams), p2i(hr->end()));
+
+    // Find the first marked object at or after "start".
+    start = mark_bitmap->getNextMarkedWordAddress(start, ntams);
+    while (start < ntams) {
+      oop obj = oop(start);
+      size_t obj_size = obj->size();
+      HeapWord* obj_end = start + obj_size;
+
+      assert(obj_end <= hr->end(), "Humongous objects must have been handled elsewhere.");
+
+      mark_card_bitmap_range(start, obj_end);
+
+      // Add the size of this object to the number of marked bytes.
+      marked_bytes += obj_size * HeapWordSize;
+
+      // Find the next marked object after this one.
+      start = mark_bitmap->getNextMarkedWordAddress(obj_end, ntams);
+    }
+
+    return marked_bytes;
+  }
+
+  G1CardLiveDataHelper(G1CardLiveData* live_data, HeapWord* base_address) :
+    _region_bm(live_data->live_regions_bm()),
+    _card_bm(live_data->live_cards_bm()) {
+    // Calculate the card number for the bottom of the heap. Used
+    // in biasing indexes into the accounting card bitmaps.
+    _heap_card_bias =
+      uintptr_t(base_address) >> CardTableModRefBS::card_shift;
+  }
+};
+
+class G1CreateCardLiveDataTask: public AbstractGangTask {
+  // Aggregate the counting data that was constructed concurrently
+  // with marking.
+  class G1CreateLiveDataClosure : public HeapRegionClosure {
+    G1CardLiveDataHelper _helper;
+
+    G1CMBitMap* _mark_bitmap;
+
+    G1ConcurrentMark* _cm;
+  public:
+    G1CreateLiveDataClosure(G1CollectedHeap* g1h,
+                            G1ConcurrentMark* cm,
+                            G1CMBitMap* mark_bitmap,
+                            G1CardLiveData* live_data) :
+      HeapRegionClosure(),
+      _helper(live_data, g1h->reserved_region().start()),
+      _mark_bitmap(mark_bitmap),
+      _cm(cm) { }
+
+    bool doHeapRegion(HeapRegion* hr) {
+      size_t marked_bytes = _helper.mark_marked_during_marking(_mark_bitmap, hr);
+      if (marked_bytes > 0) {
+        hr->add_to_marked_bytes(marked_bytes);
+      }
+
+      return (_cm->do_yield_check() && _cm->has_aborted());
+    }
+  };
+
+  G1ConcurrentMark* _cm;
+  G1CardLiveData* _live_data;
+  HeapRegionClaimer _hr_claimer;
+
+public:
+  G1CreateCardLiveDataTask(G1CMBitMap* bitmap,
+                           G1CardLiveData* live_data,
+                           uint n_workers) :
+      AbstractGangTask("G1 Create Live Data"),
+      _live_data(live_data),
+      _hr_claimer(n_workers) {
+  }
+
+  void work(uint worker_id) {
+    SuspendibleThreadSetJoiner sts_join;
+
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    G1ConcurrentMark* cm = g1h->concurrent_mark();
+    G1CreateLiveDataClosure cl(g1h, cm, cm->nextMarkBitMap(), _live_data);
+    g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
+  }
+};
+
+void G1CardLiveData::create(WorkGang* workers, G1CMBitMap* mark_bitmap) {
+  uint n_workers = workers->active_workers();
+
+  G1CreateCardLiveDataTask cl(mark_bitmap,
+                              this,
+                              n_workers);
+  workers->run_task(&cl);
+}
+
+class G1FinalizeCardLiveDataTask: public AbstractGangTask {
+  // Finalizes the liveness counting data.
+  // Sets the bits corresponding to the interval [NTAMS, top]
+  // (which contains the implicitly live objects) in the
+  // card liveness bitmap. Also sets the bit for each region
+  // containing live data, in the region liveness bitmap.
+  class G1FinalizeCardLiveDataClosure: public HeapRegionClosure {
+  private:
+    G1CardLiveDataHelper _helper;
+  public:
+    G1FinalizeCardLiveDataClosure(G1CollectedHeap* g1h,
+                                  G1CMBitMap* bitmap,
+                                  G1CardLiveData* live_data) :
+      HeapRegionClosure(),
+      _helper(live_data, g1h->reserved_region().start()) { }
+
+    bool doHeapRegion(HeapRegion* hr) {
+      bool allocated_since_marking = _helper.mark_allocated_since_marking(hr);
+      if (allocated_since_marking || hr->next_marked_bytes() > 0) {
+        _helper.set_bit_for_region(hr);
+      }
+      return false;
+    }
+  };
+
+  G1CMBitMap* _bitmap;
+
+  G1CardLiveData* _live_data;
+
+  HeapRegionClaimer _hr_claimer;
+
+public:
+  G1FinalizeCardLiveDataTask(G1CMBitMap* bitmap, G1CardLiveData* live_data, uint n_workers) :
+    AbstractGangTask("G1 Finalize Card Live Data"),
+    _bitmap(bitmap),
+    _live_data(live_data),
+    _hr_claimer(n_workers) {
+  }
+
+  void work(uint worker_id) {
+    G1FinalizeCardLiveDataClosure cl(G1CollectedHeap::heap(), _bitmap, _live_data);
+
+    G1CollectedHeap::heap()->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
+  }
+};
+
+void G1CardLiveData::finalize(WorkGang* workers, G1CMBitMap* mark_bitmap) {
+  // Finalize the live data.
+  G1FinalizeCardLiveDataTask cl(mark_bitmap,
+                                this,
+                                workers->active_workers());
+  workers->run_task(&cl);
+}
+
+class G1ClearCardLiveDataTask : public AbstractGangTask {
+  BitMap _bitmap;
+  size_t _num_chunks;
+  size_t _cur_chunk;
+public:
+  G1ClearCardLiveDataTask(BitMap bitmap, size_t num_tasks) :
+    AbstractGangTask("G1 Clear Card Live Data"),
+    _bitmap(bitmap),
+    _num_chunks(num_tasks),
+    _cur_chunk(0) {
+  }
+
+  static size_t chunk_size() { return M; }
+
+  virtual void work(uint worker_id) {
+    while (true) {
+      size_t to_process = Atomic::add(1, &_cur_chunk) - 1;
+      if (to_process >= _num_chunks) {
+        break;
+      }
+
+      BitMap::idx_t start = M * BitsPerByte * to_process;
+      BitMap::idx_t end = MIN2(start + M * BitsPerByte, _bitmap.size());
+      _bitmap.clear_range(start, end);
+    }
+  }
+};
+
+void G1CardLiveData::clear(WorkGang* workers) {
+  guarantee(Universe::is_fully_initialized(), "Should not call this during initialization.");
+
+  size_t const num_chunks = align_size_up(live_cards_bm().size_in_bytes(), G1ClearCardLiveDataTask::chunk_size()) / G1ClearCardLiveDataTask::chunk_size();
+
+  G1ClearCardLiveDataTask cl(live_cards_bm(), num_chunks);
+  workers->run_task(&cl);
+
+  // The region live bitmap is always very small, even for huge heaps. Clear
+  // directly.
+  live_regions_bm().clear();
+}
+
+class G1VerifyCardLiveDataTask: public AbstractGangTask {
+  // Heap region closure used for verifying the live count data
+  // that was created concurrently and finalized during
+  // the remark pause. This closure is applied to the heap
+  // regions during the STW cleanup pause.
+  class G1VerifyCardLiveDataClosure: public HeapRegionClosure {
+  private:
+    G1CollectedHeap* _g1h;
+    G1CMBitMap* _mark_bitmap;
+    G1CardLiveDataHelper _helper;
+
+    G1CardLiveData* _act_live_data;
+
+    G1CardLiveData* _exp_live_data;
+
+    int _failures;
+
+    // Completely recreates the live data count for the given heap region and
+    // returns the number of bytes marked.
+    size_t create_live_data_count(HeapRegion* hr) {
+      size_t bytes_marked = _helper.mark_marked_during_marking(_mark_bitmap, hr);
+      bool allocated_since_marking = _helper.mark_allocated_since_marking(hr);
+      if (allocated_since_marking || bytes_marked > 0) {
+        _helper.set_bit_for_region(hr);
+      }
+      return bytes_marked;
+    }
+  public:
+    G1VerifyCardLiveDataClosure(G1CollectedHeap* g1h,
+                                G1CMBitMap* mark_bitmap,
+                                G1CardLiveData* act_live_data,
+                                G1CardLiveData* exp_live_data) :
+      _g1h(g1h),
+      _mark_bitmap(mark_bitmap),
+      _helper(exp_live_data, g1h->reserved_region().start()),
+      _act_live_data(act_live_data),
+      _exp_live_data(exp_live_data),
+      _failures(0) { }
+
+    int failures() const { return _failures; }
+
+    bool doHeapRegion(HeapRegion* hr) {
+      int failures = 0;
+
+      // Walk the marking bitmap for this region and set the corresponding bits
+      // in the expected region and card bitmaps.
+      size_t exp_marked_bytes = create_live_data_count(hr);
+      size_t act_marked_bytes = hr->next_marked_bytes();
+      // Verify the marked bytes for this region.
+
+      if (exp_marked_bytes != act_marked_bytes) {
+        failures += 1;
+      } else if (exp_marked_bytes > HeapRegion::GrainBytes) {
+        failures += 1;
+      }
+
+      // Verify the bit, for this region, in the actual and expected
+      // (which was just calculated) region bit maps.
+      // We're not OK if the bit in the calculated expected region
+      // bitmap is set and the bit in the actual region bitmap is not.
+      uint index = hr->hrm_index();
+
+      bool expected = _exp_live_data->is_region_live(index);
+      bool actual = _act_live_data->is_region_live(index);
+      if (expected && !actual) {
+        failures += 1;
+      }
+
+      // Verify that the card bit maps for the cards spanned by the current
+      // region match. We have an error if we have a set bit in the expected
+      // bit map and the corresponding bit in the actual bitmap is not set.
+
+      BitMap::idx_t start_idx = _helper.card_live_bitmap_index_for(hr->bottom());
+      BitMap::idx_t end_idx = _helper.card_live_bitmap_index_for(hr->top());
+
+      for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
+        expected = _exp_live_data->is_card_live_at(i);
+        actual = _act_live_data->is_card_live_at(i);
+
+        if (expected && !actual) {
+          failures += 1;
+        }
+      }
+
+      _failures += failures;
+
+      // We could stop iteration over the heap when we
+      // find the first violating region by returning true.
+      return false;
+    }
+  };
+protected:
+  G1CollectedHeap* _g1h;
+  G1CMBitMap* _mark_bitmap;
+
+  G1CardLiveData* _act_live_data;
+
+  G1CardLiveData _exp_live_data;
+
+  int  _failures;
+
+  HeapRegionClaimer _hr_claimer;
+
+public:
+  G1VerifyCardLiveDataTask(G1CMBitMap* bitmap,
+                           G1CardLiveData* act_live_data,
+                           uint n_workers)
+  : AbstractGangTask("G1 Verify Card Live Data"),
+    _g1h(G1CollectedHeap::heap()),
+    _mark_bitmap(bitmap),
+    _act_live_data(act_live_data),
+    _exp_live_data(),
+    _failures(0),
+    _hr_claimer(n_workers) {
+    assert(VerifyDuringGC, "don't call this otherwise");
+    _exp_live_data.initialize(_g1h->max_capacity(), _g1h->max_regions());
+  }
+
+  void work(uint worker_id) {
+    G1VerifyCardLiveDataClosure cl(_g1h,
+                                   _mark_bitmap,
+                                   _act_live_data,
+                                   &_exp_live_data);
+    _g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
+
+    Atomic::add(cl.failures(), &_failures);
+  }
+
+  int failures() const { return _failures; }
+};
+
+void G1CardLiveData::verify(WorkGang* workers, G1CMBitMap* actual_bitmap) {
+    ResourceMark rm;
+
+    G1VerifyCardLiveDataTask cl(actual_bitmap,
+                                this,
+                                workers->active_workers());
+    workers->run_task(&cl);
+
+    guarantee(cl.failures() == 0, "Unexpected accounting failures");
+}
+
+#ifndef PRODUCT
+void G1CardLiveData::verify_is_clear() {
+  assert(live_cards_bm().count_one_bits() == 0, "Live cards bitmap must be clear.");
+  assert(live_regions_bm().count_one_bits() == 0, "Live regions bitmap must be clear.");
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1CardLiveData.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CARDLIVEDATA_HPP
+#define SHARE_VM_GC_G1_G1CARDLIVEDATA_HPP
+
+#include "gc/g1/g1CollectedHeap.hpp"
+#include "utilities/bitMap.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+class G1CollectedHeap;
+class G1CMBitMap;
+class WorkGang;
+
+// Information about object liveness on the Java heap on a "card" basis.
+// Can be used for various purposes, like as remembered set for completely
+// coarsened remembered sets, scrubbing remembered sets or estimating liveness.
+// This information is created as part of the concurrent marking cycle.
+class G1CardLiveData VALUE_OBJ_CLASS_SPEC {
+  friend class G1CardLiveDataHelper;
+  friend class G1VerifyCardLiveDataTask;
+private:
+  typedef BitMap::bm_word_t bm_word_t;
+  // Store some additional information about the covered area to be able to test.
+  size_t _max_capacity;
+  size_t _cards_per_region;
+
+  // The per-card liveness bitmap.
+  bm_word_t* _live_cards;
+  size_t _live_cards_size_in_bits;
+  // The per-region liveness bitmap.
+  bm_word_t* _live_regions;
+  size_t _live_regions_size_in_bits;
+  // The bits in this bitmap contain for every card whether it contains
+  // at least part of at least one live object.
+  BitMap live_cards_bm() const { return BitMap(_live_cards, _live_cards_size_in_bits); }
+  // The bits in this bitmap indicate that a given region contains some live objects.
+  BitMap live_regions_bm() const { return BitMap(_live_regions, _live_regions_size_in_bits); }
+
+  // Allocate a "large" bitmap from virtual memory with the given size in bits.
+  bm_word_t* allocate_large_bitmap(size_t size_in_bits);
+  void free_large_bitmap(bm_word_t* map, size_t size_in_bits);
+
+  inline BitMap live_card_bitmap(uint region);
+
+  inline bool is_card_live_at(BitMap::idx_t idx) const;
+
+  size_t live_region_bitmap_size_in_bits() const;
+  size_t live_card_bitmap_size_in_bits() const;
+public:
+  inline bool is_region_live(uint region) const;
+
+  inline void remove_nonlive_cards(uint region, BitMap* bm);
+  inline void remove_nonlive_regions(BitMap* bm);
+
+  G1CardLiveData();
+  ~G1CardLiveData();
+
+  void initialize(size_t max_capacity, uint num_max_regions);
+  void pretouch();
+
+  // Create the initial liveness data based on the marking result from the bottom
+  // to the ntams of every region in the heap and the marks in the given bitmap.
+  void create(WorkGang* workers, G1CMBitMap* mark_bitmap);
+  // Finalize the liveness data.
+  void finalize(WorkGang* workers, G1CMBitMap* mark_bitmap);
+
+  // Verify that the liveness count data created concurrently matches one created
+  // during this safepoint.
+  void verify(WorkGang* workers, G1CMBitMap* actual_bitmap);
+  // Clear all data structures, prepare for next processing.
+  void clear(WorkGang* workers);
+
+  void verify_is_clear() PRODUCT_RETURN;
+};
+
+#endif /* SHARE_VM_GC_G1_G1CARDLIVEDATA_HPP */
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1CardLiveData.inline.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CARDLIVEDATA_INLINE_HPP
+#define SHARE_VM_GC_G1_G1CARDLIVEDATA_INLINE_HPP
+
+#include "gc/g1/g1CardLiveData.hpp"
+#include "utilities/bitMap.inline.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+inline BitMap G1CardLiveData::live_card_bitmap(uint region) {
+  return BitMap(_live_cards + ((size_t)region * _cards_per_region >> LogBitsPerWord), _cards_per_region);
+}
+
+inline bool G1CardLiveData::is_card_live_at(BitMap::idx_t idx) const {
+  return live_cards_bm().at(idx);
+}
+
+inline bool G1CardLiveData::is_region_live(uint region) const {
+  return live_regions_bm().at(region);
+}
+
+inline void G1CardLiveData::remove_nonlive_cards(uint region, BitMap* bm) {
+  bm->set_intersection(live_card_bitmap(region));
+}
+
+inline void G1CardLiveData::remove_nonlive_regions(BitMap* bm) {
+  bm->set_intersection(live_regions_bm());
+}
+
+#endif /* SHARE_VM_GC_G1_G1CARDLIVEDATA_INLINE_HPP */
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -1425,6 +1425,7 @@
       // the full GC has compacted objects and updated TAMS but not updated
       // the prev bitmap.
       if (G1VerifyBitmaps) {
+        GCTraceTime(Debug, gc)("Clear Bitmap for Verification");
         _cm->clear_prev_bitmap(workers());
       }
       _verifier->check_bitmaps("Full GC End");
@@ -1944,7 +1945,7 @@
   const uint max_region_idx = (1U << (sizeof(RegionIdx_t)*BitsPerByte-1)) - 1;
   guarantee((max_regions() - 1) <= max_region_idx, "too many regions");
 
-  G1RemSet::initialize(max_regions());
+  g1_rem_set()->initialize(max_capacity(), max_regions());
 
   size_t max_cards_per_region = ((size_t)1 << (sizeof(CardIdx_t)*BitsPerByte-1)) - 1;
   guarantee(HeapRegion::CardsPerRegion > 0, "make sure it's initialized");
@@ -4787,27 +4788,23 @@
 class G1ParScrubRemSetTask: public AbstractGangTask {
 protected:
   G1RemSet* _g1rs;
-  BitMap* _region_bm;
-  BitMap* _card_bm;
   HeapRegionClaimer _hrclaimer;
 
 public:
-  G1ParScrubRemSetTask(G1RemSet* g1_rs, BitMap* region_bm, BitMap* card_bm, uint num_workers) :
+  G1ParScrubRemSetTask(G1RemSet* g1_rs, uint num_workers) :
     AbstractGangTask("G1 ScrubRS"),
     _g1rs(g1_rs),
-    _region_bm(region_bm),
-    _card_bm(card_bm),
     _hrclaimer(num_workers) {
   }
 
   void work(uint worker_id) {
-    _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
+    _g1rs->scrub(worker_id, &_hrclaimer);
   }
 };
 
-void G1CollectedHeap::scrub_rem_set(BitMap* region_bm, BitMap* card_bm) {
+void G1CollectedHeap::scrub_rem_set() {
   uint num_workers = workers()->active_workers();
-  G1ParScrubRemSetTask g1_par_scrub_rs_task(g1_rem_set(), region_bm, card_bm, num_workers);
+  G1ParScrubRemSetTask g1_par_scrub_rs_task(g1_rem_set(), num_workers);
   workers()->run_task(&g1_par_scrub_rs_task);
 }
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -992,7 +992,8 @@
   // The rem set and barrier set.
   G1RemSet* g1_rem_set() const { return _g1_rem_set; }
 
-  void scrub_rem_set(BitMap* region_bm, BitMap* card_bm);
+  // Try to minimize the remembered set.
+  void scrub_rem_set();
 
   unsigned get_gc_time_stamp() {
     return _gc_time_stamp;
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -28,7 +28,7 @@
 #include "gc/g1/g1CollectedHeap.hpp"
 #include "gc/g1/g1CollectorPolicy.hpp"
 #include "gc/g1/g1CollectorState.hpp"
-#include "gc/g1/g1ConcurrentMark.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/heapRegionManager.inline.hpp"
 #include "gc/g1/heapRegionSet.inline.hpp"
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -33,6 +33,7 @@
 #include "gc/g1/g1ConcurrentMark.inline.hpp"
 #include "gc/g1/g1HeapVerifier.hpp"
 #include "gc/g1/g1OopClosures.inline.hpp"
+#include "gc/g1/g1CardLiveData.inline.hpp"
 #include "gc/g1/g1StringDedup.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
@@ -355,10 +356,6 @@
   _sleep_factor(0.0),
   _marking_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
-  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
-  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
-            CardTableModRefBS::card_shift,
-            false /* in_resource_area*/),
 
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
@@ -390,8 +387,6 @@
 
   _parallel_workers(NULL),
 
-  _count_card_bitmaps(NULL),
-  _count_marked_bytes(NULL),
   _completed_initialization(false) {
 
   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
@@ -505,40 +500,19 @@
   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 
-  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
-
-  BitMap::idx_t card_bm_size = _card_bm.size();
-
   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   _active_tasks = _max_worker_id;
 
-  uint max_regions = _g1h->max_regions();
   for (uint i = 0; i < _max_worker_id; ++i) {
     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
     task_queue->initialize();
     _task_queues->register_queue(i, task_queue);
 
-    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
-
-    _tasks[i] = new G1CMTask(i, this,
-                             _count_marked_bytes[i],
-                             &_count_card_bitmaps[i],
-                             task_queue, _task_queues);
+    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 
     _accum_task_vtime[i] = 0.0;
   }
 
-  // Calculate the card number for the bottom of the heap. Used
-  // in biasing indexes into the accounting card bitmaps.
-  _heap_bottom_card_num =
-    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
-                                CardTableModRefBS::card_shift);
-
-  // Clear all the liveness counting data
-  clear_all_count_data();
-
   // so that the call below can read a sensible value
   _heap_start = g1h->reserved_region().start();
   set_non_marking_state();
@@ -716,10 +690,11 @@
 
   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 
-  // Clear the liveness counting data. If the marking has been aborted, the abort()
+  // Clear the live count data. If the marking has been aborted, the abort()
   // call already did that.
   if (!has_aborted()) {
-    clear_all_count_data();
+    clear_live_data(_parallel_workers);
+    DEBUG_ONLY(verify_live_data_clear());
   }
 
   // Repeat the asserts from above.
@@ -901,7 +876,7 @@
           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
           _cm->clear_has_overflown();
 
-          _cm->do_yield_check(worker_id);
+          _cm->do_yield_check();
 
           jlong sleep_time_ms;
           if (!_cm->has_aborted() && the_task->has_aborted()) {
@@ -951,10 +926,10 @@
   return n_conc_workers;
 }
 
-void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
+void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
   // Currently, only survivors can be root regions.
   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
-  G1RootRegionScanClosure cl(_g1h, this, worker_id);
+  G1RootRegionScanClosure cl(_g1h, this);
 
   const uintx interval = PrefetchScanIntervalInBytes;
   HeapWord* curr = hr->bottom();
@@ -983,7 +958,7 @@
     G1CMRootRegions* root_regions = _cm->root_regions();
     HeapRegion* hr = root_regions->claim_next();
     while (hr != NULL) {
-      _cm->scanRootRegion(hr, worker_id);
+      _cm->scanRootRegion(hr);
       hr = root_regions->claim_next();
     }
   }
@@ -1107,14 +1082,6 @@
     // marking due to overflowing the global mark stack.
     reset_marking_state();
   } else {
-    {
-      GCTraceTime(Debug, gc, phases) trace("Aggregate Data", _gc_timer_cm);
-
-      // Aggregate the per-task counting data that we have accumulated
-      // while marking.
-      aggregate_count_data();
-    }
-
     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
     // We're done with marking.
     // This is the end of  the marking cycle, we're expected all
@@ -1150,363 +1117,6 @@
   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
 }
 
-// Base class of the closures that finalize and verify the
-// liveness counting data.
-class G1CMCountDataClosureBase: public HeapRegionClosure {
-protected:
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  CardTableModRefBS* _ct_bs;
-
-  BitMap* _region_bm;
-  BitMap* _card_bm;
-
-  // Takes a region that's not empty (i.e., it has at least one
-  // live object in it and sets its corresponding bit on the region
-  // bitmap to 1.
-  void set_bit_for_region(HeapRegion* hr) {
-    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
-    _region_bm->par_at_put(index, true);
-  }
-
-public:
-  G1CMCountDataClosureBase(G1CollectedHeap* g1h,
-                           BitMap* region_bm, BitMap* card_bm):
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
-    _region_bm(region_bm), _card_bm(card_bm) { }
-};
-
-// Closure that calculates the # live objects per region. Used
-// for verification purposes during the cleanup pause.
-class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
-  G1CMBitMapRO* _bm;
-  size_t _region_marked_bytes;
-
-public:
-  CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
-                         BitMap* region_bm, BitMap* card_bm) :
-    G1CMCountDataClosureBase(g1h, region_bm, card_bm),
-    _bm(bm), _region_marked_bytes(0) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* ntams = hr->next_top_at_mark_start();
-    HeapWord* start = hr->bottom();
-
-    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
-           "Preconditions not met - "
-           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
-           p2i(start), p2i(ntams), p2i(hr->end()));
-
-    // Find the first marked object at or after "start".
-    start = _bm->getNextMarkedWordAddress(start, ntams);
-
-    size_t marked_bytes = 0;
-
-    while (start < ntams) {
-      oop obj = oop(start);
-      int obj_sz = obj->size();
-      HeapWord* obj_end = start + obj_sz;
-
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
-
-      // Note: if we're looking at the last region in heap - obj_end
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-
-      // Set the bits in the card BM for the cards spanned by this object.
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-
-      // Add the size of this object to the number of marked bytes.
-      marked_bytes += (size_t)obj_sz * HeapWordSize;
-
-      // This will happen if we are handling a humongous object that spans
-      // several heap regions.
-      if (obj_end > hr->end()) {
-        break;
-      }
-      // Find the next marked object after this one.
-      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
-    }
-
-    // Mark the allocated-since-marking portion...
-    HeapWord* top = hr->top();
-    if (ntams < top) {
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
-
-      // Note: if we're looking at the last region in heap - top
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-
-      // This definitely means the region has live objects.
-      set_bit_for_region(hr);
-    }
-
-    // Update the live region bitmap.
-    if (marked_bytes > 0) {
-      set_bit_for_region(hr);
-    }
-
-    // Set the marked bytes for the current region so that
-    // it can be queried by a calling verification routine
-    _region_marked_bytes = marked_bytes;
-
-    return false;
-  }
-
-  size_t region_marked_bytes() const { return _region_marked_bytes; }
-};
-
-// Heap region closure used for verifying the counting data
-// that was accumulated concurrently and aggregated during
-// the remark pause. This closure is applied to the heap
-// regions during the STW cleanup pause.
-
-class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  CalcLiveObjectsClosure _calc_cl;
-  BitMap* _region_bm;   // Region BM to be verified
-  BitMap* _card_bm;     // Card BM to be verified
-
-  BitMap* _exp_region_bm; // Expected Region BM values
-  BitMap* _exp_card_bm;   // Expected card BM values
-
-  int _failures;
-
-public:
-  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
-                                BitMap* region_bm,
-                                BitMap* card_bm,
-                                BitMap* exp_region_bm,
-                                BitMap* exp_card_bm) :
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
-    _region_bm(region_bm), _card_bm(card_bm),
-    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
-    _failures(0) { }
-
-  int failures() const { return _failures; }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    int failures = 0;
-
-    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
-    // this region and set the corresponding bits in the expected region
-    // and card bitmaps.
-    bool res = _calc_cl.doHeapRegion(hr);
-    assert(res == false, "should be continuing");
-
-    // Verify the marked bytes for this region.
-    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
-    size_t act_marked_bytes = hr->next_marked_bytes();
-
-    if (exp_marked_bytes > act_marked_bytes) {
-      if (hr->is_starts_humongous()) {
-        // For start_humongous regions, the size of the whole object will be
-        // in exp_marked_bytes.
-        HeapRegion* region = hr;
-        int num_regions;
-        for (num_regions = 0; region != NULL; num_regions++) {
-          region = _g1h->next_region_in_humongous(region);
-        }
-        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
-          failures += 1;
-        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
-          failures += 1;
-        }
-      } else {
-        // We're not OK if expected marked bytes > actual marked bytes. It means
-        // we have missed accounting some objects during the actual marking.
-        failures += 1;
-      }
-    }
-
-    // Verify the bit, for this region, in the actual and expected
-    // (which was just calculated) region bit maps.
-    // We're not OK if the bit in the calculated expected region
-    // bitmap is set and the bit in the actual region bitmap is not.
-    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
-
-    bool expected = _exp_region_bm->at(index);
-    bool actual = _region_bm->at(index);
-    if (expected && !actual) {
-      failures += 1;
-    }
-
-    // Verify that the card bit maps for the cards spanned by the current
-    // region match. We have an error if we have a set bit in the expected
-    // bit map and the corresponding bit in the actual bitmap is not set.
-
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
-    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
-
-    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
-      expected = _exp_card_bm->at(i);
-      actual = _card_bm->at(i);
-
-      if (expected && !actual) {
-        failures += 1;
-      }
-    }
-
-    _failures += failures;
-
-    // We could stop iteration over the heap when we
-    // find the first violating region by returning true.
-    return false;
-  }
-};
-
-class G1ParVerifyFinalCountTask: public AbstractGangTask {
-protected:
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  BitMap* _actual_region_bm;
-  BitMap* _actual_card_bm;
-
-  uint    _n_workers;
-
-  BitMap* _expected_region_bm;
-  BitMap* _expected_card_bm;
-
-  int  _failures;
-
-  HeapRegionClaimer _hrclaimer;
-
-public:
-  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
-                            BitMap* region_bm, BitMap* card_bm,
-                            BitMap* expected_region_bm, BitMap* expected_card_bm)
-    : AbstractGangTask("G1 verify final counting"),
-      _g1h(g1h), _cm(_g1h->concurrent_mark()),
-      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
-      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
-      _failures(0),
-      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
-    assert(VerifyDuringGC, "don't call this otherwise");
-    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
-    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
-  }
-
-  void work(uint worker_id) {
-    assert(worker_id < _n_workers, "invariant");
-
-    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
-                                            _actual_region_bm, _actual_card_bm,
-                                            _expected_region_bm,
-                                            _expected_card_bm);
-
-    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
-
-    Atomic::add(verify_cl.failures(), &_failures);
-  }
-
-  int failures() const { return _failures; }
-};
-
-// Closure that finalizes the liveness counting data.
-// Used during the cleanup pause.
-// Sets the bits corresponding to the interval [NTAMS, top]
-// (which contains the implicitly live objects) in the
-// card liveness bitmap. Also sets the bit for each region,
-// containing live data, in the region liveness bitmap.
-
-class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase {
- public:
-  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
-                              BitMap* region_bm,
-                              BitMap* card_bm) :
-    G1CMCountDataClosureBase(g1h, region_bm, card_bm) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* ntams = hr->next_top_at_mark_start();
-    HeapWord* top   = hr->top();
-
-    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
-
-    // Mark the allocated-since-marking portion...
-    if (ntams < top) {
-      // This definitely means the region has live objects.
-      set_bit_for_region(hr);
-
-      // Now set the bits in the card bitmap for [ntams, top)
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
-
-      // Note: if we're looking at the last region in heap - top
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-
-      assert(end_idx <= _card_bm->size(),
-             "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
-             end_idx, _card_bm->size());
-      assert(start_idx < _card_bm->size(),
-             "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
-             start_idx, _card_bm->size());
-
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-    }
-
-    // Set the bit for the region if it contains live data
-    if (hr->next_marked_bytes() > 0) {
-      set_bit_for_region(hr);
-    }
-
-    return false;
-  }
-};
-
-class G1ParFinalCountTask: public AbstractGangTask {
-protected:
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  BitMap* _actual_region_bm;
-  BitMap* _actual_card_bm;
-
-  uint    _n_workers;
-  HeapRegionClaimer _hrclaimer;
-
-public:
-  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
-    : AbstractGangTask("G1 final counting"),
-      _g1h(g1h), _cm(_g1h->concurrent_mark()),
-      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
-      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
-  }
-
-  void work(uint worker_id) {
-    assert(worker_id < _n_workers, "invariant");
-
-    FinalCountDataUpdateClosure final_update_cl(_g1h,
-                                                _actual_region_bm,
-                                                _actual_card_bm);
-
-    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
-  }
-};
-
 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
   G1CollectedHeap* _g1;
   size_t _freed_bytes;
@@ -1637,31 +1247,16 @@
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
-  // Do counting once more with the world stopped for good measure.
-  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
-
-  g1h->workers()->run_task(&g1_par_count_task);
+  {
+    GCTraceTime(Debug, gc)("Finalize Live Data");
+    finalize_live_data();
+  }
 
   if (VerifyDuringGC) {
-    // Verify that the counting data accumulated during marking matches
-    // that calculated by walking the marking bitmap.
-
-    // Bitmaps to hold expected values
-    BitMap expected_region_bm(_region_bm.size(), true);
-    BitMap expected_card_bm(_card_bm.size(), true);
-
-    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
-                                                 &_region_bm,
-                                                 &_card_bm,
-                                                 &expected_region_bm,
-                                                 &expected_card_bm);
-
-    g1h->workers()->run_task(&g1_par_verify_task);
-
-    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
+    GCTraceTime(Debug, gc)("Verify Live Data");
+    verify_live_data();
   }
 
-  size_t start_used_bytes = g1h->used();
   g1h->collector_state()->set_mark_in_progress(false);
 
   double count_end = os::elapsedTime();
@@ -1696,7 +1291,7 @@
   // regions.
   if (G1ScrubRemSets) {
     double rs_scrub_start = os::elapsedTime();
-    g1h->scrub_rem_set(&_region_bm, &_card_bm);
+    g1h->scrub_rem_set();
     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
   }
 
@@ -2160,7 +1755,7 @@
       oop obj = static_cast<oop>(entry);
       assert(obj->is_oop(true /* ignore mark word */),
              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
-      _task->make_reference_grey(obj, hr);
+      _task->make_reference_grey(obj);
     }
   }
 
@@ -2401,168 +1996,28 @@
   }
 }
 #endif // PRODUCT
-
-// Aggregate the counting data that was constructed concurrently
-// with marking.
-class AggregateCountDataHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  CardTableModRefBS* _ct_bs;
-  BitMap* _cm_card_bm;
-  uint _max_worker_id;
-
- public:
-  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
-                              BitMap* cm_card_bm,
-                              uint max_worker_id) :
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
-    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* start = hr->bottom();
-    HeapWord* limit = hr->next_top_at_mark_start();
-    HeapWord* end = hr->end();
-
-    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
-           "Preconditions not met - "
-           "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
-           "top: " PTR_FORMAT ", end: " PTR_FORMAT,
-           p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
-
-    assert(hr->next_marked_bytes() == 0, "Precondition");
-
-    if (start == limit) {
-      // NTAMS of this region has not been set so nothing to do.
-      return false;
-    }
-
-    // 'start' should be in the heap.
-    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
-    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
-    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
-
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
-    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
-
-    // If ntams is not card aligned then we bump card bitmap index
-    // for limit so that we get the all the cards spanned by
-    // the object ending at ntams.
-    // Note: if this is the last region in the heap then ntams
-    // could be actually just beyond the end of the the heap;
-    // limit_idx will then  correspond to a (non-existent) card
-    // that is also outside the heap.
-    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
-      limit_idx += 1;
-    }
-
-    assert(limit_idx <= end_idx, "or else use atomics");
-
-    // Aggregate the "stripe" in the count data associated with hr.
-    uint hrm_index = hr->hrm_index();
-    size_t marked_bytes = 0;
-
-    for (uint i = 0; i < _max_worker_id; i += 1) {
-      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
-      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
-
-      // Fetch the marked_bytes in this region for task i and
-      // add it to the running total for this region.
-      marked_bytes += marked_bytes_array[hrm_index];
-
-      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
-      // into the global card bitmap.
-      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
-
-      while (scan_idx < limit_idx) {
-        assert(task_card_bm->at(scan_idx) == true, "should be");
-        _cm_card_bm->set_bit(scan_idx);
-        assert(_cm_card_bm->at(scan_idx) == true, "should be");
-
-        // BitMap::get_next_one_offset() can handle the case when
-        // its left_offset parameter is greater than its right_offset
-        // parameter. It does, however, have an early exit if
-        // left_offset == right_offset. So let's limit the value
-        // passed in for left offset here.
-        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
-        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
-      }
-    }
-
-    // Update the marked bytes for this region.
-    hr->add_to_marked_bytes(marked_bytes);
-
-    // Next heap region
-    return false;
-  }
-};
-
-class G1AggregateCountDataTask: public AbstractGangTask {
-protected:
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  BitMap* _cm_card_bm;
-  uint _max_worker_id;
-  uint _active_workers;
-  HeapRegionClaimer _hrclaimer;
-
-public:
-  G1AggregateCountDataTask(G1CollectedHeap* g1h,
-                           G1ConcurrentMark* cm,
-                           BitMap* cm_card_bm,
-                           uint max_worker_id,
-                           uint n_workers) :
-      AbstractGangTask("Count Aggregation"),
-      _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
-      _max_worker_id(max_worker_id),
-      _active_workers(n_workers),
-      _hrclaimer(_active_workers) {
-  }
-
-  void work(uint worker_id) {
-    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
-
-    _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
-  }
-};
-
-
-void G1ConcurrentMark::aggregate_count_data() {
-  uint n_workers = _g1h->workers()->active_workers();
-
-  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
-                                           _max_worker_id, n_workers);
-
-  _g1h->workers()->run_task(&g1_par_agg_task);
+void G1ConcurrentMark::create_live_data() {
+  _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
+}
+
+void G1ConcurrentMark::finalize_live_data() {
+  _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
+}
+
+void G1ConcurrentMark::verify_live_data() {
+  _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
 }
 
-// Clear the per-worker arrays used to store the per-region counting data
-void G1ConcurrentMark::clear_all_count_data() {
-  // Clear the global card bitmap - it will be filled during
-  // liveness count aggregation (during remark) and the
-  // final counting task.
-  _card_bm.clear();
-
-  // Clear the global region bitmap - it will be filled as part
-  // of the final counting task.
-  _region_bm.clear();
-
-  uint max_regions = _g1h->max_regions();
-  assert(_max_worker_id > 0, "uninitialized");
-
-  for (uint i = 0; i < _max_worker_id; i += 1) {
-    BitMap* task_card_bm = count_card_bitmap_for(i);
-    size_t* marked_bytes_array = count_marked_bytes_array_for(i);
-
-    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
-    assert(marked_bytes_array != NULL, "uninitialized");
-
-    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
-    task_card_bm->clear();
-  }
+void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
+  _g1h->g1_rem_set()->clear_card_live_data(workers);
 }
 
+#ifdef ASSERT
+void G1ConcurrentMark::verify_live_data_clear() {
+  _g1h->g1_rem_set()->verify_card_live_data_is_clear();
+}
+#endif
+
 void G1ConcurrentMark::print_stats() {
   if (!log_is_enabled(Debug, gc, stats)) {
     return;
@@ -2574,7 +2029,6 @@
   }
 }
 
-// abandon current marking iteration due to a Full GC
 void G1ConcurrentMark::abort() {
   if (!cmThread()->during_cycle() || _has_aborted) {
     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
@@ -2583,14 +2037,22 @@
 
   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
   // concurrent bitmap clearing.
-  clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
-
+  {
+    GCTraceTime(Debug, gc)("Clear Next Bitmap");
+    clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
+  }
   // Note we cannot clear the previous marking bitmap here
   // since VerifyDuringGC verifies the objects marked during
   // a full GC against the previous bitmap.
 
-  // Clear the liveness counting data
-  clear_all_count_data();
+  {
+    GCTraceTime(Debug, gc)("Clear Live Data");
+    clear_live_data(_g1h->workers());
+  }
+  DEBUG_ONLY({
+    GCTraceTime(Debug, gc)("Verify Live Data Clear");
+    verify_live_data_clear();
+  })
   // Empty mark stack
   reset_marking_state();
   for (uint i = 0; i < _max_worker_id; ++i) {
@@ -2634,7 +2096,7 @@
 
   }
   print_ms_time_info("  ", "cleanups", _cleanup_times);
-  log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
+  log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
   if (G1ScrubRemSets) {
     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
@@ -2657,16 +2119,6 @@
   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
 }
 
-// We take a break if someone is trying to stop the world.
-bool G1ConcurrentMark::do_yield_check(uint worker_id) {
-  if (SuspendibleThreadSet::should_yield()) {
-    SuspendibleThreadSet::yield();
-    return true;
-  } else {
-    return false;
-  }
-}
-
 // Closure for iteration over bitmaps
 class G1CMBitMapClosure : public BitMapClosure {
 private:
@@ -3473,8 +2925,6 @@
 
 G1CMTask::G1CMTask(uint worker_id,
                    G1ConcurrentMark* cm,
-                   size_t* marked_bytes,
-                   BitMap* card_bm,
                    G1CMTaskQueue* task_queue,
                    G1CMTaskQueueSet* task_queues)
   : _g1h(G1CollectedHeap::heap()),
@@ -3483,9 +2933,7 @@
     _nextMarkBitMap(NULL), _hash_seed(17),
     _task_queue(task_queue),
     _task_queues(task_queues),
-    _cm_oop_closure(NULL),
-    _marked_bytes_array(marked_bytes),
-    _card_bm(card_bm) {
+    _cm_oop_closure(NULL) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
 
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -266,7 +266,7 @@
 class G1ConcurrentMark: public CHeapObj<mtGC> {
   friend class ConcurrentMarkThread;
   friend class G1ParNoteEndTask;
-  friend class CalcLiveObjectsClosure;
+  friend class G1VerifyLiveDataClosure;
   friend class G1CMRefProcTaskProxy;
   friend class G1CMRefProcTaskExecutor;
   friend class G1CMKeepAliveAndDrainClosure;
@@ -298,9 +298,6 @@
   G1CMBitMapRO*           _prevMarkBitMap; // Completed mark bitmap
   G1CMBitMap*             _nextMarkBitMap; // Under-construction mark bitmap
 
-  BitMap                  _region_bm;
-  BitMap                  _card_bm;
-
   // Heap bounds
   HeapWord*               _heap_start;
   HeapWord*               _heap_end;
@@ -461,23 +458,6 @@
   void enter_first_sync_barrier(uint worker_id);
   void enter_second_sync_barrier(uint worker_id);
 
-  // Live Data Counting data structures...
-  // These data structures are initialized at the start of
-  // marking. They are written to while marking is active.
-  // They are aggregated during remark; the aggregated values
-  // are then used to populate the _region_bm, _card_bm, and
-  // the total live bytes, which are then subsequently updated
-  // during cleanup.
-
-  // An array of bitmaps (one bit map per task). Each bitmap
-  // is used to record the cards spanned by the live objects
-  // marked by that task/worker.
-  BitMap*  _count_card_bitmaps;
-
-  // Used to record the number of marked live bytes
-  // (for each region, by worker thread).
-  size_t** _count_marked_bytes;
-
   // Card index of the bottom of the G1 heap. Used for biasing indices into
   // the card bitmaps.
   intptr_t _heap_bottom_card_num;
@@ -563,18 +543,10 @@
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed. It is MT-safe. word_size is the size of the object in
-  // words. It is passed explicitly as sometimes we cannot calculate
-  // it from the given object because it might be in an inconsistent
-  // state (e.g., in to-space and being copied). So the caller is
-  // responsible for dealing with this issue (e.g., get the size from
-  // the from-space image when the to-space image might be
-  // inconsistent) and always passing the size. hr is the region that
+  // grayed. It is MT-safe. hr is the region that
   // contains the object and it's passed optionally from callers who
   // might already have it (no point in recalculating it).
   inline void grayRoot(oop obj,
-                       size_t word_size,
-                       uint worker_id,
                        HeapRegion* hr = NULL);
 
   // Prepare internal data structures for the next mark cycle. This includes clearing
@@ -603,7 +575,7 @@
   void scan_root_regions();
 
   // Scan a single root region and mark everything reachable from it.
-  void scanRootRegion(HeapRegion* hr, uint worker_id);
+  void scanRootRegion(HeapRegion* hr);
 
   // Do concurrent phase of marking, to a tentative transitive closure.
   void mark_from_roots();
@@ -639,9 +611,9 @@
 
   inline bool isPrevMarked(oop p) const;
 
-  inline bool do_yield_check(uint worker_i = 0);
+  inline bool do_yield_check();
 
-  // Called to abort the marking cycle after a Full GC takes place.
+  // Abandon current marking iteration due to a Full GC.
   void abort();
 
   bool has_aborted()      { return _has_aborted; }
@@ -652,75 +624,8 @@
 
   void print_on_error(outputStream* st) const;
 
-  // Liveness counting
-
-  // Utility routine to set an exclusive range of cards on the given
-  // card liveness bitmap
-  inline void set_card_bitmap_range(BitMap* card_bm,
-                                    BitMap::idx_t start_idx,
-                                    BitMap::idx_t end_idx,
-                                    bool is_par);
-
-  // Returns the card number of the bottom of the G1 heap.
-  // Used in biasing indices into accounting card bitmaps.
-  intptr_t heap_bottom_card_num() const {
-    return _heap_bottom_card_num;
-  }
-
-  // Returns the card bitmap for a given task or worker id.
-  BitMap* count_card_bitmap_for(uint worker_id) {
-    assert(worker_id < _max_worker_id, "oob");
-    assert(_count_card_bitmaps != NULL, "uninitialized");
-    BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
-    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
-    return task_card_bm;
-  }
-
-  // Returns the array containing the marked bytes for each region,
-  // for the given worker or task id.
-  size_t* count_marked_bytes_array_for(uint worker_id) {
-    assert(worker_id < _max_worker_id, "oob");
-    assert(_count_marked_bytes != NULL, "uninitialized");
-    size_t* marked_bytes_array = _count_marked_bytes[worker_id];
-    assert(marked_bytes_array != NULL, "uninitialized");
-    return marked_bytes_array;
-  }
-
-  // Returns the index in the liveness accounting card table bitmap
-  // for the given address
-  inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
-
-  // Counts the size of the given memory region in the the given
-  // marked_bytes array slot for the given HeapRegion.
-  // Sets the bits in the given card bitmap that are associated with the
-  // cards that are spanned by the memory region.
-  inline void count_region(MemRegion mr,
-                           HeapRegion* hr,
-                           size_t* marked_bytes_array,
-                           BitMap* task_card_bm);
-
-  // Counts the given object in the given task/worker counting
-  // data structures.
-  inline void count_object(oop obj,
-                           HeapRegion* hr,
-                           size_t* marked_bytes_array,
-                           BitMap* task_card_bm,
-                           size_t word_size);
-
-  // Attempts to mark the given object and, if successful, counts
-  // the object in the given task/worker counting structures.
-  inline bool par_mark_and_count(oop obj,
-                                 HeapRegion* hr,
-                                 size_t* marked_bytes_array,
-                                 BitMap* task_card_bm);
-
-  // Attempts to mark the given object and, if successful, counts
-  // the object in the task/worker counting structures for the
-  // given worker id.
-  inline bool par_mark_and_count(oop obj,
-                                 size_t word_size,
-                                 HeapRegion* hr,
-                                 uint worker_id);
+  // Attempts to mark the given object on the next mark bitmap.
+  inline bool par_mark(oop obj);
 
   // Returns true if initialization was successfully completed.
   bool completed_initialization() const {
@@ -730,19 +635,22 @@
   ConcurrentGCTimer* gc_timer_cm() const { return _gc_timer_cm; }
   G1OldTracer* gc_tracer_cm() const { return _gc_tracer_cm; }
 
-protected:
-  // Clear all the per-task bitmaps and arrays used to store the
-  // counting data.
-  void clear_all_count_data();
+private:
+  // Clear (Reset) all liveness count data.
+  void clear_live_data(WorkGang* workers);
 
-  // Aggregates the counting data for each worker/task
-  // that was constructed while marking. Also sets
-  // the amount of marked bytes for each region and
-  // the top at concurrent mark count.
-  void aggregate_count_data();
+#ifdef ASSERT
+  // Verify all of the above data structures that they are in initial state.
+  void verify_live_data_clear();
+#endif
 
-  // Verification routine
-  void verify_count_data();
+  // Aggregates the per-card liveness data based on the current marking. Also sets
+  // the amount of marked bytes for each region.
+  void create_live_data();
+
+  void finalize_live_data();
+
+  void verify_live_data();
 };
 
 // A class representing a marking task.
@@ -844,12 +752,6 @@
 
   TruncatedSeq                _marking_step_diffs_ms;
 
-  // Counting data structures. Embedding the task's marked_bytes_array
-  // and card bitmap into the actual task saves having to go through
-  // the ConcurrentMark object.
-  size_t*                     _marked_bytes_array;
-  BitMap*                     _card_bm;
-
   // it updates the local fields after this task has claimed
   // a new region to scan
   void setup_for_region(HeapRegion* hr);
@@ -936,9 +838,8 @@
 
   // Grey the object by marking it.  If not already marked, push it on
   // the local queue if below the finger.
-  // Precondition: obj is in region.
-  // Precondition: obj is below region's NTAMS.
-  inline void make_reference_grey(oop obj, HeapRegion* region);
+  // obj is below its region's NTAMS.
+  inline void make_reference_grey(oop obj);
 
   // Grey the object (by calling make_grey_reference) if required,
   // e.g. obj is below its containing region's NTAMS.
@@ -976,8 +877,6 @@
 
   G1CMTask(uint worker_id,
            G1ConcurrentMark *cm,
-           size_t* marked_bytes,
-           BitMap* card_bm,
            G1CMTaskQueue* task_queue,
            G1CMTaskQueueSet* task_queues);
 
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -27,140 +27,11 @@
 
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1ConcurrentMark.hpp"
+#include "gc/g1/suspendibleThreadSet.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
 
-// Utility routine to set an exclusive range of cards on the given
-// card liveness bitmap
-inline void G1ConcurrentMark::set_card_bitmap_range(BitMap* card_bm,
-                                                    BitMap::idx_t start_idx,
-                                                    BitMap::idx_t end_idx,
-                                                    bool is_par) {
-
-  // Set the exclusive bit range [start_idx, end_idx).
-  assert((end_idx - start_idx) > 0, "at least one card");
-  assert(end_idx <= card_bm->size(), "sanity");
-
-  // Silently clip the end index
-  end_idx = MIN2(end_idx, card_bm->size());
-
-  // For small ranges use a simple loop; otherwise use set_range or
-  // use par_at_put_range (if parallel). The range is made up of the
-  // cards that are spanned by an object/mem region so 8 cards will
-  // allow up to object sizes up to 4K to be handled using the loop.
-  if ((end_idx - start_idx) <= 8) {
-    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
-      if (is_par) {
-        card_bm->par_set_bit(i);
-      } else {
-        card_bm->set_bit(i);
-      }
-    }
-  } else {
-    // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive.
-    if (is_par) {
-      card_bm->par_at_put_range(start_idx, end_idx, true);
-    } else {
-      card_bm->set_range(start_idx, end_idx);
-    }
-  }
-}
-
-// Returns the index in the liveness accounting card bitmap
-// for the given address
-inline BitMap::idx_t G1ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
-  // Below, the term "card num" means the result of shifting an address
-  // by the card shift -- address 0 corresponds to card number 0.  One
-  // must subtract the card num of the bottom of the heap to obtain a
-  // card table index.
-  intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
-  return card_num - heap_bottom_card_num();
-}
-
-// Counts the given memory region in the given task/worker
-// counting data structures.
-inline void G1ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
-                                           size_t* marked_bytes_array,
-                                           BitMap* task_card_bm) {
-  G1CollectedHeap* g1h = _g1h;
-  CardTableModRefBS* ct_bs = g1h->g1_barrier_set();
-
-  HeapWord* start = mr.start();
-  HeapWord* end = mr.end();
-  size_t region_size_bytes = mr.byte_size();
-  uint index = hr->hrm_index();
-
-  assert(hr == g1h->heap_region_containing(start), "sanity");
-  assert(marked_bytes_array != NULL, "pre-condition");
-  assert(task_card_bm != NULL, "pre-condition");
-
-  // Add to the task local marked bytes for this region.
-  marked_bytes_array[index] += region_size_bytes;
-
-  BitMap::idx_t start_idx = card_bitmap_index_for(start);
-  BitMap::idx_t end_idx = card_bitmap_index_for(end);
-
-  // Note: if we're looking at the last region in heap - end
-  // could be actually just beyond the end of the heap; end_idx
-  // will then correspond to a (non-existent) card that is also
-  // just beyond the heap.
-  if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) {
-    // end of region is not card aligned - increment to cover
-    // all the cards spanned by the region.
-    end_idx += 1;
-  }
-  // The card bitmap is task/worker specific => no need to use
-  // the 'par' BitMap routines.
-  // Set bits in the exclusive bit range [start_idx, end_idx).
-  set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */);
-}
-
-// Counts the given object in the given task/worker counting data structures.
-inline void G1ConcurrentMark::count_object(oop obj,
-                                           HeapRegion* hr,
-                                           size_t* marked_bytes_array,
-                                           BitMap* task_card_bm,
-                                           size_t word_size) {
-  assert(!hr->is_continues_humongous(), "Cannot enter count_object with continues humongous");
-  if (!hr->is_starts_humongous()) {
-    MemRegion mr((HeapWord*)obj, word_size);
-    count_region(mr, hr, marked_bytes_array, task_card_bm);
-  } else {
-    do {
-      MemRegion mr(hr->bottom(), hr->top());
-      count_region(mr, hr, marked_bytes_array, task_card_bm);
-      hr = _g1h->next_region_in_humongous(hr);
-    } while (hr != NULL);
-  }
-}
-
-// Attempts to mark the given object and, if successful, counts
-// the object in the given task/worker counting structures.
-inline bool G1ConcurrentMark::par_mark_and_count(oop obj,
-                                                 HeapRegion* hr,
-                                                 size_t* marked_bytes_array,
-                                                 BitMap* task_card_bm) {
-  if (_nextMarkBitMap->parMark((HeapWord*)obj)) {
-    // Update the task specific count data for the object.
-    count_object(obj, hr, marked_bytes_array, task_card_bm, obj->size());
-    return true;
-  }
-  return false;
-}
-
-// Attempts to mark the given object and, if successful, counts
-// the object in the task/worker counting structures for the
-// given worker id.
-inline bool G1ConcurrentMark::par_mark_and_count(oop obj,
-                                                 size_t word_size,
-                                                 HeapRegion* hr,
-                                                 uint worker_id) {
-  if (_nextMarkBitMap->parMark((HeapWord*)obj)) {
-    size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
-    BitMap* task_card_bm = count_card_bitmap_for(worker_id);
-    count_object(obj, hr, marked_bytes_array, task_card_bm, word_size);
-    return true;
-  }
-  return false;
+inline bool G1ConcurrentMark::par_mark(oop obj) {
+  return _nextMarkBitMap->parMark((HeapWord*)obj);
 }
 
 inline bool G1CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
@@ -294,10 +165,8 @@
   check_limits();
 }
 
-
-
-inline void G1CMTask::make_reference_grey(oop obj, HeapRegion* hr) {
-  if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
+inline void G1CMTask::make_reference_grey(oop obj) {
+  if (_cm->par_mark(obj)) {
     // No OrderAccess:store_load() is needed. It is implicit in the
     // CAS done in G1CMBitMap::parMark() call in the routine above.
     HeapWord* global_finger = _cm->finger();
@@ -348,7 +217,7 @@
       // anything with it).
       HeapRegion* hr = _g1h->heap_region_containing(obj);
       if (!hr->obj_allocated_since_next_marking(obj)) {
-        make_reference_grey(obj, hr);
+        make_reference_grey(obj);
       }
     }
   }
@@ -370,8 +239,7 @@
   return _prevMarkBitMap->isMarked(addr);
 }
 
-inline void G1ConcurrentMark::grayRoot(oop obj, size_t word_size,
-                                       uint worker_id, HeapRegion* hr) {
+inline void G1ConcurrentMark::grayRoot(oop obj, HeapRegion* hr) {
   assert(obj != NULL, "pre-condition");
   HeapWord* addr = (HeapWord*) obj;
   if (hr == NULL) {
@@ -386,9 +254,18 @@
 
   if (addr < hr->next_top_at_mark_start()) {
     if (!_nextMarkBitMap->isMarked(addr)) {
-      par_mark_and_count(obj, word_size, hr, worker_id);
+      par_mark(obj);
     }
   }
 }
 
+inline bool G1ConcurrentMark::do_yield_check() {
+  if (SuspendibleThreadSet::should_yield()) {
+    SuspendibleThreadSet::yield();
+    return true;
+  } else {
+    return false;
+  }
+}
+
 #endif // SHARE_VM_GC_G1_G1CONCURRENTMARK_INLINE_HPP
--- a/hotspot/src/share/vm/gc/g1/g1EvacFailure.cpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1EvacFailure.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -95,8 +95,6 @@
   void do_object(oop obj) {
     HeapWord* obj_addr = (HeapWord*) obj;
     assert(_hr->is_in(obj_addr), "sanity");
-    size_t obj_size = obj->size();
-    HeapWord* obj_end = obj_addr + obj_size;
 
     if (obj->is_forwarded() && obj->forwardee() == obj) {
       // The object failed to move.
@@ -119,8 +117,10 @@
         // explicitly and all objects in the CSet are considered
         // (implicitly) live. So, we won't mark them explicitly and
         // we'll leave them over NTAMS.
-        _cm->grayRoot(obj, obj_size, _worker_id, _hr);
+        _cm->grayRoot(obj, _hr);
       }
+      size_t obj_size = obj->size();
+
       _marked_bytes += (obj_size * HeapWordSize);
       obj->set_mark(markOopDesc::prototype());
 
@@ -138,6 +138,7 @@
       // the collection set. So, we'll recreate such entries now.
       obj->oop_iterate(_update_rset_cl);
 
+      HeapWord* obj_end = obj_addr + obj_size;
       _last_forwarded_object_end = obj_end;
       _hr->cross_threshold(obj_addr, obj_end);
     }
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -186,11 +186,9 @@
 private:
   G1CollectedHeap* _g1h;
   G1ConcurrentMark* _cm;
-  uint _worker_id;
 public:
-  G1RootRegionScanClosure(G1CollectedHeap* g1h, G1ConcurrentMark* cm,
-                          uint worker_id) :
-    _g1h(g1h), _cm(cm), _worker_id(worker_id) { }
+  G1RootRegionScanClosure(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
+    _g1h(g1h), _cm(cm) { }
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(      oop* p) { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -131,7 +131,7 @@
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
     HeapRegion* hr = _g1h->heap_region_containing((HeapWord*) obj);
-    _cm->grayRoot(obj, obj->size(), _worker_id, hr);
+    _cm->grayRoot(obj, hr);
   }
 }
 
@@ -246,7 +246,7 @@
   assert(!_g1->heap_region_containing(obj)->in_collection_set(), "should not mark objects in the CSet");
 
   // We know that the object is not moving so it's safe to read its size.
-  _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
+  _cm->grayRoot(obj);
 }
 
 void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
@@ -261,7 +261,7 @@
   // worker so we cannot trust that its to-space image is
   // well-formed. So we have to read its size from its from-space
   // image which we know should not be changing.
-  _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id);
+  _cm->grayRoot(to_obj);
 }
 
 template <G1Barrier barrier, G1Mark do_mark_object, bool use_ext>
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -38,6 +38,7 @@
 #include "gc/g1/heapRegion.inline.hpp"
 #include "gc/g1/heapRegionManager.inline.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
+#include "gc/shared/gcTraceTime.inline.hpp"
 #include "memory/iterator.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/oop.inline.hpp"
@@ -84,8 +85,16 @@
   return MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
 }
 
-void G1RemSet::initialize(uint max_regions) {
+void G1RemSet::initialize(size_t capacity, uint max_regions) {
   G1FromCardCache::initialize(num_par_rem_sets(), max_regions);
+  {
+    GCTraceTime(Debug, gc, marking)("Initialize Card Live Data");
+    _card_live_data.initialize(capacity, max_regions);
+  }
+  if (G1PretouchAuxiliaryMemory) {
+    GCTraceTime(Debug, gc, marking)("Pre-Touch Card Live Data");
+    _card_live_data.pretouch();
+  }
 }
 
 ScanRSClosure::ScanRSClosure(G1ParPushHeapRSClosure* oc,
@@ -312,27 +321,24 @@
   _into_cset_dirty_card_queue_set.clear_n_completed_buffers();
 }
 
-class ScrubRSClosure: public HeapRegionClosure {
+class G1ScrubRSClosure: public HeapRegionClosure {
   G1CollectedHeap* _g1h;
-  BitMap* _region_bm;
-  BitMap* _card_bm;
-  CardTableModRefBS* _ctbs;
+  G1CardLiveData* _live_data;
 public:
-  ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
+  G1ScrubRSClosure(G1CardLiveData* live_data) :
     _g1h(G1CollectedHeap::heap()),
-    _region_bm(region_bm), _card_bm(card_bm),
-    _ctbs(_g1h->g1_barrier_set()) {}
+    _live_data(live_data) { }
 
   bool doHeapRegion(HeapRegion* r) {
     if (!r->is_continues_humongous()) {
-      r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
+      r->rem_set()->scrub(_live_data);
     }
     return false;
   }
 };
 
-void G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm, uint worker_num, HeapRegionClaimer *hrclaimer) {
-  ScrubRSClosure scrub_cl(region_bm, card_bm);
+void G1RemSet::scrub(uint worker_num, HeapRegionClaimer *hrclaimer) {
+  G1ScrubRSClosure scrub_cl(&_card_live_data);
   _g1->heap_region_par_iterate(&scrub_cl, worker_num, hrclaimer);
 }
 
@@ -580,3 +586,25 @@
     assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   }
 }
+
+void G1RemSet::create_card_live_data(WorkGang* workers, G1CMBitMap* mark_bitmap) {
+  _card_live_data.create(workers, mark_bitmap);
+}
+
+void G1RemSet::finalize_card_live_data(WorkGang* workers, G1CMBitMap* mark_bitmap) {
+  _card_live_data.finalize(workers, mark_bitmap);
+}
+
+void G1RemSet::verify_card_live_data(WorkGang* workers, G1CMBitMap* bitmap) {
+  _card_live_data.verify(workers, bitmap);
+}
+
+void G1RemSet::clear_card_live_data(WorkGang* workers) {
+  _card_live_data.clear(workers);
+}
+
+#ifdef ASSERT
+void G1RemSet::verify_card_live_data_is_clear() {
+  _card_live_data.verify_is_clear();
+}
+#endif
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_G1_G1REMSET_HPP
 
 #include "gc/g1/dirtyCardQueue.hpp"
+#include "gc/g1/g1CardLiveData.hpp"
 #include "gc/g1/g1RemSetSummary.hpp"
 #include "gc/g1/heapRegion.hpp"
 #include "memory/allocation.hpp"
@@ -48,9 +49,10 @@
 // A G1RemSet in which each heap region has a rem set that records the
 // external heap references into it.  Uses a mod ref bs to track updates,
 // so that they can be used to update the individual region remsets.
-
 class G1RemSet: public CHeapObj<mtGC> {
 private:
+  G1CardLiveData _card_live_data;
+
   G1RemSetSummary _prev_period_summary;
 
   // A DirtyCardQueueSet that is used to hold cards that contain
@@ -83,7 +85,7 @@
   static uint num_par_rem_sets();
 
   // Initialize data that depends on the heap size being known.
-  static void initialize(uint max_regions);
+  void initialize(size_t capacity, uint max_regions);
 
   // This is called to reset dual hash tables after the gc pause
   // is finished and the initial hash table is no longer being
@@ -140,7 +142,7 @@
   // set entries that correspond to dead heap ranges. "worker_num" is the
   // parallel thread id of the current thread, and "hrclaimer" is the
   // HeapRegionClaimer that should be used.
-  void scrub(BitMap* region_bm, BitMap* card_bm, uint worker_num, HeapRegionClaimer* hrclaimer);
+  void scrub(uint worker_num, HeapRegionClaimer* hrclaimer);
 
   // Refine the card corresponding to "card_ptr".
   // If check_for_refs_into_cset is true, a true result is returned
@@ -162,6 +164,19 @@
   size_t conc_refine_cards() const {
     return _conc_refine_cards;
   }
+
+  void create_card_live_data(WorkGang* workers, G1CMBitMap* mark_bitmap);
+  void finalize_card_live_data(WorkGang* workers, G1CMBitMap* mark_bitmap);
+
+  // Verify that the liveness count data created concurrently matches one created
+  // during this safepoint.
+  void verify_card_live_data(WorkGang* workers, G1CMBitMap* actual_bitmap);
+
+  void clear_card_live_data(WorkGang* workers);
+
+#ifdef ASSERT
+  void verify_card_live_data_is_clear();
+#endif
 };
 
 class ScanRSClosure : public HeapRegionClosure {
--- a/hotspot/src/share/vm/gc/g1/g1_globals.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1_globals.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -260,6 +260,9 @@
           "The target number of mixed GCs after a marking cycle.")          \
           range(0, max_uintx)                                               \
                                                                             \
+  experimental(bool, G1PretouchAuxiliaryMemory, false,                      \
+          "Pre-touch large auxiliary data structures used by the GC.")      \
+                                                                            \
   experimental(bool, G1EagerReclaimHumongousObjects, true,                  \
           "Try to reclaim dead large objects at every young GC.")           \
                                                                             \
--- a/hotspot/src/share/vm/gc/g1/heapRegionRemSet.cpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionRemSet.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -26,6 +26,7 @@
 #include "gc/g1/concurrentG1Refine.hpp"
 #include "gc/g1/g1BlockOffsetTable.inline.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1CardLiveData.inline.hpp"
 #include "gc/g1/heapRegionManager.inline.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
 #include "gc/shared/space.inline.hpp"
@@ -141,10 +142,8 @@
     add_reference_work(from, /*parallel*/ false);
   }
 
-  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
-    HeapWord* hr_bot = hr()->bottom();
-    size_t hr_first_card_index = ctbs->index_for(hr_bot);
-    bm()->set_intersection_at_offset(*card_bm, hr_first_card_index);
+  void scrub(G1CardLiveData* live_data) {
+    live_data->remove_nonlive_cards(hr()->hrm_index(), &_bm);
     recount_occupied();
   }
 
@@ -515,14 +514,12 @@
   return max;
 }
 
-void OtherRegionsTable::scrub(CardTableModRefBS* ctbs,
-                              BitMap* region_bm, BitMap* card_bm) {
+void OtherRegionsTable::scrub(G1CardLiveData* live_data) {
   // First eliminated garbage regions from the coarse map.
   log_develop_trace(gc, remset, scrub)("Scrubbing region %u:", _hr->hrm_index());
 
-  assert(_coarse_map.size() == region_bm->size(), "Precondition");
   log_develop_trace(gc, remset, scrub)("   Coarse map: before = " SIZE_FORMAT "...", _n_coarse_entries);
-  _coarse_map.set_intersection(*region_bm);
+  live_data->remove_nonlive_regions(&_coarse_map);
   _n_coarse_entries = _coarse_map.count_one_bits();
   log_develop_trace(gc, remset, scrub)("   after = " SIZE_FORMAT ".", _n_coarse_entries);
 
@@ -534,7 +531,7 @@
       PerRegionTable* nxt = cur->collision_list_next();
       // If the entire region is dead, eliminate.
       log_develop_trace(gc, remset, scrub)("     For other region %u:", cur->hr()->hrm_index());
-      if (!region_bm->at((size_t) cur->hr()->hrm_index())) {
+      if (!live_data->is_region_live(cur->hr()->hrm_index())) {
         *prev = nxt;
         cur->set_collision_list_next(NULL);
         _n_fine_entries--;
@@ -544,7 +541,7 @@
       } else {
         // Do fine-grain elimination.
         log_develop_trace(gc, remset, scrub)("          occ: before = %4d.", cur->occupied());
-        cur->scrub(ctbs, card_bm);
+        cur->scrub(live_data);
         log_develop_trace(gc, remset, scrub)("          after = %4d.", cur->occupied());
         // Did that empty the table completely?
         if (cur->occupied() == 0) {
@@ -773,9 +770,8 @@
   assert(verify_ready_for_par_iteration(), "post-condition");
 }
 
-void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs,
-                             BitMap* region_bm, BitMap* card_bm) {
-  _other_regions.scrub(ctbs, region_bm, card_bm);
+void HeapRegionRemSet::scrub(G1CardLiveData* live_data) {
+  _other_regions.scrub(live_data);
 }
 
 // Code roots support
--- a/hotspot/src/share/vm/gc/g1/heapRegionRemSet.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionRemSet.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -35,6 +35,7 @@
 
 class G1CollectedHeap;
 class G1BlockOffsetTable;
+class G1CardLiveData;
 class HeapRegion;
 class HeapRegionRemSetIterator;
 class PerRegionTable;
@@ -143,7 +144,7 @@
   // Removes any entries shown by the given bitmaps to contain only dead
   // objects. Not thread safe.
   // Set bits in the bitmaps indicate that the given region or card is live.
-  void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
+  void scrub(G1CardLiveData* live_data);
 
   // Returns whether this remembered set (and all sub-sets) does not contain any entry.
   bool is_empty() const;
@@ -230,10 +231,9 @@
     _other_regions.add_reference(from, tid);
   }
 
-  // Removes any entries in the remembered set shown by the given bitmaps to
+  // Removes any entries in the remembered set shown by the given card live data to
   // contain only dead objects. Not thread safe.
-  // One bits in the bitmaps indicate that the given region or card is live.
-  void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
+  void scrub(G1CardLiveData* live_data);
 
   // The region is being reclaimed; clear its remset, and any mention of
   // entries for this region in other remsets.
--- a/hotspot/src/share/vm/utilities/bitMap.cpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/utilities/bitMap.cpp	Wed Apr 06 16:03:02 2016 +0200
@@ -68,6 +68,10 @@
   }
 }
 
+void BitMap::pretouch() {
+  os::pretouch_memory(word_addr(0), word_addr(size()));
+}
+
 void BitMap::set_range_within_word(idx_t beg, idx_t end) {
   // With a valid range (beg <= end), this test ensures that end != 0, as
   // required by inverted_bit_mask_for_range.  Also avoids an unnecessary write.
--- a/hotspot/src/share/vm/utilities/bitMap.hpp	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/src/share/vm/utilities/bitMap.hpp	Wed Apr 06 16:03:02 2016 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -135,10 +135,18 @@
   // use the same value for "in_resource_area".)
   void resize(idx_t size_in_bits, bool in_resource_area = true);
 
+  // Pretouch the entire range of memory this BitMap covers.
+  void pretouch();
+
   // Accessing
   idx_t size() const                    { return _size; }
+  idx_t size_in_bytes() const           { return size_in_words() * BytesPerWord; }
   idx_t size_in_words() const           {
-    return word_index(size() + BitsPerWord - 1);
+    return calc_size_in_words(size());
+  }
+
+  static idx_t calc_size_in_words(size_t size_in_bits) {
+    return word_index(size_in_bits + BitsPerWord - 1);
   }
 
   bool at(idx_t index) const {
--- a/hotspot/test/gc/g1/Test2GbHeap.java	Wed Apr 06 07:37:15 2016 -0400
+++ b/hotspot/test/gc/g1/Test2GbHeap.java	Wed Apr 06 16:03:02 2016 +0200
@@ -25,6 +25,9 @@
  * @test Test2GbHeap
  * @bug 8031686
  * @summary Regression test to ensure we can start G1 with 2gb heap.
+ * Skip test on 32 bit Windows: it typically does not support the many and large virtual memory reservations needed.
+ * @requires (vm.gc == "G1" | vm.gc == "null")
+ * @requires !((sun.arch.data.model == "32") & (os.family == "windows"))
  * @key gc
  * @key regression
  * @library /testlibrary
@@ -48,17 +51,6 @@
     ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(testArguments.toArray(new String[0]));
 
     OutputAnalyzer output = new OutputAnalyzer(pb.start());
-
-    // Avoid failing test for setups not supported.
-    if (output.getOutput().contains("Could not reserve enough space for 2097152KB object heap")) {
-      // Will fail on machines with too little memory (and Windows 32-bit VM), ignore such failures.
-      output.shouldHaveExitValue(1);
-    } else if (output.getOutput().contains("-XX:+UseG1GC not supported in this VM")) {
-      // G1 is not supported on embedded, ignore such failures.
-      output.shouldHaveExitValue(1);
-    } else {
-      // Normally everything should be fine.
-      output.shouldHaveExitValue(0);
-    }
+    output.shouldHaveExitValue(0);
   }
 }