8034842: Parallelize the Free CSet phase in G1
authortschatzl
Tue, 19 Jul 2016 10:31:41 +0200
changeset 39979 b17e445924da
parent 39978 d79e47b2d2d9
child 39980 dcef6760667c
8034842: Parallelize the Free CSet phase in G1 Reviewed-by: jmasa, ehelin
hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp
hotspot/src/share/vm/gc/g1/g1DefaultPolicy.cpp
hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp
hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp
hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp
hotspot/src/share/vm/gc/g1/heapRegion.cpp
hotspot/src/share/vm/gc/g1/heapRegion.hpp
hotspot/test/gc/g1/TestGCLogMessages.java
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Tue Jul 19 10:31:41 2016 +0200
@@ -4524,7 +4524,8 @@
 
 void G1CollectedHeap::free_region(HeapRegion* hr,
                                   FreeRegionList* free_list,
-                                  bool par,
+                                  bool skip_remset,
+                                  bool skip_hot_card_cache,
                                   bool locked) {
   assert(!hr->is_free(), "the region should not be free");
   assert(!hr->is_empty(), "the region should not be empty");
@@ -4539,20 +4540,20 @@
   // Clear the card counts for this region.
   // Note: we only need to do this if the region is not young
   // (since we don't refine cards in young regions).
-  if (!hr->is_young()) {
+  if (!skip_hot_card_cache && !hr->is_young()) {
     _hot_card_cache->reset_card_counts(hr);
   }
-  hr->hr_clear(par, true /* clear_space */, locked /* locked */);
+  hr->hr_clear(skip_remset, true /* clear_space */, locked /* locked */);
   free_list->add_ordered(hr);
 }
 
 void G1CollectedHeap::free_humongous_region(HeapRegion* hr,
                                             FreeRegionList* free_list,
-                                            bool par) {
+                                            bool skip_remset) {
   assert(hr->is_humongous(), "this is only for humongous regions");
   assert(free_list != NULL, "pre-condition");
   hr->clear_humongous();
-  free_region(hr, free_list, par);
+  free_region(hr, free_list, skip_remset);
 }
 
 void G1CollectedHeap::remove_from_old_sets(const uint old_regions_removed,
@@ -4600,137 +4601,280 @@
   workers()->run_task(&g1_par_scrub_rs_task);
 }
 
-class G1FreeCollectionSetClosure : public HeapRegionClosure {
+class G1FreeCollectionSetTask : public AbstractGangTask {
 private:
+
+  // Closure applied to all regions in the collection set to do work that needs to
+  // be done serially in a single thread.
+  class G1SerialFreeCollectionSetClosure : public HeapRegionClosure {
+  private:
+    EvacuationInfo* _evacuation_info;
+    const size_t* _surviving_young_words;
+
+    // Bytes used in successfully evacuated regions before the evacuation.
+    size_t _before_used_bytes;
+    // Bytes used in unsucessfully evacuated regions before the evacuation
+    size_t _after_used_bytes;
+
+    size_t _bytes_allocated_in_old_since_last_gc;
+
+    size_t _failure_used_words;
+    size_t _failure_waste_words;
+
+    FreeRegionList _local_free_list;
+  public:
+    G1SerialFreeCollectionSetClosure(EvacuationInfo* evacuation_info, const size_t* surviving_young_words) :
+      HeapRegionClosure(),
+      _evacuation_info(evacuation_info),
+      _surviving_young_words(surviving_young_words),
+      _before_used_bytes(0),
+      _after_used_bytes(0),
+      _bytes_allocated_in_old_since_last_gc(0),
+      _failure_used_words(0),
+      _failure_waste_words(0),
+      _local_free_list("Local Region List for CSet Freeing") {
+    }
+
+    virtual bool doHeapRegion(HeapRegion* r) {
+      G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+      assert(r->in_collection_set(), "Region %u should be in collection set.", r->hrm_index());
+      g1h->clear_in_cset(r);
+
+      if (r->is_young()) {
+        assert(r->young_index_in_cset() != -1 && (uint)r->young_index_in_cset() < g1h->collection_set()->young_region_length(),
+               "Young index %d is wrong for region %u of type %s with %u young regions",
+               r->young_index_in_cset(),
+               r->hrm_index(),
+               r->get_type_str(),
+               g1h->collection_set()->young_region_length());
+        size_t words_survived = _surviving_young_words[r->young_index_in_cset()];
+        r->record_surv_words_in_group(words_survived);
+      }
+
+      if (!r->evacuation_failed()) {
+        assert(r->not_empty(), "Region %u is an empty region in the collection set.", r->hrm_index());
+        _before_used_bytes += r->used();
+        g1h->free_region(r,
+                         &_local_free_list,
+                         true, /* skip_remset */
+                         true, /* skip_hot_card_cache */
+                         true  /* locked */);
+      } else {
+        r->uninstall_surv_rate_group();
+        r->set_young_index_in_cset(-1);
+        r->set_evacuation_failed(false);
+        // When moving a young gen region to old gen, we "allocate" that whole region
+        // there. This is in addition to any already evacuated objects. Notify the
+        // policy about that.
+        // Old gen regions do not cause an additional allocation: both the objects
+        // still in the region and the ones already moved are accounted for elsewhere.
+        if (r->is_young()) {
+          _bytes_allocated_in_old_since_last_gc += HeapRegion::GrainBytes;
+        }
+        // The region is now considered to be old.
+        r->set_old();
+        // Do some allocation statistics accounting. Regions that failed evacuation
+        // are always made old, so there is no need to update anything in the young
+        // gen statistics, but we need to update old gen statistics.
+        size_t used_words = r->marked_bytes() / HeapWordSize;
+
+        _failure_used_words += used_words;
+        _failure_waste_words += HeapRegion::GrainWords - used_words;
+
+        g1h->old_set_add(r);
+        _after_used_bytes += r->used();
+      }
+      return false;
+    }
+
+    void complete_work() {
+      G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+      _evacuation_info->set_regions_freed(_local_free_list.length());
+      _evacuation_info->increment_collectionset_used_after(_after_used_bytes);
+
+      g1h->prepend_to_freelist(&_local_free_list);
+      g1h->decrement_summary_bytes(_before_used_bytes);
+
+      G1Policy* policy = g1h->g1_policy();
+      policy->add_bytes_allocated_in_old_since_last_gc(_bytes_allocated_in_old_since_last_gc);
+
+      g1h->alloc_buffer_stats(InCSetState::Old)->add_failure_used_and_waste(_failure_used_words, _failure_waste_words);
+    }
+  };
+
+  G1CollectionSet* _collection_set;
+  G1SerialFreeCollectionSetClosure _cl;
   const size_t* _surviving_young_words;
 
-  FreeRegionList _local_free_list;
   size_t _rs_lengths;
-  // Bytes used in successfully evacuated regions before the evacuation.
-  size_t _before_used_bytes;
-  // Bytes used in unsucessfully evacuated regions before the evacuation
-  size_t _after_used_bytes;
-
-  size_t _bytes_allocated_in_old_since_last_gc;
-
-  size_t _failure_used_words;
-  size_t _failure_waste_words;
-
-  double _young_time;
-  double _non_young_time;
-public:
-  G1FreeCollectionSetClosure(const size_t* surviving_young_words) :
-    HeapRegionClosure(),
-    _surviving_young_words(surviving_young_words),
-    _local_free_list("Local Region List for CSet Freeing"),
-    _rs_lengths(0),
-    _before_used_bytes(0),
-    _after_used_bytes(0),
-    _bytes_allocated_in_old_since_last_gc(0),
-    _failure_used_words(0),
-    _failure_waste_words(0),
-    _young_time(0.0),
-    _non_young_time(0.0) {
+
+  volatile jint _serial_work_claim;
+
+  struct WorkItem {
+    uint region_idx;
+    bool is_young;
+    bool evacuation_failed;
+
+    WorkItem(HeapRegion* r) {
+      region_idx = r->hrm_index();
+      is_young = r->is_young();
+      evacuation_failed = r->evacuation_failed();
+    }
+  };
+
+  volatile size_t _parallel_work_claim;
+  size_t _num_work_items;
+  WorkItem* _work_items;
+
+  void do_serial_work() {
+    // Need to grab the lock to be allowed to modify the old region list.
+    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
+    _collection_set->iterate(&_cl);
   }
 
-  virtual bool doHeapRegion(HeapRegion* r) {
-    double start_time = os::elapsedTime();
-
-    bool is_young = r->is_young();
-
+  void do_parallel_work_for_region(uint region_idx, bool is_young, bool evacuation_failed) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    HeapRegion* r = g1h->region_at(region_idx);
     assert(!g1h->is_on_master_free_list(r), "sanity");
 
-    _rs_lengths += r->rem_set()->occupied_locked();
-
-    assert(r->in_collection_set(), "Region %u should be in collection set.", r->hrm_index());
-    g1h->clear_in_cset(r);
-
-    if (is_young) {
-      int index = r->young_index_in_cset();
-      assert(index != -1, "Young index in collection set must not be -1 for region %u", r->hrm_index());
-      assert((uint) index < g1h->collection_set()->young_region_length(), "invariant");
-      size_t words_survived = _surviving_young_words[index];
-      r->record_surv_words_in_group(words_survived);
-    } else {
-      assert(r->young_index_in_cset() == -1, "Young index for old region %u in collection set must be -1", r->hrm_index());
+    Atomic::add(r->rem_set()->occupied_locked(), &_rs_lengths);
+
+    if (!is_young) {
+      g1h->_hot_card_cache->reset_card_counts(r);
+    }
+
+    if (!evacuation_failed) {
+      r->rem_set()->clear_locked();
+    }
+  }
+
+  class G1PrepareFreeCollectionSetClosure : public HeapRegionClosure {
+  private:
+    size_t _cur_idx;
+    WorkItem* _work_items;
+  public:
+    G1PrepareFreeCollectionSetClosure(WorkItem* work_items) : HeapRegionClosure(), _cur_idx(0), _work_items(work_items) { }
+
+    virtual bool doHeapRegion(HeapRegion* r) {
+      _work_items[_cur_idx++] = WorkItem(r);
+      return false;
     }
-
-    if (!r->evacuation_failed()) {
-      assert(r->not_empty(), "Region %u is an empty region in the collection set.", r->hrm_index());
-      _before_used_bytes += r->used();
-      g1h->free_region(r, &_local_free_list, false /* par */, true /* locked */);
-    } else {
-      r->uninstall_surv_rate_group();
-      r->set_young_index_in_cset(-1);
-      r->set_evacuation_failed(false);
-      // When moving a young gen region to old gen, we "allocate" that whole region
-      // there. This is in addition to any already evacuated objects. Notify the
-      // policy about that.
-      // Old gen regions do not cause an additional allocation: both the objects
-      // still in the region and the ones already moved are accounted for elsewhere.
-      if (is_young) {
-        _bytes_allocated_in_old_since_last_gc += HeapRegion::GrainBytes;
+  };
+
+  void prepare_work() {
+    G1PrepareFreeCollectionSetClosure cl(_work_items);
+    _collection_set->iterate(&cl);
+  }
+
+  void complete_work() {
+    _cl.complete_work();
+
+    G1Policy* policy = G1CollectedHeap::heap()->g1_policy();
+    policy->record_max_rs_lengths(_rs_lengths);
+    policy->cset_regions_freed();
+  }
+public:
+  G1FreeCollectionSetTask(G1CollectionSet* collection_set, EvacuationInfo* evacuation_info, const size_t* surviving_young_words) :
+    AbstractGangTask("G1 Free Collection Set"),
+    _cl(evacuation_info, surviving_young_words),
+    _collection_set(collection_set),
+    _surviving_young_words(surviving_young_words),
+    _serial_work_claim(0),
+    _rs_lengths(0),
+    _parallel_work_claim(0),
+    _num_work_items(collection_set->region_length()),
+    _work_items(NEW_C_HEAP_ARRAY(WorkItem, _num_work_items, mtGC)) {
+    prepare_work();
+  }
+
+  ~G1FreeCollectionSetTask() {
+    complete_work();
+    FREE_C_HEAP_ARRAY(WorkItem, _work_items);
+  }
+
+  // Chunk size for work distribution. The chosen value has been determined experimentally
+  // to be a good tradeoff between overhead and achievable parallelism.
+  static uint chunk_size() { return 32; }
+
+  virtual void work(uint worker_id) {
+    G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times();
+
+    // Claim serial work.
+    if (_serial_work_claim == 0) {
+      jint value = Atomic::add(1, &_serial_work_claim) - 1;
+      if (value == 0) {
+        double serial_time = os::elapsedTime();
+        do_serial_work();
+        timer->record_serial_free_cset_time_ms((os::elapsedTime() - serial_time) * 1000.0);
       }
-      // The region is now considered to be old.
-      r->set_old();
-      // Do some allocation statistics accounting. Regions that failed evacuation
-      // are always made old, so there is no need to update anything in the young
-      // gen statistics, but we need to update old gen statistics.
-      size_t used_words = r->marked_bytes() / HeapWordSize;
-
-      _failure_used_words += used_words;
-      _failure_waste_words += HeapRegion::GrainWords - used_words;
-
-      g1h->old_set_add(r);
-      _after_used_bytes += r->used();
     }
 
-    if (is_young) {
-      _young_time += os::elapsedTime() - start_time;
-    } else {
-      _non_young_time += os::elapsedTime() - start_time;
+    // Start parallel work.
+    double young_time = 0.0;
+    bool has_young_time = false;
+    double non_young_time = 0.0;
+    bool has_non_young_time = false;
+
+    while (true) {
+      size_t end = Atomic::add(chunk_size(), &_parallel_work_claim);
+      size_t cur = end - chunk_size();
+
+      if (cur >= _num_work_items) {
+        break;
+      }
+
+      double start_time = os::elapsedTime();
+
+      end = MIN2(end, _num_work_items);
+
+      for (; cur < end; cur++) {
+        bool is_young = _work_items[cur].is_young;
+
+        do_parallel_work_for_region(_work_items[cur].region_idx, is_young, _work_items[cur].evacuation_failed);
+
+        double end_time = os::elapsedTime();
+        double time_taken = end_time - start_time;
+        if (is_young) {
+          young_time += time_taken;
+          has_young_time = true;
+        } else {
+          non_young_time += time_taken;
+          has_non_young_time = true;
+        }
+        start_time = end_time;
+      }
     }
-    return false;
+
+    if (has_young_time) {
+      timer->record_time_secs(G1GCPhaseTimes::YoungFreeCSet, worker_id, young_time);
+    }
+    if (has_non_young_time) {
+      timer->record_time_secs(G1GCPhaseTimes::NonYoungFreeCSet, worker_id, young_time);
+    }
   }
-
-  FreeRegionList* local_free_list() { return &_local_free_list; }
-  size_t rs_lengths() const { return _rs_lengths; }
-  size_t before_used_bytes() const { return _before_used_bytes; }
-  size_t after_used_bytes() const { return _after_used_bytes; }
-
-  size_t bytes_allocated_in_old_since_last_gc() const { return _bytes_allocated_in_old_since_last_gc; }
-
-  size_t failure_used_words() const { return _failure_used_words; }
-  size_t failure_waste_words() const { return _failure_waste_words; }
-
-  double young_time() const { return _young_time; }
-  double non_young_time() const { return _non_young_time; }
 };
 
 void G1CollectedHeap::free_collection_set(G1CollectionSet* collection_set, EvacuationInfo& evacuation_info, const size_t* surviving_young_words) {
   _eden.clear();
 
-  G1FreeCollectionSetClosure cl(surviving_young_words);
-  collection_set_iterate(&cl);
-
-  evacuation_info.set_regions_freed(cl.local_free_list()->length());
-  evacuation_info.increment_collectionset_used_after(cl.after_used_bytes());
-
-  G1Policy* policy = g1_policy();
-
-  policy->record_max_rs_lengths(cl.rs_lengths());
-  policy->cset_regions_freed();
-
-  prepend_to_freelist(cl.local_free_list());
-  decrement_summary_bytes(cl.before_used_bytes());
-
-  policy->add_bytes_allocated_in_old_since_last_gc(cl.bytes_allocated_in_old_since_last_gc());
-
-  _old_evac_stats.add_failure_used_and_waste(cl.failure_used_words(), cl.failure_waste_words());
-
-  policy->phase_times()->record_young_free_cset_time_ms(cl.young_time() * 1000.0);
-  policy->phase_times()->record_non_young_free_cset_time_ms(cl.non_young_time() * 1000.0);
+  double free_cset_start_time = os::elapsedTime();
+
+  {
+    uint const num_chunks = MAX2(_collection_set.region_length() / G1FreeCollectionSetTask::chunk_size(), 1U);
+    uint const num_workers = MIN2(workers()->active_workers(), num_chunks);
+
+    G1FreeCollectionSetTask cl(collection_set, &evacuation_info, surviving_young_words);
+
+    log_debug(gc, ergo)("Running %s using %u workers for collection set length %u",
+                        cl.name(),
+                        num_workers,
+                        _collection_set.region_length());
+    workers()->run_task(&cl, num_workers);
+  }
+  g1_policy()->phase_times()->record_total_free_cset_time_ms((os::elapsedTime() - free_cset_start_time) * 1000.0);
 
   collection_set->clear();
 }
@@ -4825,7 +4969,7 @@
       _freed_bytes += r->used();
       r->set_containing_set(NULL);
       _humongous_regions_removed++;
-      g1h->free_humongous_region(r, _free_region_list, false);
+      g1h->free_humongous_region(r, _free_region_list, false /* skip_remset */ );
       r = next;
     } while (r != NULL);
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Tue Jul 19 10:31:41 2016 +0200
@@ -118,6 +118,7 @@
 };
 
 class G1CollectedHeap : public CollectedHeap {
+  friend class G1FreeCollectionSetTask;
   friend class VM_CollectForMetadataAllocation;
   friend class VM_G1CollectForAllocation;
   friend class VM_G1CollectFull;
@@ -642,13 +643,15 @@
   // adding it to the free list that's passed as a parameter (this is
   // usually a local list which will be appended to the master free
   // list later). The used bytes of freed regions are accumulated in
-  // pre_used. If par is true, the region's RSet will not be freed
-  // up. The assumption is that this will be done later.
+  // pre_used. If skip_remset is true, the region's RSet will not be freed
+  // up. If skip_hot_card_cache is true, the region's hot card cache will not
+  // be freed up. The assumption is that this will be done later.
   // The locked parameter indicates if the caller has already taken
   // care of proper synchronization. This may allow some optimizations.
   void free_region(HeapRegion* hr,
                    FreeRegionList* free_list,
-                   bool par,
+                   bool skip_remset,
+                   bool skip_hot_card_cache = false,
                    bool locked = false);
 
   // It dirties the cards that cover the block so that the post
@@ -662,11 +665,11 @@
   // will be added to the free list that's passed as a parameter (this
   // is usually a local list which will be appended to the master free
   // list later). The used bytes of freed regions are accumulated in
-  // pre_used. If par is true, the region's RSet will not be freed
+  // pre_used. If skip_remset is true, the region's RSet will not be freed
   // up. The assumption is that this will be done later.
   void free_humongous_region(HeapRegion* hr,
                              FreeRegionList* free_list,
-                             bool par);
+                             bool skip_remset);
 
   // Facility for allocating in 'archive' regions in high heap memory and
   // recording the allocated ranges. These should all be called from the
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Tue Jul 19 10:31:41 2016 +0200
@@ -1159,10 +1159,10 @@
       hr->set_containing_set(NULL);
       if (hr->is_humongous()) {
         _humongous_regions_removed++;
-        _g1->free_humongous_region(hr, _local_cleanup_list, true);
+        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
       } else {
         _old_regions_removed++;
-        _g1->free_region(hr, _local_cleanup_list, true);
+        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
       }
     } else {
       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
--- a/hotspot/src/share/vm/gc/g1/g1DefaultPolicy.cpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1DefaultPolicy.cpp	Tue Jul 19 10:31:41 2016 +0200
@@ -501,13 +501,12 @@
 
 double G1DefaultPolicy::young_other_time_ms() const {
   return phase_times()->young_cset_choice_time_ms() +
-         phase_times()->young_free_cset_time_ms();
+         phase_times()->average_time_ms(G1GCPhaseTimes::YoungFreeCSet);
 }
 
 double G1DefaultPolicy::non_young_other_time_ms() const {
   return phase_times()->non_young_cset_choice_time_ms() +
-         phase_times()->non_young_free_cset_time_ms();
-
+         phase_times()->average_time_ms(G1GCPhaseTimes::NonYoungFreeCSet);
 }
 
 double G1DefaultPolicy::other_time_ms(double pause_time_ms) const {
@@ -515,7 +514,7 @@
 }
 
 double G1DefaultPolicy::constant_other_time_ms(double pause_time_ms) const {
-  return other_time_ms(pause_time_ms) - young_other_time_ms() - non_young_other_time_ms();
+  return other_time_ms(pause_time_ms) - phase_times()->total_free_cset_time_ms();
 }
 
 CollectionSetChooser* G1DefaultPolicy::cset_chooser() const {
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Tue Jul 19 10:31:41 2016 +0200
@@ -91,6 +91,9 @@
   _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards:");
   _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards);
 
+  _gc_par_phases[YoungFreeCSet] = new WorkerDataArray<double>(max_gc_threads, "Young Free Collection Set (ms):");
+  _gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray<double>(max_gc_threads, "Non-Young Free Collection Set (ms):");
+
   _gc_par_phases[PreserveCMReferents] = new WorkerDataArray<double>(max_gc_threads, "Parallel Preserve CM Refs (ms):");
 }
 
@@ -278,10 +281,11 @@
   info_line_and_account("Clear Card Table", _cur_clear_ct_time_ms);
   info_line_and_account("Expand Heap After Collection", _cur_expand_heap_time_ms);
 
-  double free_cset_time = _recorded_young_free_cset_time_ms + _recorded_non_young_free_cset_time_ms;
-  info_line_and_account("Free Collection Set", free_cset_time);
-  debug_line("Young Free Collection Set", _recorded_young_free_cset_time_ms);
-  debug_line("Non-Young Free Collection Set", _recorded_non_young_free_cset_time_ms);
+  info_line_and_account("Free Collection Set", _recorded_total_free_cset_time_ms);
+  debug_line("Free Collection Set Serial", _recorded_serial_free_cset_time_ms);
+  debug_phase(_gc_par_phases[YoungFreeCSet]);
+  debug_phase(_gc_par_phases[NonYoungFreeCSet]);
+
   info_line_and_account("Merge Per-Thread State", _recorded_merge_pss_time_ms);
 
   info_line("Other", _gc_pause_time_ms - accounted_time_ms);
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Tue Jul 19 10:31:41 2016 +0200
@@ -67,6 +67,8 @@
     StringDedupTableFixup,
     RedirtyCards,
     PreserveCMReferents,
+    YoungFreeCSet,
+    NonYoungFreeCSet,
     GCParPhasesSentinel
   };
 
@@ -110,8 +112,9 @@
 
   double _recorded_merge_pss_time_ms;
 
-  double _recorded_young_free_cset_time_ms;
-  double _recorded_non_young_free_cset_time_ms;
+  double _recorded_total_free_cset_time_ms;
+
+  double _recorded_serial_free_cset_time_ms;
 
   double _cur_fast_reclaim_humongous_time_ms;
   double _cur_fast_reclaim_humongous_register_time_ms;
@@ -199,12 +202,12 @@
     _root_region_scan_wait_time_ms = time_ms;
   }
 
-  void record_young_free_cset_time_ms(double time_ms) {
-    _recorded_young_free_cset_time_ms = time_ms;
+  void record_total_free_cset_time_ms(double time_ms) {
+    _recorded_total_free_cset_time_ms = time_ms;
   }
 
-  void record_non_young_free_cset_time_ms(double time_ms) {
-    _recorded_non_young_free_cset_time_ms = time_ms;
+  void record_serial_free_cset_time_ms(double time_ms) {
+    _recorded_serial_free_cset_time_ms = time_ms;
   }
 
   void record_fast_reclaim_humongous_stats(double time_ms, size_t total, size_t candidates) {
@@ -278,18 +281,14 @@
     return _recorded_young_cset_choice_time_ms;
   }
 
-  double young_free_cset_time_ms() {
-    return _recorded_young_free_cset_time_ms;
+  double total_free_cset_time_ms() {
+    return _recorded_total_free_cset_time_ms;
   }
 
   double non_young_cset_choice_time_ms() {
     return _recorded_non_young_cset_choice_time_ms;
   }
 
-  double non_young_free_cset_time_ms() {
-    return _recorded_non_young_free_cset_time_ms;
-  }
-
   double fast_reclaim_humongous_time_ms() {
     return _cur_fast_reclaim_humongous_time_ms;
   }
--- a/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp	Tue Jul 19 10:31:41 2016 +0200
@@ -340,7 +340,7 @@
   hr->set_containing_set(NULL);
   _humongous_regions_removed++;
 
-  _g1h->free_humongous_region(hr, &dummy_free_list, false /* par */);
+  _g1h->free_humongous_region(hr, &dummy_free_list, false /* skip_remset */);
   prepare_for_compaction(hr, end);
   dummy_free_list.remove_all();
 }
--- a/hotspot/src/share/vm/gc/g1/heapRegion.cpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.cpp	Tue Jul 19 10:31:41 2016 +0200
@@ -167,7 +167,7 @@
   init_top_at_mark_start();
 }
 
-void HeapRegion::hr_clear(bool par, bool clear_space, bool locked) {
+void HeapRegion::hr_clear(bool keep_remset, bool clear_space, bool locked) {
   assert(_humongous_start_region == NULL,
          "we should have already filtered out humongous regions");
   assert(!in_collection_set(),
@@ -179,15 +179,14 @@
   set_free();
   reset_pre_dummy_top();
 
-  if (!par) {
-    // If this is parallel, this will be done later.
-    HeapRegionRemSet* hrrs = rem_set();
+  if (!keep_remset) {
     if (locked) {
-      hrrs->clear_locked();
+      rem_set()->clear_locked();
     } else {
-      hrrs->clear();
+      rem_set()->clear();
     }
   }
+
   zero_marked_bytes();
 
   init_top_at_mark_start();
--- a/hotspot/src/share/vm/gc/g1/heapRegion.hpp	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.hpp	Tue Jul 19 10:31:41 2016 +0200
@@ -512,8 +512,11 @@
 #endif // ASSERT
 
 
-  // Reset HR stuff to default values.
-  void hr_clear(bool par, bool clear_space, bool locked = false);
+  // Reset the HeapRegion to default values.
+  // If skip_remset is true, do not clear the remembered set.
+  void hr_clear(bool skip_remset, bool clear_space, bool locked = false);
+  // Clear the parts skipped by skip_remset in hr_clear() in the HeapRegion during
+  // a concurrent phase.
   void par_clear();
 
   // Get the start of the unmarked area in this region.
--- a/hotspot/test/gc/g1/TestGCLogMessages.java	Mon Jul 18 14:20:30 2016 -0700
+++ b/hotspot/test/gc/g1/TestGCLogMessages.java	Tue Jul 19 10:31:41 2016 +0200
@@ -95,6 +95,8 @@
         new LogMessageWithLevel("String Dedup Fixup", Level.INFO),
         new LogMessageWithLevel("Expand Heap After Collection", Level.INFO),
         // Free CSet
+        new LogMessageWithLevel("Free Collection Set", Level.INFO),
+        new LogMessageWithLevel("Free Collection Set Serial", Level.DEBUG),
         new LogMessageWithLevel("Young Free Collection Set", Level.DEBUG),
         new LogMessageWithLevel("Non-Young Free Collection Set", Level.DEBUG),
         // Humongous Eager Reclaim