Merge
author bobv
Wed, 28 Jun 2017 14:13:00 +0200
changeset 46585 af110b4b1981
parent 46584 ec15a827637c (current diff)
parent 46574 9920d284b066 (diff)
child 46586 5c2a3a2e86ea
Merge
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -2360,20 +2360,6 @@
 
 // Iteration functions.
 
-// Applies an ExtendedOopClosure onto all references of objects within a HeapRegion.
-
-class IterateOopClosureRegionClosure: public HeapRegionClosure {
-  ExtendedOopClosure* _cl;
-public:
-  IterateOopClosureRegionClosure(ExtendedOopClosure* cl) : _cl(cl) {}
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->is_continues_humongous()) {
-      r->oop_iterate(_cl);
-    }
-    return false;
-  }
-};
-
 // Iterates an ObjectClosure over all objects within a HeapRegion.
 
 class IterateObjectClosureRegionClosure: public HeapRegionClosure {
@@ -2397,12 +2383,10 @@
   _hrm.iterate(cl);
 }
 
-void
-G1CollectedHeap::heap_region_par_iterate(HeapRegionClosure* cl,
-                                         uint worker_id,
-                                         HeapRegionClaimer *hrclaimer,
-                                         bool concurrent) const {
-  _hrm.par_iterate(cl, worker_id, hrclaimer, concurrent);
+void G1CollectedHeap::heap_region_par_iterate(HeapRegionClosure* cl,
+                                              uint worker_id,
+                                              HeapRegionClaimer *hrclaimer) const {
+  _hrm.par_iterate(cl, worker_id, hrclaimer);
 }
 
 void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
@@ -3274,7 +3258,7 @@
         // investigate this in CR 7178365.
         double sample_end_time_sec = os::elapsedTime();
         double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
-        size_t total_cards_scanned = per_thread_states.total_cards_scanned();
+        size_t total_cards_scanned = g1_policy()->phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScannedCards);
         g1_policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc);
 
         evacuation_info.set_collectionset_used_before(collection_set()->bytes_used_before());
@@ -3458,17 +3442,13 @@
 
       _root_processor->evacuate_roots(pss->closures(), worker_id);
 
-      G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss);
-
       // We pass a weak code blobs closure to the remembered set scanning because we want to avoid
       // treating the nmethods visited to act as roots for concurrent marking.
       // We only want to make sure that the oops in the nmethods are adjusted with regard to the
       // objects copied by the current evacuation.
-      size_t cards_scanned = _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
-                                                                             pss->closures()->weak_codeblobs(),
-                                                                             worker_id);
-
-      _pss->add_cards_scanned(worker_id, cards_scanned);
+      _g1h->g1_rem_set()->oops_into_collection_set_do(pss,
+                                                      pss->closures()->weak_codeblobs(),
+                                                      worker_id);
 
       double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -1136,6 +1136,7 @@
   // set. Assumes that the reference points into the heap.
   inline bool is_in_cset(const HeapRegion *hr);
   inline bool is_in_cset(oop obj);
+  inline bool is_in_cset(HeapWord* addr);
 
   inline bool is_in_cset_or_humongous(const oop obj);
 
@@ -1194,17 +1195,14 @@
   inline HeapWord* bottom_addr_for_region(uint index) const;
 
   // Iterate over the heap regions in parallel. Assumes that this will be called
-  // in parallel by ParallelGCThreads worker threads with distinct worker ids
-  // in the range [0..max(ParallelGCThreads-1, 1)]. Applies "blk->doHeapRegion"
+  // in parallel by a number of worker threads with distinct worker ids
+  // in the range passed to the HeapRegionClaimer. Applies "cl->doHeapRegion"
   // to each of the regions, by attempting to claim the region using the
   // HeapRegionClaimer and, if successful, applying the closure to the claimed
-  // region. The concurrent argument should be set to true if iteration is
-  // performed concurrently, during which no assumptions are made for consistent
-  // attributes of the heap regions (as they might be modified while iterating).
+  // region.
   void heap_region_par_iterate(HeapRegionClosure* cl,
                                uint worker_id,
-                               HeapRegionClaimer* hrclaimer,
-                               bool concurrent = false) const;
+                               HeapRegionClaimer* hrclaimer) const;
 
   // Iterate over the regions (if any) in the current collection set.
   void collection_set_iterate(HeapRegionClosure* blk);
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -139,7 +139,11 @@
 }
 
 inline bool G1CollectedHeap::is_in_cset(oop obj) {
-  return _in_cset_fast_test.is_in_cset((HeapWord*)obj);
+  return is_in_cset((HeapWord*)obj);
+}
+
+inline bool G1CollectedHeap::is_in_cset(HeapWord* addr) {
+  return _in_cset_fast_test.is_in_cset(addr);
 }
 
 bool G1CollectedHeap::is_in_cset(const HeapRegion* hr) {
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -703,7 +703,7 @@
 
   void work(uint worker_id) {
     SuspendibleThreadSetJoiner sts_join(_suspendible);
-    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true);
+    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
   }
 
   bool is_complete() {
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -78,6 +78,13 @@
   _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms):");
   _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms):");
 
+  _scan_rs_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
+  _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_scanned_cards, ScannedCards);
+  _scan_rs_claimed_cards = new WorkerDataArray<size_t>(max_gc_threads, "Claimed Cards:");
+  _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_claimed_cards, ClaimedCards);
+  _scan_rs_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
+  _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_skipped_cards, SkippedCards);
+
   _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers:");
   _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);
 
@@ -210,8 +217,8 @@
   _gc_par_phases[phase]->add(worker_i, secs);
 }
 
-void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
-  _gc_par_phases[phase]->set_thread_work_item(worker_i, count);
+void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index) {
+  _gc_par_phases[phase]->set_thread_work_item(worker_i, count, index);
 }
 
 // return the average time for a phase in milliseconds
@@ -219,9 +226,9 @@
   return _gc_par_phases[phase]->average() * 1000.0;
 }
 
-size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
-  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
-  return _gc_par_phases[phase]->thread_work_items()->sum();
+size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) {
+  assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items(index)->sum();
 }
 
 template <class T>
@@ -239,11 +246,13 @@
   phase->print_summary_on(out, print_sum);
   details(phase, Indents[indent]);
 
-  WorkerDataArray<size_t>* work_items = phase->thread_work_items();
-  if (work_items != NULL) {
-    out->print("%s", Indents[indent + 1]);
-    work_items->print_summary_on(out, true);
-    details(work_items, Indents[indent + 1]);
+  for (uint i = 0; i < phase->MaxThreadWorkItems; i++) {
+    WorkerDataArray<size_t>* work_items = phase->thread_work_items(i);
+    if (work_items != NULL) {
+      out->print("%s", Indents[indent + 1]);
+      work_items->print_summary_on(out, true);
+      details(work_items, Indents[indent + 1]);
+    }
   }
 }
 
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -76,6 +76,12 @@
     GCParPhasesSentinel
   };
 
+  enum GCScanRSWorkItems {
+    ScannedCards,
+    ClaimedCards,
+    SkippedCards
+  };
+
  private:
   // Markers for grouping the phases in the GCPhases enum above
   static const int GCMainParPhasesLast = GCWorkerEnd;
@@ -83,8 +89,15 @@
   static const int StringDedupPhasesLast = StringDedupTableFixup;
 
   WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
+
   WorkerDataArray<size_t>* _update_rs_processed_buffers;
+
+  WorkerDataArray<size_t>* _scan_rs_scanned_cards;
+  WorkerDataArray<size_t>* _scan_rs_claimed_cards;
+  WorkerDataArray<size_t>* _scan_rs_skipped_cards;
+
   WorkerDataArray<size_t>* _termination_attempts;
+
   WorkerDataArray<size_t>* _redirtied_cards;
 
   double _cur_collection_par_time_ms;
@@ -170,12 +183,12 @@
   // add a number of seconds to a phase
   void add_time_secs(GCParPhases phase, uint worker_i, double secs);
 
-  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);
+  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index = 0);
 
   // return the average time for a phase in milliseconds
   double average_time_ms(GCParPhases phase);
 
-  size_t sum_thread_work_items(GCParPhases phase);
+  size_t sum_thread_work_items(GCParPhases phase, uint index = 0);
 
  public:
 
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -38,7 +38,7 @@
   _cm(_g1->concurrent_mark())
 { }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ScanClosureBase::G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _par_scan_state(par_scan_state)
 { }
 
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
 #define SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
 
+#include "gc/g1/g1InCSetState.hpp"
 #include "memory/iterator.hpp"
 #include "oops/markOop.hpp"
 
@@ -47,34 +48,60 @@
   void set_region(HeapRegion* from) { _from = from; }
 };
 
-class G1ParClosureSuper : public OopsInHeapRegionClosure {
+class G1ScanClosureBase : public OopsInHeapRegionClosure {
 protected:
   G1CollectedHeap* _g1;
   G1ParScanThreadState* _par_scan_state;
 
-  G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
-  ~G1ParClosureSuper() { }
+  G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
+  ~G1ScanClosureBase() { }
 
+  template <class T>
+  inline void prefetch_and_push(T* p, oop const obj);
+
+  template <class T>
+  inline void handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj);
 public:
   // This closure needs special handling for InstanceRefKlass.
   virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
 };
 
-class G1ParPushHeapRSClosure : public G1ParClosureSuper {
+// Used during the Update RS phase to refine remaining cards in the DCQ during garbage collection.
+class G1ScanObjsDuringUpdateRSClosure: public G1ScanClosureBase {
+  uint _worker_i;
+  bool _has_refs_into_cset;
+
 public:
-  G1ParPushHeapRSClosure(G1CollectedHeap* g1,
-                         G1ParScanThreadState* par_scan_state):
-    G1ParClosureSuper(g1, par_scan_state) { }
+  G1ScanObjsDuringUpdateRSClosure(G1CollectedHeap* g1h,
+                                  G1ParScanThreadState* pss,
+                                  uint worker_i) :
+    G1ScanClosureBase(g1h, pss), _has_refs_into_cset(false), _worker_i(worker_i) { }
+
+  void reset_has_refs_into_cset() { _has_refs_into_cset = false; }
+  bool has_refs_into_cset() const { return _has_refs_into_cset; }
+
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+};
+
+// Used during the Scan RS phase to scan cards from the remembered set during garbage collection.
+class G1ScanObjsDuringScanRSClosure : public G1ScanClosureBase {
+public:
+  G1ScanObjsDuringScanRSClosure(G1CollectedHeap* g1,
+                                G1ParScanThreadState* par_scan_state):
+    G1ScanClosureBase(g1, par_scan_state) { }
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
 };
 
-class G1ParScanClosure : public G1ParClosureSuper {
+// This closure is applied to the fields of the objects that have just been copied during evacuation.
+class G1ScanEvacuatedObjClosure : public G1ScanClosureBase {
 public:
-  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
-    G1ParClosureSuper(g1, par_scan_state) { }
+  G1ScanEvacuatedObjClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ScanClosureBase(g1, par_scan_state) { }
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
@@ -186,42 +213,7 @@
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
-  virtual void do_oop(oop* p) { do_oop_nv(p); }
-};
-
-class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure {
-  G1CollectedHeap* _g1;
-  HeapRegion* _from;
-  G1ParPushHeapRSClosure* _push_ref_cl;
-  bool _record_refs_into_cset;
-  uint _worker_i;
-  bool _has_refs_into_cset;
-
-public:
-  G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                                G1ParPushHeapRSClosure* push_ref_cl,
-                                bool record_refs_into_cset,
-                                uint worker_i = 0);
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  bool self_forwarded(oop obj) {
-    markOop m = obj->mark();
-    bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj));
-    return result;
-  }
-
-  bool has_refs_into_cset() const { return _has_refs_into_cset; }
-
-  template <class T> inline void do_oop_nv(T* p);
-  virtual inline void do_oop(narrowOop* p);
-  virtual inline void do_oop(oop* p);
-
-  // This closure needs special handling for InstanceRefKlass.
-  virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
 };
 
 #endif // SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -36,61 +36,51 @@
 #include "memory/iterator.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
-// This closure is applied to the fields of the objects that have just been copied.
 template <class T>
-inline void G1ParScanClosure::do_oop_nv(T* p) {
-  T heap_oop = oopDesc::load_heap_oop(p);
-
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    const InCSetState state = _g1->in_cset_state(obj);
-    if (state.is_in_cset()) {
-      // We're not going to even bother checking whether the object is
-      // already forwarded or not, as this usually causes an immediate
-      // stall. We'll try to prefetch the object (for write, given that
-      // we might need to install the forwarding reference) and we'll
-      // get back to it when pop it from the queue
-      Prefetch::write(obj->mark_addr(), 0);
-      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+inline void G1ScanClosureBase::prefetch_and_push(T* p, const oop obj) {
+  // We're not going to even bother checking whether the object is
+  // already forwarded or not, as this usually causes an immediate
+  // stall. We'll try to prefetch the object (for write, given that
+  // we might need to install the forwarding reference) and we'll
+  // get back to it when we pop it from the queue
+  Prefetch::write(obj->mark_addr(), 0);
+  Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
 
-      // slightly paranoid test; I'm trying to catch potential
-      // problems before we go into push_on_queue to know where the
-      // problem is coming from
-      assert((obj == oopDesc::load_decode_heap_oop(p)) ||
-             (obj->is_forwarded() &&
-                 obj->forwardee() == oopDesc::load_decode_heap_oop(p)),
-             "p should still be pointing to obj or to its forwardee");
+  // slightly paranoid test; I'm trying to catch potential
+  // problems before we go into push_on_queue to know where the
+  // problem is coming from
+  assert((obj == oopDesc::load_decode_heap_oop(p)) ||
+         (obj->is_forwarded() &&
+         obj->forwardee() == oopDesc::load_decode_heap_oop(p)),
+         "p should still be pointing to obj or to its forwardee");
 
-      _par_scan_state->push_on_queue(p);
-    } else {
-      if (state.is_humongous()) {
-        _g1->set_humongous_is_live(obj);
-      } else if (state.is_ext()) {
-        _par_scan_state->do_oop_ext(p);
-      }
-      _par_scan_state->update_rs(_from, p, obj);
-    }
+  _par_scan_state->push_on_queue(p);
+}
+
+template <class T>
+inline void G1ScanClosureBase::handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj) {
+  if (state.is_humongous()) {
+    _g1->set_humongous_is_live(obj);
+  } else if (state.is_ext()) {
+    _par_scan_state->do_oop_ext(p);
   }
 }
 
 template <class T>
-inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+inline void G1ScanEvacuatedObjClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    const InCSetState state = _g1->in_cset_state(obj);
-    if (state.is_in_cset_or_humongous()) {
-      Prefetch::write(obj->mark_addr(), 0);
-      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+  if (oopDesc::is_null(heap_oop)) {
+    return;
+  }
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    prefetch_and_push(p, obj);
+  } else {
+    handle_non_cset_obj_common(state, p, obj);
 
-      // Place on the references queue
-      _par_scan_state->push_on_queue(p);
-    } else if (state.is_ext()) {
-      _par_scan_state->do_oop_ext(p);
-    } else {
-      assert(!_g1->is_in_cset(obj), "checking");
-    }
+    _par_scan_state->update_rs(_from, p, obj);
   }
 }
 
@@ -145,10 +135,10 @@
     // Normally this closure should only be called with cross-region references.
     // But since Java threads are manipulating the references concurrently and we
     // reload the values things may have changed.
-    // This check lets slip through references from a humongous continues region
+    // Also this check lets slip through references from a humongous continues region
     // to its humongous start region, as they are in different regions, and adds a
-    // remembered set entry. This is benign (apart from memory usage), as this
-    // closure is never called during evacuation.
+    // remembered set entry. This is benign (apart from memory usage), as we never
+    // try to either evacuate or eager reclaim humongous arrays of j.l.O.
     return;
   }
 
@@ -159,79 +149,50 @@
 }
 
 template <class T>
-inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
-  oop obj = oopDesc::load_decode_heap_oop(p);
-  if (obj == NULL) {
-    return;
-  }
-
-#ifdef ASSERT
-  // can't do because of races
-  // assert(obj == NULL || obj->is_oop(), "expected an oop");
-  assert(check_obj_alignment(obj), "not oop aligned");
-  assert(_g1->is_in_reserved(obj), "must be in heap");
-#endif // ASSERT
-
-  assert(_from != NULL, "from region must be non-NULL");
-  assert(_from->is_in_reserved(p) ||
-         (_from->is_humongous() &&
-          _g1->heap_region_containing(p)->is_humongous() &&
-          _from->humongous_start_region() == _g1->heap_region_containing(p)->humongous_start_region()),
-         "p " PTR_FORMAT " is not in the same region %u or part of the correct humongous object starting at region %u.",
-         p2i(p), _from->hrm_index(), _from->humongous_start_region()->hrm_index());
-
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (_from == to) {
-    // Normally this closure should only be called with cross-region references.
-    // But since Java threads are manipulating the references concurrently and we
-    // reload the values things may have changed.
-    // Also this check lets slip through references from a humongous continues region
-    // to its humongous start region, as they are in different regions, and adds a
-    // remembered set entry. This is benign (apart from memory usage), as we never
-    // try to either evacuate or eager reclaim these kind of regions.
+inline void G1ScanObjsDuringUpdateRSClosure::do_oop_nv(T* p) {
+  T o = oopDesc::load_heap_oop(p);
+  if (oopDesc::is_null(o)) {
     return;
   }
+  oop obj = oopDesc::decode_heap_oop_not_null(o);
 
-  // The _record_refs_into_cset flag is true during the RSet
-  // updating part of an evacuation pause. It is false at all
-  // other times:
-  //  * rebuilding the remembered sets after a full GC
-  //  * during concurrent refinement.
-  //  * updating the remembered sets of regions in the collection
-  //    set in the event of an evacuation failure (when deferred
-  //    updates are enabled).
+  check_obj_during_refinement(p, obj);
 
-  if (_record_refs_into_cset && to->in_collection_set()) {
-    // We are recording references that point into the collection
-    // set and this particular reference does exactly that...
-    // If the referenced object has already been forwarded
-    // to itself, we are handling an evacuation failure and
-    // we have already visited/tried to copy this object
-    // there is no need to retry.
-    if (!self_forwarded(obj)) {
-    assert(_push_ref_cl != NULL, "should not be null");
-    // Push the reference in the refs queue of the G1ParScanThreadState
-    // instance for this worker thread.
-      _push_ref_cl->do_oop(p);
+  assert(!_g1->is_in_cset((HeapWord*)p), "Oop originates from " PTR_FORMAT " (region: %u) which is in the collection set.", p2i(p), _g1->addr_to_region((HeapWord*)p));
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    // Since the source is always from outside the collection set, here we implicitly know
+    // that this is a cross-region reference too.
+    prefetch_and_push(p, obj);
+
+    _has_refs_into_cset = true;
+  } else {
+    HeapRegion* to = _g1->heap_region_containing(obj);
+    if (_from == to) {
+      return;
     }
-    _has_refs_into_cset = true;
 
-    // Deferred updates to the CSet are either discarded (in the normal case),
-    // or processed (if an evacuation failure occurs) at the end
-    // of the collection.
-    // See G1RemSet::cleanup_after_oops_into_collection_set_do().
-  } else {
-    // We either don't care about pushing references that point into the
-    // collection set (i.e. we're not during an evacuation pause) _or_
-    // the reference doesn't point into the collection set. Either way
-    // we add the reference directly to the RSet of the region containing
-    // the referenced object.
-    assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+    handle_non_cset_obj_common(state, p, obj);
+
     to->rem_set()->add_reference(p, _worker_i);
   }
 }
-void G1UpdateRSOrPushRefOopClosure::do_oop(oop* p)       { do_oop_nv(p); }
-void G1UpdateRSOrPushRefOopClosure::do_oop(narrowOop* p) { do_oop_nv(p); }
+
+template <class T>
+inline void G1ScanObjsDuringScanRSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (oopDesc::is_null(heap_oop)) {
+    return;
+  }
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    prefetch_and_push(p, obj);
+  } else {
+    handle_non_cset_obj_common(state, p, obj);
+  }
+}
 
 template <class T>
 void G1ParCopyHelper::do_klass_barrier(T* p, oop new_obj) {
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -337,16 +337,6 @@
   return _states[worker_id];
 }
 
-void G1ParScanThreadStateSet::add_cards_scanned(uint worker_id, size_t cards_scanned) {
-  assert(worker_id < _n_workers, "out of bounds access");
-  _cards_scanned[worker_id] += cards_scanned;
-}
-
-size_t G1ParScanThreadStateSet::total_cards_scanned() const {
-  assert(_flushed, "thread local state from the per thread states should have been flushed");
-  return _total_cards_scanned;
-}
-
 const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
   assert(_flushed, "thread local state from the per thread states should have been flushed");
   return _surviving_young_words_total;
@@ -354,7 +344,6 @@
 
 void G1ParScanThreadStateSet::flush() {
   assert(!_flushed, "thread local state from the per thread states should be flushed once");
-  assert(_total_cards_scanned == 0, "should have been cleared");
 
   for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) {
     G1ParScanThreadState* pss = _states[worker_index];
@@ -363,8 +352,6 @@
       continue;
     }
 
-    _total_cards_scanned += _cards_scanned[worker_index];
-
     pss->flush(_surviving_young_words_total);
     delete pss;
     _states[worker_index] = NULL;
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,7 +54,7 @@
   InCSetState       _dest[InCSetState::Num];
   // Local tenuring threshold.
   uint              _tenuring_threshold;
-  G1ParScanClosure  _scanner;
+  G1ScanEvacuatedObjClosure  _scanner;
 
   int  _hash_seed;
   uint _worker_id;
@@ -198,8 +198,6 @@
   G1CollectedHeap* _g1h;
   G1ParScanThreadState** _states;
   size_t* _surviving_young_words_total;
-  size_t* _cards_scanned;
-  size_t _total_cards_scanned;
   size_t _young_cset_length;
   uint _n_workers;
   bool _flushed;
@@ -209,8 +207,6 @@
       _g1h(g1h),
       _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)),
       _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, young_cset_length, mtGC)),
-      _cards_scanned(NEW_C_HEAP_ARRAY(size_t, n_workers, mtGC)),
-      _total_cards_scanned(0),
       _young_cset_length(young_cset_length),
       _n_workers(n_workers),
       _flushed(false) {
@@ -218,22 +214,18 @@
       _states[i] = NULL;
     }
     memset(_surviving_young_words_total, 0, young_cset_length * sizeof(size_t));
-    memset(_cards_scanned, 0, n_workers * sizeof(size_t));
   }
 
   ~G1ParScanThreadStateSet() {
     assert(_flushed, "thread local state from the per thread states should have been flushed");
     FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
     FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
-    FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
   }
 
   void flush();
 
   G1ParScanThreadState* state_for_worker(uint worker_id);
 
-  void add_cards_scanned(uint worker_id, size_t cards_scanned);
-  size_t total_cards_scanned() const;
   const size_t* surviving_young_words() const;
 
  private:
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -327,15 +327,16 @@
 }
 
 G1ScanRSClosure::G1ScanRSClosure(G1RemSetScanState* scan_state,
-                                 G1ParPushHeapRSClosure* push_heap_cl,
+                                 G1ScanObjsDuringScanRSClosure* scan_obj_on_card,
                                  CodeBlobClosure* code_root_cl,
                                  uint worker_i) :
   _scan_state(scan_state),
-  _push_heap_cl(push_heap_cl),
+  _scan_objs_on_card_cl(scan_obj_on_card),
   _code_root_cl(code_root_cl),
   _strong_code_root_scan_time_sec(0.0),
-  _cards(0),
-  _cards_done(0),
+  _cards_claimed(0),
+  _cards_scanned(0),
+  _cards_skipped(0),
   _worker_i(worker_i) {
   _g1h = G1CollectedHeap::heap();
   _bot = _g1h->bot();
@@ -352,9 +353,9 @@
     // but they're benign), which reduces the number of duplicate
     // scans (the rsets of the regions in the cset can intersect).
     _ct_bs->set_card_claimed(index);
-    _push_heap_cl->set_region(r);
-    r->oops_on_card_seq_iterate_careful<true>(mr, _push_heap_cl);
-    _cards_done++;
+    _scan_objs_on_card_cl->set_region(r);
+    r->oops_on_card_seq_iterate_careful<true>(mr, _scan_objs_on_card_cl);
+    _cards_scanned++;
   }
 }
 
@@ -389,12 +390,13 @@
       claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
     }
     if (current_card < claimed_card_block) {
+      _cards_skipped++;
       continue;
     }
     HeapWord* card_start = _g1h->bot()->address_for_index(card_index);
 
     HeapRegion* card_region = _g1h->heap_region_containing(card_start);
-    _cards++;
+    _cards_claimed++;
 
     _scan_state->add_dirty_region(card_region->hrm_index());
 
@@ -411,21 +413,26 @@
   return false;
 }
 
-size_t G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
-                              CodeBlobClosure* heap_region_codeblobs,
-                              uint worker_i) {
+void G1RemSet::scan_rem_set(G1ParScanThreadState* pss,
+                            CodeBlobClosure* heap_region_codeblobs,
+                            uint worker_i) {
   double rs_time_start = os::elapsedTime();
 
-  G1ScanRSClosure cl(_scan_state, oops_in_heap_closure, heap_region_codeblobs, worker_i);
+  G1ScanObjsDuringScanRSClosure scan_objs_cl(_g1, pss);
+  G1ScanRSClosure cl(_scan_state, &scan_objs_cl, heap_region_codeblobs, worker_i);
   _g1->collection_set_iterate_from(&cl, worker_i);
 
-   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
-                              cl.strong_code_root_scan_time_sec();
+  // Code root scanning is recorded as its own phase, so exclude it from the ScanRS time.
+  double code_root_time_sec = cl.strong_code_root_scan_time_sec();
+  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - code_root_time_sec;
+  G1GCPhaseTimes* phase_times = _g1p->phase_times();
 
-  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
-  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec());
+  phase_times->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
+  phase_times->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_scanned(), G1GCPhaseTimes::ScannedCards);
+  phase_times->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_claimed(), G1GCPhaseTimes::ClaimedCards);
+  phase_times->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_skipped(), G1GCPhaseTimes::SkippedCards);
 
-  return cl.cards_done();
+  phase_times->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, code_root_time_sec);
 }
 
 // Closure used for updating RSets and recording references that
@@ -435,12 +442,12 @@
 class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
   G1RemSet* _g1rs;
   DirtyCardQueue* _into_cset_dcq;
-  G1ParPushHeapRSClosure* _cl;
+  G1ScanObjsDuringUpdateRSClosure* _update_rs_cl;
 public:
   RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
                                               DirtyCardQueue* into_cset_dcq,
-                                              G1ParPushHeapRSClosure* cl) :
-    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _cl(cl)
+                                              G1ScanObjsDuringUpdateRSClosure* update_rs_cl) :
+    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _update_rs_cl(update_rs_cl)
   {}
 
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
@@ -449,9 +456,8 @@
     // is during RSet updating within an evacuation pause.
     // In this case worker_i should be the id of a GC worker thread.
     assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
-    assert(worker_i < ParallelGCThreads, "should be a GC worker");
 
-    if (_g1rs->refine_card_during_gc(card_ptr, worker_i, _cl)) {
+    if (_g1rs->refine_card_during_gc(card_ptr, _update_rs_cl)) {
       // 'card_ptr' contains references that point into the collection
       // set. We need to record the card in the DCQS
       // (_into_cset_dirty_card_queue_set)
@@ -465,9 +471,10 @@
 };
 
 void G1RemSet::update_rem_set(DirtyCardQueue* into_cset_dcq,
-                              G1ParPushHeapRSClosure* oops_in_heap_closure,
+                              G1ParScanThreadState* pss,
                               uint worker_i) {
-  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, oops_in_heap_closure);
+  G1ScanObjsDuringUpdateRSClosure update_rs_cl(_g1, pss, worker_i);
+  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, &update_rs_cl);
 
   G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   if (G1HotCardCache::default_use_cache()) {
@@ -483,9 +490,9 @@
   HeapRegionRemSet::cleanup();
 }
 
-size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
-                                             CodeBlobClosure* heap_region_codeblobs,
-                                             uint worker_i) {
+void G1RemSet::oops_into_collection_set_do(G1ParScanThreadState* pss,
+                                           CodeBlobClosure* heap_region_codeblobs,
+                                           uint worker_i) {
   // A DirtyCardQueue that is used to hold cards containing references
   // that point into the collection set. This DCQ is associated with a
   // special DirtyCardQueueSet (see g1CollectedHeap.hpp).  Under normal
@@ -497,8 +504,8 @@
   // DirtyCardQueueSet that is used to manage RSet updates
   DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
 
-  update_rem_set(&into_cset_dcq, cl, worker_i);
-  return scan_rem_set(cl, heap_region_codeblobs, worker_i);;
+  update_rem_set(&into_cset_dcq, pss, worker_i);
+  scan_rem_set(pss, heap_region_codeblobs, worker_i);
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
@@ -573,17 +580,6 @@
 #endif
 }
 
-G1UpdateRSOrPushRefOopClosure::G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                                                             G1ParPushHeapRSClosure* push_ref_cl,
-                                                             bool record_refs_into_cset,
-                                                             uint worker_i) :
-  _g1(g1h),
-  _from(NULL),
-  _record_refs_into_cset(record_refs_into_cset),
-  _has_refs_into_cset(false),
-  _push_ref_cl(push_ref_cl),
-  _worker_i(worker_i) { }
-
 void G1RemSet::refine_card_concurrently(jbyte* card_ptr,
                                         uint worker_i) {
   assert(!_g1->is_gc_active(), "Only call concurrently");
@@ -734,8 +730,7 @@
 }
 
 bool G1RemSet::refine_card_during_gc(jbyte* card_ptr,
-                                     uint worker_i,
-                                     G1ParPushHeapRSClosure*  oops_in_heap_closure) {
+                                     G1ScanObjsDuringUpdateRSClosure* update_rs_cl) {
   assert(_g1->is_gc_active(), "Only call during GC");
 
   check_card_ptr(card_ptr, _ct_bs);
@@ -769,19 +764,14 @@
   MemRegion dirty_region(card_start, MIN2(scan_limit, card_end));
   assert(!dirty_region.is_empty(), "sanity");
 
-  G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
-                                                 oops_in_heap_closure,
-                                                 true,
-                                                 worker_i);
-  update_rs_oop_cl.set_from(r);
+  update_rs_cl->set_region(r);
+  update_rs_cl->reset_has_refs_into_cset();
 
-  bool card_processed =
-    r->oops_on_card_seq_iterate_careful<true>(dirty_region,
-                                              &update_rs_oop_cl);
+  bool card_processed = r->oops_on_card_seq_iterate_careful<true>(dirty_region, update_rs_cl);
   assert(card_processed, "must be");
   _conc_refine_cards++;
 
-  return update_rs_oop_cl.has_refs_into_cset();
+  return update_rs_cl->has_refs_into_cset();
 }
 
 void G1RemSet::print_periodic_summary_info(const char* header, uint period_count) {
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -41,10 +41,12 @@
 class CodeBlobClosure;
 class G1CollectedHeap;
 class G1HotCardCache;
-class G1ParPushHeapRSClosure;
 class G1RemSetScanState;
+class G1ParScanThreadState;
 class G1Policy;
 class G1SATBCardTableModRefBS;
+class G1ScanObjsDuringScanRSClosure;
+class G1ScanObjsDuringUpdateRSClosure;
 class HeapRegionClaimer;
 
 // A G1RemSet in which each heap region has a rem set that records the
@@ -94,26 +96,14 @@
            G1HotCardCache* hot_card_cache);
   ~G1RemSet();
 
-  // Invoke "cl->do_oop" on all pointers into the collection set
-  // from objects in regions outside the collection set (having
-  // invoked "cl->set_region" to set the "from" region correctly
-  // beforehand.)
-  //
-  // Apply non_heap_roots on the oops of the unmarked nmethods
-  // on the strong code roots list for each region in the
-  // collection set.
+  // Process all references into the collection set that are found via the cards in
+  // the refinement buffers and the remembered sets, using pss.
   //
-  // The "worker_i" param is for the parallel case where the id
-  // of the worker thread calling this function can be helpful in
-  // partitioning the work to be done. It should be the same as
-  // the "i" passed to the calling thread's work(i) function.
-  // In the sequential case this param will be ignored.
-  //
-  // Returns the number of cards scanned while looking for pointers
-  // into the collection set.
-  size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
-                                     CodeBlobClosure* heap_region_codeblobs,
-                                     uint worker_i);
+  // Further applies heap_region_codeblobs on the oops of the unmarked nmethods on the strong code
+  // roots list for each region in the collection set.
+  void oops_into_collection_set_do(G1ParScanThreadState* pss,
+                                   CodeBlobClosure* heap_region_codeblobs,
+                                   uint worker_i);
 
   // Prepare for and cleanup after an oops_into_collection_set_do
   // call.  Must call each of these once before and after (in sequential
@@ -123,15 +113,14 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  size_t scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
-                      CodeBlobClosure* heap_region_codeblobs,
-                      uint worker_i);
+  void scan_rem_set(G1ParScanThreadState* pss,
+                    CodeBlobClosure* heap_region_codeblobs,
+                    uint worker_i);
 
   G1RemSetScanState* scan_state() const { return _scan_state; }
 
-  // Flush remaining refinement buffers into the remembered set,
-  // applying oops_in_heap_closure on the references found.
-  void update_rem_set(DirtyCardQueue* into_cset_dcq, G1ParPushHeapRSClosure* oops_in_heap_closure, uint worker_i);
+  // Flush remaining refinement buffers into the remembered set.
+  void update_rem_set(DirtyCardQueue* into_cset_dcq, G1ParScanThreadState* pss, uint worker_i);
 
   // Record, if necessary, the fact that *p (where "p" is in region "from",
   // which is required to be non-NULL) has changed to a new non-NULL value.
@@ -152,8 +141,7 @@
   // Refine the card corresponding to "card_ptr". Returns "true" if the given card contains
   // oops that have references into the current collection set.
   bool refine_card_during_gc(jbyte* card_ptr,
-                             uint worker_i,
-                             G1ParPushHeapRSClosure* oops_in_heap_closure);
+                             G1ScanObjsDuringUpdateRSClosure* update_rs_cl);
 
   // Print accumulated summary info from the start of the VM.
   void print_summary_info();
@@ -185,11 +173,13 @@
 class G1ScanRSClosure : public HeapRegionClosure {
   G1RemSetScanState* _scan_state;
 
-  size_t _cards_done;
-  size_t _cards;
+  size_t _cards_scanned;
+  size_t _cards_claimed;
+  size_t _cards_skipped;
+
   G1CollectedHeap* _g1h;
 
-  G1ParPushHeapRSClosure* _push_heap_cl;
+  G1ScanObjsDuringScanRSClosure* _scan_objs_on_card_cl;
   CodeBlobClosure* _code_root_cl;
 
   G1BlockOffsetTable* _bot;
@@ -203,7 +193,7 @@
   void scan_strong_code_roots(HeapRegion* r);
 public:
   G1ScanRSClosure(G1RemSetScanState* scan_state,
-                  G1ParPushHeapRSClosure* push_heap_cl,
+                  G1ScanObjsDuringScanRSClosure* scan_obj_on_card,
                   CodeBlobClosure* code_root_cl,
                   uint worker_i);
 
@@ -213,8 +203,9 @@
     return _strong_code_root_scan_time_sec;
   }
 
-  size_t cards_done() { return _cards_done;}
-  size_t cards_looked_up() { return _cards;}
+  size_t cards_scanned() const { return _cards_scanned; }
+  size_t cards_claimed() const { return _cards_claimed; }
+  size_t cards_skipped() const { return _cards_skipped; }
 };
 
 class UpdateRSOopClosure: public ExtendedOopClosure {
--- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,6 @@
 class G1CollectedHeap;
 class G1EvacuationRootClosures;
 class G1GCPhaseTimes;
-class G1ParPushHeapRSClosure;
 class G1RootClosures;
 class Monitor;
 class OopClosure;
--- a/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -32,19 +32,19 @@
 
 // Forward declarations.
 
-class G1ParScanClosure;
-class G1ParPushHeapRSClosure;
+class G1ScanEvacuatedObjClosure;
 
-class G1UpdateRSOrPushRefOopClosure;
+class G1ScanObjsDuringUpdateRSClosure;
+class G1ScanObjsDuringScanRSClosure;
 class G1ConcurrentRefineOopClosure;
 
 class G1CMOopClosure;
 class G1RootRegionScanClosure;
 
 #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_G1(f) \
-      f(G1ParScanClosure,_nv)                      \
-      f(G1ParPushHeapRSClosure,_nv)                \
-      f(G1UpdateRSOrPushRefOopClosure,_nv)         \
+      f(G1ScanEvacuatedObjClosure,_nv)             \
+      f(G1ScanObjsDuringUpdateRSClosure,_nv)       \
+      f(G1ScanObjsDuringScanRSClosure,_nv)         \
       f(G1ConcurrentRefineOopClosure,_nv)          \
       f(G1CMOopClosure,_nv)                        \
       f(G1RootRegionScanClosure,_nv)
--- a/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -327,7 +327,7 @@
   return true;
 }
 
-void HeapRegionManager::par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer, bool concurrent) const {
+void HeapRegionManager::par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer) const {
   const uint start_index = hrclaimer->start_region_for_worker(worker_id);
 
   // Every worker will actually look at all regions, skipping over regions that
--- a/hotspot/src/share/vm/gc/g1/heapRegionManager.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -240,7 +240,7 @@
   // terminating the iteration early if doHeapRegion() returns true.
   void iterate(HeapRegionClosure* blk) const;
 
-  void par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer, bool concurrent) const;
+  void par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer) const;
 
   // Uncommit up to num_regions_to_remove regions that are completely free.
   // Return the actual number of uncommitted regions.
--- a/hotspot/src/share/vm/gc/g1/workerDataArray.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/workerDataArray.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -33,20 +33,25 @@
 template <class T>
 class WorkerDataArray  : public CHeapObj<mtGC> {
   friend class WDAPrinter;
+public:
+  static const uint MaxThreadWorkItems = 3;
+private:
   T*          _data;
   uint        _length;
   const char* _title;
 
-  WorkerDataArray<size_t>* _thread_work_items;
+  WorkerDataArray<size_t>* _thread_work_items[MaxThreadWorkItems];
 
  public:
   WorkerDataArray(uint length, const char* title);
   ~WorkerDataArray();
 
-  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items);
-  void set_thread_work_item(uint worker_i, size_t value);
-  WorkerDataArray<size_t>* thread_work_items() const {
-    return _thread_work_items;
+  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items, uint index = 0);
+  void set_thread_work_item(uint worker_i, size_t value, uint index = 0);
+  void add_thread_work_item(uint worker_i, size_t value, uint index = 0);
+  WorkerDataArray<size_t>* thread_work_items(uint index = 0) const {
+    assert(index < MaxThreadWorkItems, "Tried to access thread work item %u max %u", index, MaxThreadWorkItems);
+    return _thread_work_items[index];
   }
 
   static T uninitialized();
--- a/hotspot/src/share/vm/gc/g1/workerDataArray.inline.hpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/workerDataArray.inline.hpp	Wed Jun 28 14:13:00 2017 +0200
@@ -32,11 +32,13 @@
 template <typename T>
 WorkerDataArray<T>::WorkerDataArray(uint length, const char* title) :
  _title(title),
- _length(0),
- _thread_work_items(NULL) {
+ _length(0) {
   assert(length > 0, "Must have some workers to store data for");
   _length = length;
   _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
+  for (uint i = 0; i < MaxThreadWorkItems; i++) {
+    _thread_work_items[i] = NULL;
+  }
   reset();
 }
 
@@ -59,14 +61,23 @@
 }
 
 template <typename T>
-void WorkerDataArray<T>::link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
-  _thread_work_items = thread_work_items;
+void WorkerDataArray<T>::link_thread_work_items(WorkerDataArray<size_t>* thread_work_items, uint index) {
+  assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
+  _thread_work_items[index] = thread_work_items;
 }
 
 template <typename T>
-void WorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value) {
-  assert(_thread_work_items != NULL, "No sub count");
-  _thread_work_items->set(worker_i, value);
+void WorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value, uint index) {
+  assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
+  assert(_thread_work_items[index] != NULL, "No sub count");
+  _thread_work_items[index]->set(worker_i, value);
+}
+
+template <typename T>
+void WorkerDataArray<T>::add_thread_work_item(uint worker_i, size_t value, uint index) {
+  assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
+  assert(_thread_work_items[index] != NULL, "No sub count");
+  _thread_work_items[index]->add(worker_i, value);
 }
 
 template <typename T>
@@ -148,8 +159,10 @@
 template <typename T>
 void WorkerDataArray<T>::reset() {
   set_all(uninitialized());
-  if (_thread_work_items != NULL) {
-    _thread_work_items->reset();
+  for (uint i = 0; i < MaxThreadWorkItems; i++) {
+    if (_thread_work_items[i] != NULL) {
+      _thread_work_items[i]->reset();
+    }
   }
 }
 
--- a/hotspot/test/Makefile	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/test/Makefile	Wed Jun 28 14:13:00 2017 +0200
@@ -95,26 +95,8 @@
 ALT_MAKE ?= closed
 -include $(ALT_MAKE)/Makefile
 
-# flags used to execute java in test targets
-TEST_FLAGS += -version -Xinternalversion -X -help
-
-sanitytest: prep $(PRODUCT_HOME)
-	@for flag in $(TEST_FLAGS);                                             \
-	do                                                                      \
-	    echo Executing java $(JAVA_OPTIONS) $$flag;                         \
-	    $(PRODUCT_HOME)/bin/java $(JAVA_OPTIONS) $$flag;                    \
-	    res=$$?;                                                            \
-	    if [ $$res -ne 0 ]; then                                            \
-	        exit $$res;                                                     \
-	    fi;                                                                 \
-	done
-
-PHONY_LIST += sanitytest
-
 ################################################################
 
-# basicvmtest (make sure various basic java options work)
-
 # Set up the directory in which the jvm directories live (client/, server/, etc.)
 ifeq ($(PLATFORM),windows)
 JVMS_DIR := $(PRODUCT_HOME)/bin
@@ -126,45 +108,6 @@
 CANDIDATE_JVM_VARIANTS := client minimal server
 JVM_VARIANTS := $(strip $(foreach x,$(CANDIDATE_JVM_VARIANTS),$(if $(wildcard $(JVMS_DIR)/$(x)),$(x))))
 
-hotspot_basicvmtest:
-	for variant in $(JVM_VARIANTS);                                           \
-	do                                                                        \
-	    $(MAKE) JAVA_ARGS="$(JAVA_ARGS) -$$variant" hotspot_$${variant}test;  \
-	    res=$$?;                                                              \
-	    if [ $$res -ne 0 ]; then                                              \
-	        exit $$res;                                                       \
-	    fi;                                                                   \
-	done
-
-PHONY_LIST += hotspot_basicvmtest
-
-################################################################
-
-# clienttest (make sure various basic java client options work)
-
-hotspot_clienttest clienttest: sanitytest
-	$(RM) $(PRODUCT_HOME)/jre/lib/*/client/classes.jsa
-	$(RM) $(PRODUCT_HOME)/jre/bin/client/classes.jsa
-	$(PRODUCT_HOME)/bin/java $(JAVA_OPTIONS) -Xshare:dump
-
-PHONY_LIST += hotspot_clienttest clienttest
-
-################################################################
-
-# minimaltest (make sure various basic java minimal options work)
-
-hotspot_minimaltest minimaltest: sanitytest
-
-PHONY_LIST += hotspot_minimaltest minimaltest
-
-################################################################
-
-# servertest (make sure various basic java server options work)
-
-hotspot_servertest servertest: sanitytest
-
-PHONY_LIST += hotspot_servertest servertest
-
 ################################################################
 
 # Run the native gtest tests from the test image
--- a/hotspot/test/TEST.groups	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/test/TEST.groups	Wed Jun 28 14:13:00 2017 +0200
@@ -47,6 +47,10 @@
 hotspot_native_sanity = \
   native_sanity
 
+hotspot_tier1_common = \
+  sanity/BasicVMTest.java \
+  native/GTestWrapper.java
+
 hotspot_tier1_compiler_1 = \
   compiler/aot/ \
   compiler/arraycopy/ \
@@ -196,6 +200,7 @@
   serviceability/logging
 
 hotspot_tier1 = \
+  :hotspot_tier1_common \
   :hotspot_tier1_compiler_1 \
   :hotspot_tier1_compiler_2 \
   :hotspot_tier1_compiler_3 \
--- a/hotspot/test/gc/g1/TestGCLogMessages.java	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/test/gc/g1/TestGCLogMessages.java	Wed Jun 28 14:13:00 2017 +0200
@@ -94,7 +94,14 @@
         new LogMessageWithLevel("Other", Level.INFO),
 
         // Update RS
+        new LogMessageWithLevel("Update RS", Level.DEBUG),
+        new LogMessageWithLevel("Processed Buffers", Level.DEBUG),
         new LogMessageWithLevel("Scan HCC", Level.TRACE),
+        // Scan RS
+        new LogMessageWithLevel("Scan RS", Level.DEBUG),
+        new LogMessageWithLevel("Scanned Cards", Level.DEBUG),
+        new LogMessageWithLevel("Claimed Cards", Level.DEBUG),
+        new LogMessageWithLevel("Skipped Cards", Level.DEBUG),
         // Ext Root Scan
         new LogMessageWithLevel("Thread Roots", Level.TRACE),
         new LogMessageWithLevel("StringTable Roots", Level.TRACE),
--- a/hotspot/test/native/gc/g1/test_workerDataArray.cpp	Tue Jun 27 14:34:00 2017 -0400
+++ b/hotspot/test/native/gc/g1/test_workerDataArray.cpp	Wed Jun 28 14:13:00 2017 +0200
@@ -34,7 +34,11 @@
  protected:
   WorkerDataArrayTest() :
     title("Test array"),
-    array(3, title) {
+    array(3, title),
+    sub_item_title("Sub item array"),
+    sub_item(3, sub_item_title) {
+
+    array.link_thread_work_items(&sub_item);
   }
 
   const char* print_summary() {
@@ -65,6 +69,9 @@
   const char* title;
   WorkerDataArray<T> array;
 
+  const char* sub_item_title;
+  WorkerDataArray<size_t> sub_item;
+
  private:
   virtual const char* expected_summary() = 0;
   virtual const char* expected_details() = 0;
@@ -111,6 +118,10 @@
     array.set(0, 5);
     array.set(1, 3);
     array.set(2, 7);
+
+    array.set_thread_work_item(0, 1);
+    array.set_thread_work_item(1, 2);
+    array.set_thread_work_item(2, 3);
   }
 
  private:
@@ -125,10 +136,12 @@
 
 TEST_VM_F(BasicWorkerDataArrayTest, sum_test) {
   ASSERT_EQ(15u, array.sum());
+  ASSERT_EQ(6u, array.thread_work_items(0)->sum());
 }
 
 TEST_VM_F(BasicWorkerDataArrayTest, average_test) {
   ASSERT_NEAR(5.0, array.average(), epsilon);
+  ASSERT_NEAR(2.0, array.thread_work_items(0)->average(), epsilon);
 }
 
 TEST_VM_F(BasicWorkerDataArrayTest, print_summary_on_test) {
@@ -149,6 +162,16 @@
     for (uint i = 0; i < 3; i++) {
       array.add(i, 1);
     }
+
+    WorkerDataArray<size_t>* sub_items = array.thread_work_items(0);
+
+    sub_items->set(0, 1);
+    sub_items->set(1, 2);
+    sub_items->set(2, 3);
+
+    for (uint i = 0; i < 3; i++) {
+      array.add_thread_work_item(i, 1);
+    }
   }
 
  private:
@@ -163,10 +186,12 @@
 
 TEST_VM_F(AddWorkerDataArrayTest, sum_test) {
   ASSERT_EQ(18u, array.sum());
+  ASSERT_EQ(9u, array.thread_work_items(0)->sum());
 }
 
 TEST_VM_F(AddWorkerDataArrayTest, average_test) {
   ASSERT_NEAR(6.0, array.average(), epsilon);
+  ASSERT_NEAR(3.0, array.thread_work_items(0)->average(), epsilon);
 }
 
 TEST_VM_F(AddWorkerDataArrayTest, print_summary_on_test) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/sanity/BasicVMTest.java	Wed Jun 28 14:13:00 2017 +0200
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sanity;
+
+import jdk.test.lib.process.ProcessTools;
+
+import java.util.List;
+
+/*
+ * @test
+ * @summary make sure various basic java options work
+ * @library /test/lib
+ *
+ * @run driver sanity.BasicVMTest
+ */
+public class BasicVMTest {
+    // Launcher options that are each passed, on their own, to a test JVM.
+    private static final List<String> BASIC_FLAGS =
+            List.of("-version", "-Xinternalversion", "-X", "-help");
+
+    // Runs the test JVM once per option, in list order, requiring exit value 0 each time.
+    public static void main(String[] args) throws Exception {
+        for (String basicFlag : BASIC_FLAGS) {
+            ProcessTools.executeTestJvm(basicFlag)
+                        .shouldHaveExitValue(0);
+        }
+    }
+}