7143511: G1: Another instance of high GC Worker Other time (50ms)
author johnc
Tue, 13 Mar 2012 11:05:32 -0700
changeset 12270 9625585c6047
parent 12236 51d6463cfd9d
child 12271 8cf95843833b
7143511: G1: Another instance of high GC Worker Other time (50ms)
Summary: Tiered compilation has increased the number of nmethods in the code cache. This has, in turn, significantly increased the number of marked nmethods processed during the StrongRootsScope destructor. Create a specialized version of CodeBlobToOopClosure for G1 which places only those nmethods that contain pointers into the collection set onto the marked nmethods list.
Reviewed-by: iveresov, tonyp
hotspot/src/share/vm/code/nmethod.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
--- a/hotspot/src/share/vm/code/nmethod.hpp	Tue Mar 27 10:29:59 2012 +0200
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Tue Mar 13 11:05:32 2012 -0700
@@ -553,7 +553,7 @@
   static void oops_do_marking_prologue();
   static void oops_do_marking_epilogue();
   static bool oops_do_marking_is_active() { return _oops_do_mark_nmethods != NULL; }
-  DEBUG_ONLY(bool test_oops_do_mark() { return _oops_do_mark_link != NULL; })
+  bool test_oops_do_mark() { return _oops_do_mark_link != NULL; }
 
   // ScopeDesc for an instruction
   ScopeDesc* scope_desc_at(address pc);
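
The hunk above drops the DEBUG_ONLY wrapper from test_oops_do_mark() so that product code (the new G1 closure added below) can cheaply ask whether an nmethod is already on the marked list before attempting the atomic test-and-set. What follows is a minimal standalone model of that test / test-and-set pattern; the MockNMethod type and its member names are invented for illustration and are not HotSpot code.

// Simplified standalone model of the test / test-and-set marking pattern
// that test_oops_do_mark() / test_set_oops_do_mark() provide on nmethod.
// Names are illustrative only, not HotSpot code.
#include <atomic>
#include <cstdio>

class MockNMethod {
  // A non-null link means "already on the marked-nmethods list".
  std::atomic<MockNMethod*> _mark_link{nullptr};
public:
  // Cheap read-only check: is this nmethod already marked?
  bool test_mark() const {
    return _mark_link.load(std::memory_order_acquire) != nullptr;
  }

  // Atomically claim the nmethod for marking; returns true if it was
  // already marked, false if this caller just marked it.
  bool test_set_mark() {
    MockNMethod* expected = nullptr;
    // Self-link stands in for an end-of-list sentinel.
    return !_mark_link.compare_exchange_strong(expected, this,
                                               std::memory_order_acq_rel);
  }
};

int main() {
  MockNMethod nm;
  if (!nm.test_mark()) {        // fast pre-check, no write
    if (!nm.test_set_mark()) {  // slower atomic claim
      std::puts("marked by this thread");
    }
  }
  std::puts(nm.test_mark() ? "now marked" : "still unmarked");
  return 0;
}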
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 27 10:29:59 2012 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 13 11:05:32 2012 -0700
@@ -4675,67 +4675,74 @@
     double start_time_ms = os::elapsedTime() * 1000.0;
     _g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
 
-    ResourceMark rm;
-    HandleMark   hm;
-
-    ReferenceProcessor*             rp = _g1h->ref_processor_stw();
-
-    G1ParScanThreadState            pss(_g1h, worker_id);
-    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
-    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
-    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
-
-    pss.set_evac_closure(&scan_evac_cl);
-    pss.set_evac_failure_closure(&evac_failure_cl);
-    pss.set_partial_scan_closure(&partial_scan_cl);
-
-    G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
-    G1ParScanPermClosure           only_scan_perm_cl(_g1h, &pss, rp);
-
-    G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
-    G1ParScanAndMarkPermClosure    scan_mark_perm_cl(_g1h, &pss, rp);
-
-    OopClosure*                    scan_root_cl = &only_scan_root_cl;
-    OopsInHeapRegionClosure*       scan_perm_cl = &only_scan_perm_cl;
-
-    if (_g1h->g1_policy()->during_initial_mark_pause()) {
-      // We also need to mark copied objects.
-      scan_root_cl = &scan_mark_root_cl;
-      scan_perm_cl = &scan_mark_perm_cl;
+    {
+      ResourceMark rm;
+      HandleMark   hm;
+
+      ReferenceProcessor*             rp = _g1h->ref_processor_stw();
+
+      G1ParScanThreadState            pss(_g1h, worker_id);
+      G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
+      G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
+      G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
+
+      pss.set_evac_closure(&scan_evac_cl);
+      pss.set_evac_failure_closure(&evac_failure_cl);
+      pss.set_partial_scan_closure(&partial_scan_cl);
+
+      G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
+      G1ParScanPermClosure           only_scan_perm_cl(_g1h, &pss, rp);
+
+      G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
+      G1ParScanAndMarkPermClosure    scan_mark_perm_cl(_g1h, &pss, rp);
+
+      OopClosure*                    scan_root_cl = &only_scan_root_cl;
+      OopsInHeapRegionClosure*       scan_perm_cl = &only_scan_perm_cl;
+
+      if (_g1h->g1_policy()->during_initial_mark_pause()) {
+        // We also need to mark copied objects.
+        scan_root_cl = &scan_mark_root_cl;
+        scan_perm_cl = &scan_mark_perm_cl;
+      }
+
+      G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
+
+      pss.start_strong_roots();
+      _g1h->g1_process_strong_roots(/* not collecting perm */ false,
+                                    SharedHeap::SO_AllClasses,
+                                    scan_root_cl,
+                                    &push_heap_rs_cl,
+                                    scan_perm_cl,
+                                    worker_id);
+      pss.end_strong_roots();
+
+      {
+        double start = os::elapsedTime();
+        G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
+        evac.do_void();
+        double elapsed_ms = (os::elapsedTime()-start)*1000.0;
+        double term_ms = pss.term_time()*1000.0;
+        _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
+        _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
+      }
+      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
+      _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
+
+      // Clean up any par-expanded rem sets.
+      HeapRegionRemSet::par_cleanup();
+
+      if (ParallelGCVerbose) {
+        MutexLocker x(stats_lock());
+        pss.print_termination_stats(worker_id);
+      }
+
+      assert(pss.refs()->is_empty(), "should be empty");
+
+      // Close the inner scope so that the ResourceMark and HandleMark
+      // destructors are executed here and are included as part of the
+      // "GC Worker Time".
     }
 
-    G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
-
-    pss.start_strong_roots();
-    _g1h->g1_process_strong_roots(/* not collecting perm */ false,
-                                  SharedHeap::SO_AllClasses,
-                                  scan_root_cl,
-                                  &push_heap_rs_cl,
-                                  scan_perm_cl,
-                                  worker_id);
-    pss.end_strong_roots();
-
-    {
-      double start = os::elapsedTime();
-      G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
-      evac.do_void();
-      double elapsed_ms = (os::elapsedTime()-start)*1000.0;
-      double term_ms = pss.term_time()*1000.0;
-      _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
-      _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
-    }
-    _g1h->g1_policy()->record_thread_age_table(pss.age_table());
-    _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
-
-    // Clean up any par-expanded rem sets.
-    HeapRegionRemSet::par_cleanup();
-
-    if (ParallelGCVerbose) {
-      MutexLocker x(stats_lock());
-      pss.print_termination_stats(worker_id);
-    }
-
-    assert(pss.refs()->is_empty(), "should be empty");
     double end_time_ms = os::elapsedTime() * 1000.0;
     _g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
   }
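
The restructuring above wraps the worker body in an inner scope so that the ResourceMark and HandleMark destructors run before the worker end time is sampled, charging their cost to "GC Worker Time" rather than to "GC Worker Other". A minimal sketch of the principle, assuming nothing about HotSpot itself (ExpensiveCleanup is a stand-in RAII type and the sleep durations are purely illustrative):

// The placement of the closing brace decides which timer an RAII
// destructor's cost lands in.
#include <chrono>
#include <cstdio>
#include <thread>

struct ExpensiveCleanup {                 // stands in for ResourceMark/HandleMark
  ~ExpensiveCleanup() {
    std::this_thread::sleep_for(std::chrono::milliseconds(20)); // simulated cleanup cost
  }
};

int main() {
  using clock = std::chrono::steady_clock;
  auto start = clock::now();
  {
    ExpensiveCleanup cleanup;             // destructor runs at the inner '}'
    std::this_thread::sleep_for(std::chrono::milliseconds(5)); // simulated GC work
  }                                       // cleanup cost incurred before the timer is read
  auto end = clock::now();
  double worker_ms =
      std::chrono::duration<double, std::milli>(end - start).count();
  // With the brace placed as above, worker_ms includes the ~20 ms cleanup,
  // matching how the patch accounts it inside "GC Worker Time".
  std::printf("worker time: %.1f ms\n", worker_ms);
  return 0;
}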
@@ -4743,6 +4750,67 @@
 
 // *** Common G1 Evacuation Stuff
 
+// Closures that support the filtering of CodeBlobs scanned during
+// external root scanning.
+
+// Closure applied to reference fields in code blobs (specifically nmethods)
+// to determine whether an nmethod contains references that point into
+// the collection set. Used as a predicate when walking code roots so
+// that only nmethods that point into the collection set are added to the
+// 'marked' list.
+
+class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure {
+
+  class G1PointsIntoCSOopClosure : public OopClosure {
+    G1CollectedHeap* _g1;
+    bool _points_into_cs;
+  public:
+    G1PointsIntoCSOopClosure(G1CollectedHeap* g1) :
+      _g1(g1), _points_into_cs(false) { }
+
+    bool points_into_cs() const { return _points_into_cs; }
+
+    template <class T>
+    void do_oop_nv(T* p) {
+      if (!_points_into_cs) {
+        T heap_oop = oopDesc::load_heap_oop(p);
+        if (!oopDesc::is_null(heap_oop) &&
+            _g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) {
+          _points_into_cs = true;
+        }
+      }
+    }
+
+    virtual void do_oop(oop* p)        { do_oop_nv(p); }
+    virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+  };
+
+  G1CollectedHeap* _g1;
+
+public:
+  G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) :
+    CodeBlobToOopClosure(cl, true), _g1(g1) { }
+
+  virtual void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = cb->as_nmethod_or_null();
+    if (nm != NULL && !(nm->test_oops_do_mark())) {
+      G1PointsIntoCSOopClosure predicate_cl(_g1);
+      nm->oops_do(&predicate_cl);
+
+      if (predicate_cl.points_into_cs()) {
+        // At least one of the reference fields or the oop relocations
+        // in the nmethod points into the collection set. We have to
+        // 'mark' this nmethod.
+        // Note: Revisit the following if CodeBlobToOopClosure::do_code_blob()
+        // or MarkingCodeBlobClosure::do_code_blob() change.
+        if (!nm->test_set_oops_do_mark()) {
+          do_newly_marked_nmethod(nm);
+        }
+      }
+    }
+  }
+};
+
 // This method is run in a GC worker.
 
 void
@@ -4764,7 +4832,7 @@
 
   // Walk the code cache w/o buffering, because StarTask cannot handle
   // unaligned oop locations.
-  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, /*do_marking=*/ true);
+  G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots);
 
   process_strong_roots(false, // no scoping; this is parallel code
                        collecting_perm_gen, so,
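
The two hunks above add G1FilteredCodeBlobToOopClosure and substitute it for the plain CodeBlobToOopClosure during external root scanning: an nmethod is pushed onto the marked list only if a quick scan of its oops finds at least one reference into the collection set. Below is a simplified standalone model of that filter-before-mark idea; MockCodeBlob, Oop, and in_collection_set() are mock stand-ins, not HotSpot types.

// Visit a code blob's references with a predicate first, and only put the
// blob on the "marked" list if at least one reference points into the
// collection set.
#include <cstdio>
#include <vector>

struct Oop { int region; };                       // mock heap reference

struct MockCodeBlob {
  std::vector<Oop> oops;                          // embedded references
  bool marked;
};

// Mock "in collection set" test; in G1 this role is played by in_cset_fast_test().
static bool in_collection_set(const Oop& o) { return o.region == 7; }

static bool points_into_cs(const MockCodeBlob& cb) {
  for (const Oop& o : cb.oops) {
    if (in_collection_set(o)) {
      return true;                                // early exit, like _points_into_cs
    }
  }
  return false;
}

static void filtered_do_code_blob(MockCodeBlob& cb,
                                  std::vector<MockCodeBlob*>& marked_list) {
  if (!cb.marked && points_into_cs(cb)) {
    cb.marked = true;                             // analogue of test_set_oops_do_mark()
    marked_list.push_back(&cb);                   // only interesting blobs get queued
  }
}

int main() {
  MockCodeBlob a{{{1}, {2}}, false};              // no reference into region 7
  MockCodeBlob b{{{3}, {7}}, false};              // one reference into region 7
  std::vector<MockCodeBlob*> marked;
  filtered_do_code_blob(a, marked);
  filtered_do_code_blob(b, marked);
  std::printf("marked blobs: %zu\n", marked.size());   // prints 1
  return 0;
}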
@@ -5378,25 +5446,39 @@
   rem_set()->prepare_for_younger_refs_iterate(true);
 
   assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
-  double start_par = os::elapsedTime();
-
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    // The individual threads will set their evac-failure closures.
+  double start_par_time_sec = os::elapsedTime();
+  double end_par_time_sec;
+
+  {
     StrongRootsScope srs(this);
-    if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
-    // These tasks use ShareHeap::_process_strong_tasks
-    assert(UseDynamicNumberOfGCThreads ||
-           workers()->active_workers() == workers()->total_workers(),
-           "If not dynamic should be using all the  workers");
-    workers()->run_task(&g1_par_task);
-  } else {
-    StrongRootsScope srs(this);
-    g1_par_task.set_for_termination(n_workers);
-    g1_par_task.work(0);
-  }
-
-  double par_time = (os::elapsedTime() - start_par) * 1000.0;
-  g1_policy()->record_par_time(par_time);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      // The individual threads will set their evac-failure closures.
+      if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
+      // These tasks use ShareHeap::_process_strong_tasks
+      assert(UseDynamicNumberOfGCThreads ||
+             workers()->active_workers() == workers()->total_workers(),
+             "If not dynamic should be using all the  workers");
+      workers()->run_task(&g1_par_task);
+    } else {
+      g1_par_task.set_for_termination(n_workers);
+      g1_par_task.work(0);
+    }
+    end_par_time_sec = os::elapsedTime();
+
+    // Closing the inner scope will execute the destructor
+    // for the StrongRootsScope object. We record the current
+    // elapsed time before closing the scope so that time
+    // taken for the SRS destructor is NOT included in the
+    // reported parallel time.
+  }
+
+  double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
+  g1_policy()->record_par_time(par_time_ms);
+
+  double code_root_fixup_time_ms =
+        (os::elapsedTime() - end_par_time_sec) * 1000.0;
+  g1_policy()->record_code_root_fixup_time(code_root_fixup_time_ms);
 
   set_par_threads(0);
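
The hunk above splits what used to be a single parallel-time measurement: the parallel phase is timed up to the point just before the StrongRootsScope destructor runs, and the destructor (which processes the marked nmethods) is reported separately as code root fixup time. A minimal sketch of that timing split, with ScopeWithCostlyDtor standing in for StrongRootsScope and the sleep durations purely illustrative:

// Record the end of the parallel phase *before* the scope closes, so the
// RAII destructor's work can be reported in its own bucket.
#include <chrono>
#include <cstdio>
#include <thread>

struct ScopeWithCostlyDtor {              // stands in for StrongRootsScope
  ~ScopeWithCostlyDtor() {
    std::this_thread::sleep_for(std::chrono::milliseconds(15)); // simulated nmethod fixup
  }
};

int main() {
  using clock = std::chrono::steady_clock;
  const auto start_par = clock::now();
  clock::time_point end_par;
  {
    ScopeWithCostlyDtor srs;
    std::this_thread::sleep_for(std::chrono::milliseconds(10)); // simulated parallel work
    end_par = clock::now();               // sampled before the destructor runs
  }                                       // destructor ("fixup") runs here
  const auto after_fixup = clock::now();

  const double par_ms =
      std::chrono::duration<double, std::milli>(end_par - start_par).count();
  const double fixup_ms =
      std::chrono::duration<double, std::milli>(after_fixup - end_par).count();
  std::printf("parallel time: %.1f ms, code root fixup: %.1f ms\n",
              par_ms, fixup_ms);
  return 0;
}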
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Mar 27 10:29:59 2012 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Mar 13 11:05:32 2012 -0700
@@ -1261,6 +1261,9 @@
     other_time_ms -= known_time;
   }
 
+  // Now subtract the time taken to fix up roots in generated code
+  other_time_ms -= _cur_collection_code_root_fixup_time_ms;
+
   // Subtract the time taken to clean the card table from the
   // current value of "other time"
   other_time_ms -= _cur_clear_ct_time_ms;
@@ -1401,10 +1404,10 @@
       print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
       print_par_stats(2, "Termination", _par_last_termination_times_ms);
       print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
-      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
 
       for (int i = 0; i < _parallel_gc_threads; i++) {
-        _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
+        _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] -
+                                          _par_last_gc_worker_start_times_ms[i];
 
         double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
                                    _par_last_satb_filtering_times_ms[i] +
@@ -1413,10 +1416,13 @@
                                    _par_last_obj_copy_times_ms[i] +
                                    _par_last_termination_times_ms[i];
 
-        _par_last_gc_worker_other_times_ms[i] = _cur_collection_par_time_ms - worker_known_time;
+        _par_last_gc_worker_other_times_ms[i] = _par_last_gc_worker_times_ms[i] -
+                                                worker_known_time;
       }
-      print_par_stats(2, "GC Worker", _par_last_gc_worker_times_ms);
+
       print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
+      print_par_stats(2, "GC Worker Total", _par_last_gc_worker_times_ms);
+      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
     } else {
       print_stats(1, "Ext Root Scanning", ext_root_scan_time);
       if (print_marking_info) {
@@ -1427,6 +1433,7 @@
       print_stats(1, "Scan RS", scan_rs_time);
       print_stats(1, "Object Copying", obj_copy_time);
     }
+    print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
     if (print_marking_info) {
       print_stats(1, "Complete CSet Marking", _mark_closure_time_ms);
     }
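
With the g1CollectorPolicy.cpp hunks above, each worker's "GC Worker Other" time is derived from that worker's own elapsed time (end minus start) rather than from the pause-wide parallel time, and the output gains explicit "GC Worker Total" and "GC Worker End" rows. A worked example with made-up numbers:

// Per-worker "other" time: the worker's own elapsed time minus the sum of
// its known sub-phases. All values below are illustrative only.
#include <cstdio>

int main() {
  const double worker_start_ms = 100.0;   // _par_last_gc_worker_start_times_ms[i]
  const double worker_end_ms   = 162.0;   // _par_last_gc_worker_end_times_ms[i]
  const double known_ms        = 58.5;    // ext root scan + SATB filtering + update RS
                                          // + scan RS + object copy + termination
  const double worker_ms = worker_end_ms - worker_start_ms;   // "GC Worker Total"
  const double other_ms  = worker_ms - known_ms;              // "GC Worker Other"
  std::printf("GC Worker Total: %.1f ms, GC Worker Other: %.1f ms\n",
              worker_ms, other_ms);
  return 0;
}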
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Mar 27 10:29:59 2012 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Mar 13 11:05:32 2012 -0700
@@ -179,6 +179,9 @@
   size_t _cur_collection_pause_used_at_start_bytes;
   size_t _cur_collection_pause_used_regions_at_start;
   double _cur_collection_par_time_ms;
+
+  double _cur_collection_code_root_fixup_time_ms;
+
   double _cur_satb_drain_time_ms;
   double _cur_clear_ct_time_ms;
   double _cur_ref_proc_time_ms;
@@ -226,8 +229,8 @@
   double* _par_last_gc_worker_times_ms;
 
   // Each workers 'other' time i.e. the elapsed time of the parallel
-  // phase of the pause minus the sum of the individual sub-phase
-  // times for a given worker thread.
+  // code executed by a worker minus the sum of the individual sub-phase
+  // times for that worker thread.
   double* _par_last_gc_worker_other_times_ms;
 
   // indicates whether we are in young or mixed GC mode
@@ -897,6 +900,10 @@
     _cur_collection_par_time_ms = ms;
   }
 
+  void record_code_root_fixup_time(double ms) {
+    _cur_collection_code_root_fixup_time_ms = ms;
+  }
+
   void record_aux_start_time(int i) {
     guarantee(i < _aux_num, "should be within range");
     _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0;
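
The new field and the record_code_root_fixup_time() setter close the loop: G1CollectedHeap records the fixup time at the end of the evacuation pause, and the policy subtracts it from the pause's "Other" time and prints it under its own "Code Root Fixup" label. A minimal sketch of that accounting flow, using an invented MockPolicy type and illustrative numbers rather than the real G1CollectorPolicy API:

// Record the fixup time once per pause, then report it separately instead of
// letting it inflate "Other" time.
#include <cstdio>

struct MockPolicy {
  double par_time_ms = 0.0;
  double code_root_fixup_time_ms = 0.0;
  double clear_ct_time_ms = 0.0;

  void record_par_time(double ms)             { par_time_ms = ms; }
  void record_code_root_fixup_time(double ms) { code_root_fixup_time_ms = ms; }
  void record_clear_ct_time(double ms)        { clear_ct_time_ms = ms; }

  void print_summary(double pause_time_ms) const {
    double other_ms = pause_time_ms - par_time_ms;
    other_ms -= code_root_fixup_time_ms;   // mirrors the subtraction added above
    other_ms -= clear_ct_time_ms;
    std::printf("  Parallel Time: %.1f ms\n", par_time_ms);
    std::printf("  Code Root Fixup: %.1f ms\n", code_root_fixup_time_ms);
    std::printf("  Clear CT: %.1f ms\n", clear_ct_time_ms);
    std::printf("  Other: %.1f ms\n", other_ms);
  }
};

int main() {
  MockPolicy p;
  p.record_par_time(42.0);
  p.record_code_root_fixup_time(3.5);
  p.record_clear_ct_time(1.2);
  p.print_summary(50.0);                  // pause time: illustrative number
  return 0;
}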