7068240: G1: Long "parallel other time" and "ext root scanning" when running specific benchmark
Summary: In root processing, move the scanning of the reference processor's discovered lists to before RSet updating and scanning. When scanning the reference processor's discovered lists, use a buffering closure so that the time spent copying any reference object is correctly attributed. Also removed a couple of unused and irrelevant timers.
Reviewed-by: ysr, jmasa
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Sat Jul 23 10:42:20 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Aug 01 10:04:28 2011 -0700
@@ -4834,6 +4834,7 @@
scan_perm_cl,
i);
pss.end_strong_roots();
+
{
double start = os::elapsedTime();
G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
@@ -4890,17 +4891,29 @@
&eager_scan_code_roots,
&buf_scan_perm);
- // Finish up any enqueued closure apps.
+ // Now the ref_processor roots.
+ if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
+ // We need to treat the discovered reference lists as roots and
+ // keep entries (which are added by the marking threads) on them
+ // live until they can be processed at the end of marking.
+ ref_processor()->weak_oops_do(&buf_scan_non_heap_roots);
+ ref_processor()->oops_do(&buf_scan_non_heap_roots);
+ }
+
+ // Finish up any enqueued closure apps (attributed as object copy time).
buf_scan_non_heap_roots.done();
buf_scan_perm.done();
+
double ext_roots_end = os::elapsedTime();
+
g1_policy()->reset_obj_copy_time(worker_i);
- double obj_copy_time_sec =
- buf_scan_non_heap_roots.closure_app_seconds() +
- buf_scan_perm.closure_app_seconds();
+ double obj_copy_time_sec = buf_scan_perm.closure_app_seconds() +
+ buf_scan_non_heap_roots.closure_app_seconds();
g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
+
double ext_root_time_ms =
((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
+
g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
// Scan strong roots in mark stack.
@@ -4910,21 +4923,11 @@
double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
- // XXX What should this be doing in the parallel case?
- g1_policy()->record_collection_pause_end_CH_strong_roots();
// Now scan the complement of the collection set.
if (scan_rs != NULL) {
g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
}
- // Finish with the ref_processor roots.
- if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
- // We need to treat the discovered reference lists as roots and
- // keep entries (which are added by the marking threads) on them
- // live until they can be processed at the end of marking.
- ref_processor()->weak_oops_do(scan_non_heap_roots);
- ref_processor()->oops_do(scan_non_heap_roots);
- }
- g1_policy()->record_collection_pause_end_G1_strong_roots();
+
_process_strong_tasks->all_tasks_completed();
}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Sat Jul 23 10:42:20 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Aug 01 10:04:28 2011 -0700
@@ -393,8 +393,7 @@
// Perform finalization stuff on all allocation regions.
void retire_all_alloc_regions();
- // The number of regions allocated to hold humongous objects.
- int _num_humongous_regions;
+ // The young region list.
YoungList* _young_list;
// The current policy object for the collector.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Sat Jul 23 10:42:20 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Mon Aug 01 10:04:28 2011 -0700
@@ -134,13 +134,10 @@
G1CollectorPolicy::G1CollectorPolicy() :
_parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
- ? ParallelGCThreads : 1),
-
+ ? ParallelGCThreads : 1),
_n_pauses(0),
- _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
- _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
- _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+ _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
_recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
_recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)),
_recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
@@ -1050,18 +1047,6 @@
void G1CollectorPolicy::record_concurrent_pause_end() {
}
-void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() {
- _cur_CH_strong_roots_end_sec = os::elapsedTime();
- _cur_CH_strong_roots_dur_ms =
- (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0;
-}
-
-void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() {
- _cur_G1_strong_roots_end_sec = os::elapsedTime();
- _cur_G1_strong_roots_dur_ms =
- (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0;
-}
-
template<class T>
T sum_of(T* sum_arr, int start, int n, int N) {
T sum = (T)0;
@@ -1183,7 +1168,6 @@
double end_time_sec = os::elapsedTime();
double elapsed_ms = _last_pause_time_ms;
bool parallel = G1CollectedHeap::use_parallel_gc_threads();
- double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0;
size_t rs_size =
_cur_collection_pause_used_regions_at_start - collection_set_size();
size_t cur_used_bytes = _g1->used();
@@ -1256,14 +1240,52 @@
_n_pauses++;
+ double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
+ double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+ double update_rs_time = avg_value(_par_last_update_rs_times_ms);
+ double update_rs_processed_buffers =
+ sum_of_values(_par_last_update_rs_processed_buffers);
+ double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
+ double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
+ double termination_time = avg_value(_par_last_termination_times_ms);
+
+ double parallel_known_time = update_rs_time +
+ ext_root_scan_time +
+ mark_stack_scan_time +
+ scan_rs_time +
+ obj_copy_time +
+ termination_time;
+
+ double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
+
+ PauseSummary* summary = _summary;
+
if (update_stats) {
- _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms);
- _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms);
- _recent_evac_times_ms->add(evac_ms);
+ _recent_rs_scan_times_ms->add(scan_rs_time);
_recent_pause_times_ms->add(elapsed_ms);
-
_recent_rs_sizes->add(rs_size);
+ MainBodySummary* body_summary = summary->main_body_summary();
+ guarantee(body_summary != NULL, "should not be null!");
+
+ if (_satb_drain_time_set)
+ body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+ else
+ body_summary->record_satb_drain_time_ms(0.0);
+
+ body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
+ body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+ body_summary->record_update_rs_time_ms(update_rs_time);
+ body_summary->record_scan_rs_time_ms(scan_rs_time);
+ body_summary->record_obj_copy_time_ms(obj_copy_time);
+ if (parallel) {
+ body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
+ body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
+ body_summary->record_termination_time_ms(termination_time);
+ body_summary->record_parallel_other_time_ms(parallel_other_time);
+ }
+ body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
+
// We exempt parallel collection from this check because Alloc Buffer
// fragmentation can produce negative collections. Same with evac
// failure.
@@ -1328,56 +1350,12 @@
gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses);
}
- PauseSummary* summary = _summary;
-
- double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
- double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
- double update_rs_time = avg_value(_par_last_update_rs_times_ms);
- double update_rs_processed_buffers =
- sum_of_values(_par_last_update_rs_processed_buffers);
- double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
- double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
- double termination_time = avg_value(_par_last_termination_times_ms);
-
- double parallel_other_time = _cur_collection_par_time_ms -
- (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
- scan_rs_time + obj_copy_time + termination_time);
- if (update_stats) {
- MainBodySummary* body_summary = summary->main_body_summary();
- guarantee(body_summary != NULL, "should not be null!");
-
- if (_satb_drain_time_set)
- body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
- else
- body_summary->record_satb_drain_time_ms(0.0);
- body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
- body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
- body_summary->record_update_rs_time_ms(update_rs_time);
- body_summary->record_scan_rs_time_ms(scan_rs_time);
- body_summary->record_obj_copy_time_ms(obj_copy_time);
- if (parallel) {
- body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
- body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
- body_summary->record_termination_time_ms(termination_time);
- body_summary->record_parallel_other_time_ms(parallel_other_time);
- }
- body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
- }
-
if (G1PolicyVerbose > 1) {
gclog_or_tty->print_cr(" ET: %10.6f ms (avg: %10.6f ms)\n"
- " CH Strong: %10.6f ms (avg: %10.6f ms)\n"
- " G1 Strong: %10.6f ms (avg: %10.6f ms)\n"
- " Evac: %10.6f ms (avg: %10.6f ms)\n"
" ET-RS: %10.6f ms (avg: %10.6f ms)\n"
" |RS|: " SIZE_FORMAT,
elapsed_ms, recent_avg_time_for_pauses_ms(),
- _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(),
- _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(),
- evac_ms, recent_avg_time_for_evac_ms(),
- scan_rs_time,
- recent_avg_time_for_pauses_ms() -
- recent_avg_time_for_G1_strong_ms(),
+ scan_rs_time, recent_avg_time_for_rs_scan_ms(),
rs_size);
gclog_or_tty->print_cr(" Used at start: " SIZE_FORMAT"K"
@@ -1438,7 +1416,7 @@
}
print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms);
- print_stats(2, "Other", parallel_other_time);
+ print_stats(2, "Parallel Other", parallel_other_time);
print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
} else {
print_stats(1, "Update RS", update_rs_time);
@@ -1967,38 +1945,27 @@
}
double G1CollectorPolicy::recent_avg_time_for_pauses_ms() {
- if (_recent_pause_times_ms->num() == 0) return (double) MaxGCPauseMillis;
- else return _recent_pause_times_ms->avg();
-}
-
-double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() {
- if (_recent_CH_strong_roots_times_ms->num() == 0)
- return (double)MaxGCPauseMillis/3.0;
- else return _recent_CH_strong_roots_times_ms->avg();
+ if (_recent_pause_times_ms->num() == 0) {
+ return (double) MaxGCPauseMillis;
+ }
+ return _recent_pause_times_ms->avg();
}
-double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() {
- if (_recent_G1_strong_roots_times_ms->num() == 0)
+double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() {
+ if (_recent_rs_scan_times_ms->num() == 0) {
return (double)MaxGCPauseMillis/3.0;
- else return _recent_G1_strong_roots_times_ms->avg();
-}
-
-double G1CollectorPolicy::recent_avg_time_for_evac_ms() {
- if (_recent_evac_times_ms->num() == 0) return (double)MaxGCPauseMillis/3.0;
- else return _recent_evac_times_ms->avg();
+ }
+ return _recent_rs_scan_times_ms->avg();
}
int G1CollectorPolicy::number_of_recent_gcs() {
- assert(_recent_CH_strong_roots_times_ms->num() ==
- _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync");
- assert(_recent_G1_strong_roots_times_ms->num() ==
- _recent_evac_times_ms->num(), "Sequence out of sync");
- assert(_recent_evac_times_ms->num() ==
+ assert(_recent_rs_scan_times_ms->num() ==
_recent_pause_times_ms->num(), "Sequence out of sync");
assert(_recent_pause_times_ms->num() ==
_recent_CS_bytes_used_before->num(), "Sequence out of sync");
assert(_recent_CS_bytes_used_before->num() ==
_recent_CS_bytes_surviving->num(), "Sequence out of sync");
+
return _recent_pause_times_ms->num();
}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Sat Jul 23 10:42:20 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Mon Aug 01 10:04:28 2011 -0700
@@ -129,15 +129,9 @@
jlong _num_cc_clears; // number of times the card count cache has been cleared
#endif
- double _cur_CH_strong_roots_end_sec;
- double _cur_CH_strong_roots_dur_ms;
- double _cur_G1_strong_roots_end_sec;
- double _cur_G1_strong_roots_dur_ms;
+ // Statistics for recent GC pauses. See below for how indexed.
+ TruncatedSeq* _recent_rs_scan_times_ms;
- // Statistics for recent GC pauses. See below for how indexed.
- TruncatedSeq* _recent_CH_strong_roots_times_ms;
- TruncatedSeq* _recent_G1_strong_roots_times_ms;
- TruncatedSeq* _recent_evac_times_ms;
// These exclude marking times.
TruncatedSeq* _recent_pause_times_ms;
TruncatedSeq* _recent_gc_times_ms;
@@ -692,17 +686,11 @@
// The average time in ms per collection pause, averaged over recent pauses.
double recent_avg_time_for_pauses_ms();
- // The average time in ms for processing CollectedHeap strong roots, per
- // collection pause, averaged over recent pauses.
- double recent_avg_time_for_CH_strong_ms();
-
- // The average time in ms for processing the G1 remembered set, per
- // pause, averaged over recent pauses.
- double recent_avg_time_for_G1_strong_ms();
-
- // The average time in ms for "evacuating followers", per pause, averaged
- // over recent pauses.
- double recent_avg_time_for_evac_ms();
+ // The average time in ms for RS scanning, per pause, averaged
+ // over recent pauses. (Note the RS scanning time for a pause
+ // is itself an average of the RS scanning time for each worker
+ // thread.)
+ double recent_avg_time_for_rs_scan_ms();
// The number of "recent" GCs recorded in the number sequences
int number_of_recent_gcs();
@@ -887,9 +875,6 @@
virtual void record_concurrent_pause();
virtual void record_concurrent_pause_end();
- virtual void record_collection_pause_end_CH_strong_roots();
- virtual void record_collection_pause_end_G1_strong_roots();
-
virtual void record_collection_pause_end();
void print_heap_transition();