# HG changeset patch
# User tschatzl
# Date 1575019214 -3600
# Node ID 9ee940f1de901e966c6daaa510eb9f34cf0e832e
# Parent 70021dbed82bdfff495c0a93c7fbf3256684c601
8227739: Merge cost predictions for scanning cards and log buffer entries
Summary: Revamp the cost predictions for the changes in JDK-8200545 and JDK-8213108.
Reviewed-by: sjohanss, kbarrett

diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1Analytics.cpp
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -45,11 +45,11 @@
 };

 // all the same
-static double young_cards_per_entry_ratio_defaults[] = {
+static double young_card_merge_to_scan_ratio_defaults[] = {
   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };

-static double young_only_cost_per_remset_card_ms_defaults[] = {
+static double young_only_cost_per_card_scan_ms_defaults[] = {
   0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
 };

@@ -62,7 +62,6 @@
   5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
 };

-
 static double young_other_cost_per_region_ms_defaults[] = {
   0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1
 };
@@ -81,13 +80,13 @@
     _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
     _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _copy_cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _non_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -109,11 +108,10 @@
   _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
   // Some applications have very low rates for logging cards.
   _logged_cards_rate_ms_seq->add(0.0);
-  _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]);
-  _cost_scan_hcc_seq->add(0.0);
-  _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
-  _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]);
-  _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
+  _young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]);
+  _young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]);
+
+  _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
   _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
   _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]);
   _non_young_other_cost_per_region_ms_seq->add(non_young_other_cost_per_region_ms_defaults[index]);
@@ -123,6 +121,10 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
 }

+bool G1Analytics::enough_samples_available(TruncatedSeq const* seq) const {
+  return seq->num() >= 3;
+}
+
 double G1Analytics::get_new_prediction(TruncatedSeq const* seq) const {
   return _predictor->get_new_prediction(seq);
 }
@@ -166,27 +168,27 @@
   _logged_cards_rate_ms_seq->add(cards_per_ms);
 }

-void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) {
-  _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms);
-}
-
-void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) {
-  _cost_scan_hcc_seq->add(cost_scan_hcc);
-}
-
-void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) {
   if (for_young_gc) {
-    _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+    _young_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
   } else {
-    _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+    _mixed_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
   }
 }

-void G1Analytics::report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc) {
   if (for_young_gc) {
-    _young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    _young_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
   } else {
-    _mixed_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    _mixed_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
+  }
+}
+
+void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_gc) {
+  if (for_young_gc) {
+    _young_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
+  } else {
+    _mixed_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
   }
 }

@@ -198,7 +200,7 @@
   if (mark_or_rebuild_in_progress) {
     _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
   } else {
-    _cost_per_byte_ms_seq->add(cost_per_byte_ms);
+    _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms);
   }
 }

@@ -234,57 +236,37 @@
   return get_new_prediction(_logged_cards_rate_ms_seq);
 }

-double G1Analytics::predict_cost_per_logged_card_ms() const {
-  return get_new_prediction(_cost_per_logged_card_ms_seq);
-}
-
-double G1Analytics::predict_scan_hcc_ms() const {
-  return get_new_prediction(_cost_scan_hcc_seq);
+double G1Analytics::predict_young_card_merge_to_scan_ratio() const {
+  return get_new_prediction(_young_card_merge_to_scan_ratio_seq);
 }

-double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const {
-  return pending_cards * predict_cost_per_logged_card_ms() + predict_scan_hcc_ms();
-}
-
-double G1Analytics::predict_young_cards_per_entry_ratio() const {
-  return get_new_prediction(_young_cards_per_entry_ratio_seq);
-}
-
-double G1Analytics::predict_mixed_cards_per_entry_ratio() const {
-  if (_mixed_cards_per_entry_ratio_seq->num() < 2) {
-    return predict_young_cards_per_entry_ratio();
+size_t G1Analytics::predict_scan_card_num(size_t rs_length, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_card_merge_to_scan_ratio_seq)) {
+    return (size_t) (rs_length * predict_young_card_merge_to_scan_ratio());
   } else {
-    return get_new_prediction(_mixed_cards_per_entry_ratio_seq);
+    return (size_t) (rs_length * get_new_prediction(_mixed_card_merge_to_scan_ratio_seq));
   }
 }

-size_t G1Analytics::predict_card_num(size_t rs_length, bool for_young_gc) const {
-  if (for_young_gc) {
-    return (size_t) (rs_length * predict_young_cards_per_entry_ratio());
+double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_merge_ms_seq)) {
+    return card_num * get_new_prediction(_young_cost_per_card_merge_ms_seq);
   } else {
-    return (size_t) (rs_length * predict_mixed_cards_per_entry_ratio());
+    return card_num * get_new_prediction(_mixed_cost_per_card_merge_ms_seq);
   }
 }

-double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const {
-  if (for_young_gc) {
-    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
+double G1Analytics::predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_scan_ms_seq)) {
+    return card_num * get_new_prediction(_young_cost_per_card_scan_ms_seq);
   } else {
-    return predict_mixed_rs_scan_time_ms(card_num);
-  }
-}
-
-double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const {
-  if (_mixed_cost_per_remset_card_ms_seq->num() < 3) {
-    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
-  } else {
-    return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq);
+    return card_num * get_new_prediction(_mixed_cost_per_card_scan_ms_seq);
   }
 }

 double G1Analytics::predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const {
-  if (_cost_per_byte_ms_during_cm_seq->num() < 3) {
-    return (1.1 * bytes_to_copy) * get_new_prediction(_cost_per_byte_ms_seq);
+  if (!enough_samples_available(_cost_per_byte_ms_during_cm_seq)) {
+    return (1.1 * bytes_to_copy) * get_new_prediction(_copy_cost_per_byte_ms_seq);
   } else {
     return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_during_cm_seq);
   }
@@ -294,14 +276,10 @@
   if (during_concurrent_mark) {
     return predict_object_copy_time_ms_during_cm(bytes_to_copy);
   } else {
-    return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_seq);
+    return bytes_to_copy * get_new_prediction(_copy_cost_per_byte_ms_seq);
   }
 }

-double G1Analytics::predict_cost_per_byte_ms() const {
-  return get_new_prediction(_cost_per_byte_ms_seq);
-}
-
 double G1Analytics::predict_constant_other_time_ms() const {
   return get_new_prediction(_constant_other_time_ms_seq);
 }
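The predictors above all follow one pattern: a mixed-gc sequence is only consulted once enough_samples_available() reports at least three samples, otherwise the young-only sequence stands in for it, and the merge-to-scan ratio turns a remembered set length into an expected number of scanned cards. The following standalone sketch (not part of the changeset) reduces that pattern to a few lines; TinySeq, its plain average and the sample values are invented for illustration, whereas the real code uses TruncatedSeq and a decaying-average predictor.

    #include <cstddef>
    #include <cstdio>

    // Hypothetical stand-in for TruncatedSeq: only tracks a running average.
    struct TinySeq {
      double sum = 0.0;
      size_t n = 0;
      void add(double v) { sum += v; n++; }
      double avg() const { return n == 0 ? 0.0 : sum / n; }
    };

    static bool enough_samples(const TinySeq& s) { return s.n >= 3; }

    // Mirrors the shape of predict_scan_card_num(): use the mixed ratio only
    // once it has enough samples, otherwise fall back to the young-only ratio.
    static size_t predict_scan_cards(size_t rs_length, bool for_young_gc,
                                     const TinySeq& young_ratio, const TinySeq& mixed_ratio) {
      const TinySeq& ratio = (for_young_gc || !enough_samples(mixed_ratio)) ? young_ratio : mixed_ratio;
      return (size_t)(rs_length * ratio.avg());
    }

    int main() {
      TinySeq young_ratio, mixed_ratio;
      young_ratio.add(1.0);   // young-only default: every remembered set entry is scanned
      mixed_ratio.add(0.8);   // only one mixed sample so far -> not yet trusted
      // The mixed sequence is ignored here, so 10000 scan cards are predicted.
      printf("%zu\n", predict_scan_cards(10000, false, young_ratio, mixed_ratio));
      return 0;
    }
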
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1Analytics.hpp
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -48,13 +48,21 @@
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _concurrent_refine_rate_ms_seq;
   TruncatedSeq* _logged_cards_rate_ms_seq;
-  TruncatedSeq* _cost_per_logged_card_ms_seq;
-  TruncatedSeq* _cost_scan_hcc_seq;
-  TruncatedSeq* _young_cards_per_entry_ratio_seq;
-  TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
-  TruncatedSeq* _young_only_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _mixed_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _cost_per_byte_ms_seq;
+  // The ratio between the number of merged cards and actually scanned cards, for
+  // young-only and mixed gcs.
+  TruncatedSeq* _young_card_merge_to_scan_ratio_seq;
+  TruncatedSeq* _mixed_card_merge_to_scan_ratio_seq;
+
+  // The cost to scan a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_scan_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_scan_ms_seq;
+
+  // The cost to merge a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_merge_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_merge_ms_seq;
+
+  // The cost to copy a byte in ms.
+  TruncatedSeq* _copy_cost_per_byte_ms_seq;
   TruncatedSeq* _constant_other_time_ms_seq;
   TruncatedSeq* _young_other_cost_per_region_ms_seq;
   TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
@@ -72,6 +80,10 @@
   double _recent_avg_pause_time_ratio;
   double _last_pause_time_ratio;

+  // Returns whether the sequence has enough samples to get a "good" prediction.
+  // The constant used is arbitrary but "small".
+  bool enough_samples_available(TruncatedSeq const* seq) const;
+
   double get_new_prediction(TruncatedSeq const* seq) const;
   size_t get_new_size_prediction(TruncatedSeq const* seq) const;

@@ -103,10 +115,9 @@
   void report_alloc_rate_ms(double alloc_rate);
   void report_concurrent_refine_rate_ms(double cards_per_ms);
   void report_logged_cards_rate_ms(double cards_per_ms);
-  void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
-  void report_cost_scan_hcc(double cost_scan_hcc);
-  void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
-  void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc);
+  void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc);
+  void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc);
+  void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc);
   void report_rs_length_diff(double rs_length_diff);
   void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
   void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
@@ -120,21 +131,14 @@
   double predict_concurrent_refine_rate_ms() const;
   double predict_logged_cards_rate_ms() const;
-  double predict_cost_per_logged_card_ms() const;
-
-  double predict_scan_hcc_ms() const;
+  double predict_young_card_merge_to_scan_ratio() const;

-  double predict_rs_update_time_ms(size_t pending_cards) const;
-
-  double predict_young_cards_per_entry_ratio() const;
+  double predict_mixed_card_merge_to_scan_ratio() const;

-  double predict_mixed_cards_per_entry_ratio() const;
-
-  size_t predict_card_num(size_t rs_length, bool for_young_gc) const;
+  size_t predict_scan_card_num(size_t rs_length, bool for_young_gc) const;

-  double predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const;
-
-  double predict_mixed_rs_scan_time_ms(size_t card_num) const;
+  double predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const;
+  double predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const;

   double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const;

@@ -153,8 +157,6 @@
   size_t predict_rs_length() const;
   size_t predict_pending_cards() const;

-  double predict_cost_per_byte_ms() const;
-
   // Add a new GC of the given duration and end time to the record.
   void update_recent_gc_times(double end_time_sec, double elapsed_ms);
   void compute_pause_time_ratio(double interval_ms, double pause_time_ms);
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1CardTable.hpp
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CardTable.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -92,12 +92,16 @@
     return pointer_delta(p, _byte_map, sizeof(CardValue));
   }

-  // Mark the given card as Dirty if it is Clean.
-  inline void mark_clean_as_dirty(size_t card_index);
+  // Mark the given card as Dirty if it is Clean. Returns the number of dirtied
+  // cards that were not yet dirty. This result may be inaccurate as it does not
+  // perform the dirtying atomically.
+  inline size_t mark_clean_as_dirty(size_t card_index);

   // Change Clean cards in a (large) area on the card table as Dirty, preserving
   // already scanned cards. Assumes that most cards in that area are Clean.
-  inline void mark_region_dirty(size_t start_card_index, size_t num_cards);
+  // Returns the number of dirtied cards that were not yet dirty. This result may
+  // be inaccurate as it does not perform the dirtying atomically.
+  inline size_t mark_region_dirty(size_t start_card_index, size_t num_cards);

   // Mark the given range of cards as Scanned. All of these cards must be Dirty.
   inline void mark_as_scanned(size_t start_card_index, size_t num_cards);
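The new size_t return values exist so that the merge phase can report how many cards it actually dirtied. A minimal sketch of that contract, assuming a simple byte-per-card table (ToyCardTable and its fixed size are invented; the real implementation additionally has a word-sized fast path for all-clean and all-dirty chunks, shown in the next file):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical, simplified card table: 0 = clean, 1 = dirty.
    struct ToyCardTable {
      static constexpr uint8_t Clean = 0;
      static constexpr uint8_t Dirty = 1;
      uint8_t cards[1024] = {};

      // Returns 1 if this call transitioned the card from Clean to Dirty.
      // As in the patched mark_clean_as_dirty(), the load and store are not
      // atomic, so two threads racing on the same card may both report 1;
      // the count is only an estimate used for cost prediction.
      size_t mark_clean_as_dirty(size_t idx) {
        if (cards[idx] == Clean) {
          cards[idx] = Dirty;
          return 1;
        }
        return 0;
      }

      // Returns the number of cards newly dirtied in [start, start + num).
      size_t mark_region_dirty(size_t start, size_t num) {
        size_t newly_dirtied = 0;
        for (size_t i = start; i < start + num; i++) {
          newly_dirtied += mark_clean_as_dirty(i);
        }
        return newly_dirtied;
      }
    };
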
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1CardTable.inline.hpp
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -33,17 +33,21 @@
   return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift));
 }

-inline void G1CardTable::mark_clean_as_dirty(size_t card_index) {
+inline size_t G1CardTable::mark_clean_as_dirty(size_t card_index) {
   CardValue value = _byte_map[card_index];
   if (value == clean_card_val()) {
     _byte_map[card_index] = dirty_card_val();
+    return 1;
   }
+  return 0;
 }

-inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
+inline size_t G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
   assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
   assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");

+  size_t result = 0;
+
   size_t const num_chunks = num_cards / sizeof(size_t);

   size_t* cur_word = (size_t*)&_byte_map[start_card_index];
@@ -52,6 +56,7 @@
     size_t value = *cur_word;
     if (value == WordAllClean) {
       *cur_word = WordAllDirty;
+      result += sizeof(value);
     } else if (value == WordAllDirty) {
       // do nothing.
     } else {
@@ -61,12 +66,15 @@
         CardValue value = *cur;
         if (value == clean_card_val()) {
           *cur = dirty_card_val();
+          result++;
         }
         cur++;
       }
     }
     cur_word++;
   }
+
+  return result;
 }

 inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) {
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1CollectionSet.cpp
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -27,6 +27,7 @@
 #include "gc/g1/g1CollectionSet.hpp"
 #include "gc/g1/g1CollectionSetCandidates.hpp"
 #include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1HotCardCache.hpp"
 #include "gc/g1/g1ParScanThreadState.hpp"
 #include "gc/g1/g1Policy.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
@@ -410,7 +411,7 @@
   guarantee(target_pause_time_ms > 0.0,
             "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);

-  size_t pending_cards = _policy->pending_cards_at_gc_start();
+  size_t pending_cards = _policy->pending_cards_at_gc_start() + _g1h->hot_card_cache()->num_entries();
   double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards);
   double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);

diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -72,6 +72,8 @@
   _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine);
   _merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
   _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse);
+  _merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+  _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_dirty_cards, MergeRSDirtyCards);

   _gc_par_phases[OptMergeRS] = new WorkerDataArray<double>(max_gc_threads, "Optional Remembered Sets (ms):");
   _opt_merge_rs_merged_sparse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Sparse:");
@@ -80,6 +82,8 @@
   _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine);
   _opt_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
   _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse);
+  _opt_merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+  _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_dirty_cards, MergeRSDirtyCards);

   _gc_par_phases[MergeLB] = new WorkerDataArray<double>(max_gc_threads, "Log Buffers (ms):");
   if (G1HotCardCache::default_use_cache()) {
@@ -304,10 +308,16 @@

 // return the average time for a phase in milliseconds
 double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+  if (_gc_par_phases[phase] == NULL) {
+    return 0.0;
+  }
   return _gc_par_phases[phase]->average() * 1000.0;
 }

 size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) {
+  if (_gc_par_phases[phase] == NULL) {
+    return 0;
+  }
   assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count");
   return _gc_par_phases[phase]->thread_work_items(index)->sum();
 }
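The new "Dirty Cards:" work item follows the usual G1GCPhaseTimes scheme: every merge worker records its own count, and the policy later sums the per-worker values. A rough, standalone stand-in for that bookkeeping, assuming a hypothetical ToyWorkerCounts class (the real WorkerDataArray<size_t> additionally handles logging and linked sub-items):

    #include <cstddef>
    #include <vector>

    // Hypothetical stand-in for WorkerDataArray<size_t>: one slot per GC worker.
    class ToyWorkerCounts {
      std::vector<size_t> _counts;
    public:
      explicit ToyWorkerCounts(unsigned max_workers) : _counts(max_workers, 0) {}
      // record_or_add semantics: a worker may add to its slot several times,
      // e.g. once for MergeRS and once for OptMergeRS merging.
      void add(unsigned worker_id, size_t value) { _counts[worker_id] += value; }
      size_t sum() const {
        size_t s = 0;
        for (size_t v : _counts) s += v;
        return s;
      }
    };

    // Usage: each merge worker reports how many cards it newly dirtied; the
    // policy later divides the merge phase time by this sum to get a cost per
    // merged card.
    //   ToyWorkerCounts dirty_cards(4);
    //   dirty_cards.add(0, 1200); dirty_cards.add(1, 800);
    //   size_t total = dirty_cards.sum();  // 2000
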
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -88,7 +88,8 @@
   enum GCMergeRSWorkTimes {
     MergeRSMergedSparse,
     MergeRSMergedFine,
-    MergeRSMergedCoarse
+    MergeRSMergedCoarse,
+    MergeRSDirtyCards
   };

   enum GCScanHRWorkItems {
@@ -124,6 +125,7 @@
   WorkerDataArray<size_t>* _merge_rs_merged_sparse;
   WorkerDataArray<size_t>* _merge_rs_merged_fine;
   WorkerDataArray<size_t>* _merge_rs_merged_coarse;
+  WorkerDataArray<size_t>* _merge_rs_dirty_cards;

   WorkerDataArray<size_t>* _merge_hcc_dirty_cards;
   WorkerDataArray<size_t>* _merge_hcc_skipped_cards;
@@ -138,6 +140,7 @@
   WorkerDataArray<size_t>* _opt_merge_rs_merged_sparse;
   WorkerDataArray<size_t>* _opt_merge_rs_merged_fine;
   WorkerDataArray<size_t>* _opt_merge_rs_merged_coarse;
+  WorkerDataArray<size_t>* _opt_merge_rs_dirty_cards;

   WorkerDataArray<size_t>* _opt_scan_hr_scanned_cards;
   WorkerDataArray<size_t>* _opt_scan_hr_scanned_blocks;
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1HotCardCache.cpp
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -32,7 +32,7 @@
 G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h):
   _g1h(g1h), _use_cache(false), _card_counts(g1h),
   _hot_cache(NULL), _hot_cache_size(0), _hot_cache_par_chunk_size(0),
-  _hot_cache_idx(0), _hot_cache_par_claimed_idx(0)
+  _hot_cache_idx(0), _hot_cache_par_claimed_idx(0), _cache_wrapped_around(false)
 {}

 void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) {
@@ -48,6 +48,8 @@
     _hot_cache_par_chunk_size = ClaimChunkSize;
     _hot_cache_par_claimed_idx = 0;

+    _cache_wrapped_around = false;
+
     _card_counts.initialize(card_counts_storage);
   }
 }
@@ -69,6 +71,11 @@
   }
   // Otherwise, the card is hot.
   size_t index = Atomic::add(&_hot_cache_idx, 1u) - 1;
+  if (index == _hot_cache_size) {
+    // Can use relaxed store because all racing threads are writing the same
+    // value and there aren't any concurrent readers.
+    Atomic::store(&_cache_wrapped_around, true);
+  }
   size_t masked_index = index & (_hot_cache_size - 1);
   CardValue* current_ptr = _hot_cache[masked_index];

diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1HotCardCache.hpp
--- a/src/hotspot/share/gc/g1/g1HotCardCache.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -81,6 +81,11 @@

   char _pad_after[DEFAULT_CACHE_LINE_SIZE];

+  // Records whether insertion overflowed the hot card cache at least once. This
+  // avoids the need for a separate atomic counter of how many valid entries are
+  // in the HCC.
+  volatile bool _cache_wrapped_around;
+
   // The number of cached cards a thread claims when flushing the cache
   static const int ClaimChunkSize = 32;

@@ -125,13 +130,17 @@
     assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
     assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
     if (default_use_cache()) {
-        reset_hot_cache_internal();
+      reset_hot_cache_internal();
     }
   }

   // Zeros the values in the card counts table for the given region
   void reset_card_counts(HeapRegion* hr);

+  // Number of entries in the HCC.
+  size_t num_entries() const {
+    return _cache_wrapped_around ? _hot_cache_size : _hot_cache_idx + 1;
+  }
 private:
   void reset_hot_cache_internal() {
     assert(_hot_cache != NULL, "Logic");
@@ -139,6 +148,7 @@
     for (size_t i = 0; i < _hot_cache_size; i++) {
       _hot_cache[i] = NULL;
     }
+    _cache_wrapped_around = false;
   }
 };

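The hot card cache only needs to know whether the insertion index ever passed the cache size; once it has, every slot holds a valid entry until the next reset, so no separate entry counter is required. An illustrative ring buffer with the same bookkeeping, not part of the changeset, using std::atomic instead of HotSpot's Atomic:: helpers (the real num_entries() additionally adds one to the unwrapped index, and the real insert() atomically swaps the evicted card out of the slot):

    #include <atomic>
    #include <cstddef>

    class ToyRingCache {
      static const size_t Size = 1024;            // must be a power of two
      const void* _entries[Size] = {};
      std::atomic<size_t> _next_idx{0};           // grows without bound
      std::atomic<bool> _wrapped{false};

    public:
      void insert(const void* card) {
        size_t index = _next_idx.fetch_add(1);    // claim a slot
        if (index == Size) {
          // First time the logical index passes the capacity: from now on
          // every slot is occupied until the cache is reset.
          _wrapped.store(true, std::memory_order_relaxed);
        }
        // Slot update simplified for the sketch; wraps into the fixed array.
        _entries[index & (Size - 1)] = card;
      }

      // Matches the intent of G1HotCardCache::num_entries(): the index and the
      // wrap flag are enough, no per-insert counter is maintained.
      size_t num_entries() const {
        return _wrapped.load(std::memory_order_relaxed)
            ? Size
            : _next_idx.load(std::memory_order_relaxed);
      }
    };
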
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1Policy.cpp
--- a/src/hotspot/share/gc/g1/g1Policy.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -329,9 +329,8 @@
   const double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
   const double survivor_regions_evac_time = predict_survivor_regions_evac_time();
   const size_t pending_cards = _analytics->predict_pending_cards();
-  const size_t scanned_cards = _analytics->predict_card_num(rs_length, true /* for_young_gc */);
   const double base_time_ms =
-    predict_base_elapsed_time_ms(pending_cards, scanned_cards) +
+    predict_base_elapsed_time_ms(pending_cards, rs_length) +
     survivor_regions_evac_time;
   const uint available_free_regions = _free_regions_at_end_of_collection;
   const uint base_free_regions =
@@ -713,67 +712,58 @@
   }
   _short_lived_surv_rate_group->start_adding_regions();

-  // Do that for any other surv rate groups
-  double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0;
-
+  double merge_hcc_time_ms = average_time_ms(G1GCPhaseTimes::MergeHCC);
   if (update_stats) {
-    double cost_per_logged_card = 0.0;
-    size_t const pending_logged_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
-    if (pending_logged_cards > 0) {
-      cost_per_logged_card = logged_cards_processing_time() / pending_logged_cards;
-      _analytics->report_cost_per_logged_card_ms(cost_per_logged_card);
-    }
-    _analytics->report_cost_scan_hcc(scan_hcc_time_ms);
+    size_t const total_log_buffer_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeHCC, G1GCPhaseTimes::MergeHCCDirtyCards) +
+                                          p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
+    // Update prediction for card merge; MergeRSDirtyCards includes the cards from the Eager Reclaim phase.
+    size_t const total_cards_merged = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+                                      p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+                                      total_log_buffer_cards;

-    size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
-                                       p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
-    size_t remset_cards_scanned = 0;
-    // There might have been duplicate log buffer entries in the queues which could
-    // increase this value beyond the cards scanned. In this case attribute all cards
-    // to the log buffers.
-    if (pending_logged_cards <= total_cards_scanned) {
-      remset_cards_scanned = total_cards_scanned - pending_logged_cards;
+    // The threshold for the number of cards in a given sampling which we consider
+    // large enough so that the impact from setup and other costs is negligible.
+    size_t const CardsNumSamplingThreshold = 10;
+
+    if (total_cards_merged > CardsNumSamplingThreshold) {
+      double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
+                                    average_time_ms(G1GCPhaseTimes::MergeRS) +
+                                    average_time_ms(G1GCPhaseTimes::MergeHCC) +
+                                    average_time_ms(G1GCPhaseTimes::MergeLB) +
+                                    average_time_ms(G1GCPhaseTimes::OptMergeRS);
+      _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, this_pause_was_young_only);
     }

-    double cost_per_remset_card_ms = 0.0;
-    if (remset_cards_scanned > 10) {
-      double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) *
-                                     remset_cards_scanned / total_cards_scanned) +
-                                     average_time_ms(G1GCPhaseTimes::MergeER) +
-                                     average_time_ms(G1GCPhaseTimes::MergeRS) +
-                                     average_time_ms(G1GCPhaseTimes::OptMergeRS);
+    // Update prediction for card scan
+    size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
+                                       p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);

-      cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned;
-      _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only);
-    }
+    if (total_cards_scanned > CardsNumSamplingThreshold) {
+      double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
+                                        average_time_ms(G1GCPhaseTimes::OptScanHR);

-    if (_rs_length > 0) {
-      double cards_per_entry_ratio =
-        (double) remset_cards_scanned / (double) _rs_length;
-      _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only);
+      _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, this_pause_was_young_only);
     }

-    // This is defensive. For a while _rs_length could get
-    // smaller than _recorded_rs_length which was causing
-    // rs_length_diff to get very large and mess up the RSet length
-    // predictions. The reason was unsafe concurrent updates to the
-    // _inc_cset_recorded_rs_length field which the code below guards
-    // against (see CR 7118202). This bug has now been fixed (see CR
-    // 7119027). However, I'm still worried that
-    // _inc_cset_recorded_rs_length might still end up somewhat
-    // inaccurate. The concurrent refinement thread calculates an
-    // RSet's length concurrently with other CR threads updating it
-    // which might cause it to calculate the length incorrectly (if,
-    // say, it's in mid-coarsening). So I'll leave in the defensive
-    // conditional below just in case.
-    size_t rs_length_diff = 0;
-    size_t recorded_rs_length = _collection_set->recorded_rs_length();
-    if (_rs_length > recorded_rs_length) {
-      rs_length_diff = _rs_length - recorded_rs_length;
+    // Update prediction for the ratio between cards from the remembered
+    // sets and actually scanned cards from the remembered sets.
+    // Cards from the remembered sets are all cards not duplicated by cards from
+    // the logs.
+    // Due to duplicates in the log buffers, the number of actually scanned cards
+    // can be smaller than the cards in the log buffers.
+    const size_t from_rs_length_cards = (total_cards_scanned > total_log_buffer_cards) ? total_cards_scanned - total_log_buffer_cards : 0;
+    double merge_to_scan_ratio = 0.0;
+    if (total_cards_scanned > 0) {
+      merge_to_scan_ratio = (double) from_rs_length_cards / total_cards_scanned;
     }
-    _analytics->report_rs_length_diff((double) rs_length_diff);
+    _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, this_pause_was_young_only);
+
+    const size_t recorded_rs_length = _collection_set->recorded_rs_length();
+    const size_t rs_length_diff = _rs_length > recorded_rs_length ? _rs_length - recorded_rs_length : 0;
+    _analytics->report_rs_length_diff(rs_length_diff);

+    // Update prediction for copy cost per byte
     size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);

     if (copied_bytes > 0) {
@@ -842,21 +832,21 @@

   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double scan_logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;

-  if (scan_logged_cards_time_goal_ms < scan_hcc_time_ms) {
+  if (scan_logged_cards_time_goal_ms < merge_hcc_time_ms) {
     log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)."
                                 "Logged Cards Scan time goal: %1.2fms Scan HCC time: %1.2fms",
-                                scan_logged_cards_time_goal_ms, scan_hcc_time_ms);
+                                scan_logged_cards_time_goal_ms, merge_hcc_time_ms);

     scan_logged_cards_time_goal_ms = 0;
   } else {
-    scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
+    scan_logged_cards_time_goal_ms -= merge_hcc_time_ms;
   }

   _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
   double const logged_cards_time = logged_cards_processing_time();

   log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
-                              scan_logged_cards_time_goal_ms, logged_cards_time, scan_hcc_time_ms);
+                              scan_logged_cards_time_goal_ms, logged_cards_time, merge_hcc_time_ms);

   _g1h->concurrent_refine()->adjust(logged_cards_time,
                                     phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards),
@@ -936,17 +926,17 @@
 }

 double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards,
-                                              size_t scanned_cards) const {
+                                              size_t rs_length) const {
+  size_t effective_scanned_cards = _analytics->predict_scan_card_num(rs_length, collector_state()->in_young_only_phase());
   return
-    _analytics->predict_rs_update_time_ms(pending_cards) +
-    _analytics->predict_rs_scan_time_ms(scanned_cards, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_merge_time_ms(pending_cards + rs_length, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_scan_time_ms(effective_scanned_cards, collector_state()->in_young_only_phase()) +
     _analytics->predict_constant_other_time_ms();
 }

 double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards) const {
   size_t rs_length = _analytics->predict_rs_length();
-  size_t card_num = _analytics->predict_card_num(rs_length, collector_state()->in_young_only_phase());
-  return predict_base_elapsed_time_ms(pending_cards, card_num);
+  return predict_base_elapsed_time_ms(pending_cards, rs_length);
 }

 size_t G1Policy::predict_bytes_to_copy(HeapRegion* hr) const {
@@ -965,13 +955,13 @@
 double G1Policy::predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const {
   size_t rs_length = hr->rem_set()->occupied();
-  // Predicting the number of cards is based on which type of GC
-  // we're predicting for.
-  size_t card_num = _analytics->predict_card_num(rs_length, for_young_gc);
+  size_t scan_card_num = _analytics->predict_scan_card_num(rs_length, for_young_gc);
+
   size_t bytes_to_copy = predict_bytes_to_copy(hr);

   double region_elapsed_time_ms =
-    _analytics->predict_rs_scan_time_ms(card_num, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_merge_time_ms(rs_length, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_scan_time_ms(scan_card_num, collector_state()->in_young_only_phase()) +
     _analytics->predict_object_copy_time_ms(bytes_to_copy, collector_state()->mark_or_rebuild_in_progress());

   // The prediction of the "other" time for this region is based
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1Policy.hpp
--- a/src/hotspot/share/gc/g1/g1Policy.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -140,9 +140,9 @@
     _rs_length = rs_length;
   }

-  double predict_base_elapsed_time_ms(size_t pending_cards) const;
-  double predict_base_elapsed_time_ms(size_t pending_cards,
-                                      size_t scanned_cards) const;
+  double predict_base_elapsed_time_ms(size_t num_pending_cards) const;
+  double predict_base_elapsed_time_ms(size_t num_pending_cards,
+                                      size_t rs_length) const;

   size_t predict_bytes_to_copy(HeapRegion* hr) const;
   double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const;
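With these signatures the base pause time is predicted from the raw remembered set length: merge cost is charged for the pending log buffer cards plus every remembered set card, while scan cost is charged only for the fraction of cards the merge-to-scan ratio expects to remain after deduplication on the card table. A worked, standalone example with invented constants (the real values come from G1Analytics' decaying averages):

    #include <cstdio>

    // All values below are invented for illustration.
    static const double cost_per_card_merge_ms = 0.0002;  // ms per merged card
    static const double cost_per_card_scan_ms  = 0.0005;  // ms per scanned card
    static const double merge_to_scan_ratio    = 0.8;     // scanned cards per rs entry
    static const double constant_other_time_ms = 2.0;

    // Mirrors the shape of G1Policy::predict_base_elapsed_time_ms(pending, rs_length).
    static double predict_base_time_ms(size_t pending_cards, size_t rs_length) {
      size_t scan_cards = (size_t)(rs_length * merge_to_scan_ratio);
      double merge_ms = (pending_cards + rs_length) * cost_per_card_merge_ms;
      double scan_ms  = scan_cards * cost_per_card_scan_ms;
      return merge_ms + scan_ms + constant_other_time_ms;
    }

    int main() {
      // 20000 pending logged cards and 50000 remembered set entries:
      // merge 70000 * 0.0002 = 14 ms, scan 40000 * 0.0005 = 20 ms, other 2 ms.
      printf("%.1f ms\n", predict_base_time_ms(20000, 50000));  // 36.0 ms
      return 0;
    }
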
diff -r 70021dbed82b -r 9ee940f1de90 src/hotspot/share/gc/g1/g1RemSet.cpp
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -920,6 +920,8 @@
   uint _merged_fine;
   uint _merged_coarse;

+  size_t _cards_dirty;
+
   // Returns if the region contains cards we need to scan. If so, remember that
   // region in the current set of dirty regions.
   bool remember_if_interesting(uint const region_idx) {
@@ -935,7 +937,8 @@
     _ct(G1CollectedHeap::heap()->card_table()),
     _merged_sparse(0),
     _merged_fine(0),
-    _merged_coarse(0) { }
+    _merged_coarse(0),
+    _cards_dirty(0) { }

   void next_coarse_prt(uint const region_idx) {
     if (!remember_if_interesting(region_idx)) {
@@ -945,7 +948,7 @@
     _merged_coarse++;

     size_t region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
-    _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion);
+    _cards_dirty += _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion);
     _scan_state->set_chunk_region_dirty(region_base_idx);
   }

@@ -959,7 +962,7 @@
     size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
     BitMap::idx_t cur = bm->get_next_one_offset(0);
     while (cur != bm->size()) {
-      _ct->mark_clean_as_dirty(region_base_idx + cur);
+      _cards_dirty += _ct->mark_clean_as_dirty(region_base_idx + cur);
       _scan_state->set_chunk_dirty(region_base_idx + cur);
       cur = bm->get_next_one_offset(cur + 1);
     }
@@ -975,7 +978,7 @@
     size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
     for (uint i = 0; i < num_cards; i++) {
       size_t card_idx = region_base_idx + cards[i];
-      _ct->mark_clean_as_dirty(card_idx);
+      _cards_dirty += _ct->mark_clean_as_dirty(card_idx);
       _scan_state->set_chunk_dirty(card_idx);
     }
   }
@@ -994,6 +997,8 @@
   size_t merged_sparse() const { return _merged_sparse; }
   size_t merged_fine() const { return _merged_fine; }
   size_t merged_coarse() const { return _merged_coarse; }
+
+  size_t cards_dirty() const { return _cards_dirty; }
 };

 // Visitor for the remembered sets of humongous candidate regions to merge their
@@ -1039,6 +1044,8 @@
   size_t merged_sparse() const { return _cl.merged_sparse(); }
   size_t merged_fine() const { return _cl.merged_fine(); }
   size_t merged_coarse() const { return _cl.merged_coarse(); }
+
+  size_t cards_dirty() const { return _cl.cards_dirty(); }
 };

 // Visitor for the log buffer entries to merge them into the card table.
@@ -1140,6 +1147,7 @@
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse);
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine);
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse);
+      p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards);
     }

     // Merge remembered sets of current candidates.
@@ -1151,6 +1159,7 @@
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse);
      p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine);
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse);
+      p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards);
     }

     // Apply closure to log entries in the HCC.
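At the end of a pause the per-worker dirty-card counts recorded above, together with the HCC and log buffer counts, feed the two new cost sequences and the merge-to-scan ratio. The following sketch replays that arithmetic on invented sample numbers; PauseCounters and the printed output are illustrative only, since the real code reads the sums from G1GCPhaseTimes and reports them to G1Analytics instead of printing them:

    #include <cstdio>

    // Invented sample numbers standing in for one pause's G1GCPhaseTimes sums.
    struct PauseCounters {
      size_t merged_rs_cards;    // MergeRS + OptMergeRS "Dirty Cards:"
      size_t log_buffer_cards;   // MergeHCC + MergeLB dirty cards
      size_t scanned_cards;      // ScanHR + OptScanHR scanned cards
      double merge_phase_ms;     // MergeER + MergeRS + MergeHCC + MergeLB + OptMergeRS
      double scan_phase_ms;      // ScanHR + OptScanHR
    };

    // Mirrors, in spirit, the bookkeeping in G1Policy::record_collection_pause_end().
    static void update_predictions(const PauseCounters& c) {
      const size_t sampling_threshold = 10;
      size_t total_merged = c.merged_rs_cards + c.log_buffer_cards;
      if (total_merged > sampling_threshold) {
        printf("cost per card merge: %.6f ms\n", c.merge_phase_ms / total_merged);
      }
      if (c.scanned_cards > sampling_threshold) {
        printf("cost per card scan:  %.6f ms\n", c.scan_phase_ms / c.scanned_cards);
      }
      // Cards that came from remembered sets only, i.e. not already covered by
      // log buffer entries; this feeds the merge-to-scan ratio.
      size_t from_rs = (c.scanned_cards > c.log_buffer_cards) ? c.scanned_cards - c.log_buffer_cards : 0;
      if (c.scanned_cards > 0) {
        printf("merge-to-scan ratio: %.2f\n", (double)from_rs / c.scanned_cards);
      }
    }

    int main() {
      PauseCounters c = { 40000, 15000, 45000, 12.0, 30.0 };
      update_predictions(c);  // ~0.000218 ms/merge, ~0.000667 ms/scan, ratio 0.67
      return 0;
    }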