8227739: Merge cost predictions for scanning cards and log buffer entries
Summary: Revamp the cost predictions for the changes in JDK-8200545 and JDK-8213108.
Reviewed-by: sjohanss, kbarrett
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp Fri Nov 29 10:20:14 2019 +0100
@@ -45,11 +45,11 @@
};
// all the same
-static double young_cards_per_entry_ratio_defaults[] = {
+static double young_card_merge_to_scan_ratio_defaults[] = {
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
};
-static double young_only_cost_per_remset_card_ms_defaults[] = {
+static double young_only_cost_per_card_scan_ms_defaults[] = {
0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
};
@@ -62,7 +62,6 @@
5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
};
-
static double young_other_cost_per_region_ms_defaults[] = {
0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1
};
@@ -81,13 +80,13 @@
_rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
_concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
- _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
- _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
- _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
- _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
- _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
- _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
- _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+ _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+ _mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+ _young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+ _mixed_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+ _young_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+ _mixed_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+ _copy_cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_non_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -109,11 +108,10 @@
_concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
// Some applications have very low rates for logging cards.
_logged_cards_rate_ms_seq->add(0.0);
- _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]);
- _cost_scan_hcc_seq->add(0.0);
- _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
- _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]);
- _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
+ _young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]);
+ _young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]);
+
+ _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
_constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
_young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]);
_non_young_other_cost_per_region_ms_seq->add(non_young_other_cost_per_region_ms_defaults[index]);
@@ -123,6 +121,10 @@
_concurrent_mark_cleanup_times_ms->add(0.20);
}
+bool G1Analytics::enough_samples_available(TruncatedSeq const* seq) const {
+ return seq->num() >= 3;
+}
+
double G1Analytics::get_new_prediction(TruncatedSeq const* seq) const {
return _predictor->get_new_prediction(seq);
}
@@ -166,27 +168,27 @@
_logged_cards_rate_ms_seq->add(cards_per_ms);
}
-void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) {
- _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms);
-}
-
-void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) {
- _cost_scan_hcc_seq->add(cost_scan_hcc);
-}
-
-void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) {
if (for_young_gc) {
- _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+ _young_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
} else {
- _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+ _mixed_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
}
}
-void G1Analytics::report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc) {
if (for_young_gc) {
- _young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+ _young_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
} else {
- _mixed_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+ _mixed_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
+ }
+}
+
+void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_gc) {
+ if (for_young_gc) {
+ _young_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
+ } else {
+ _mixed_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
}
}
@@ -198,7 +200,7 @@
if (mark_or_rebuild_in_progress) {
_cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
} else {
- _cost_per_byte_ms_seq->add(cost_per_byte_ms);
+ _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms);
}
}
@@ -234,57 +236,37 @@
return get_new_prediction(_logged_cards_rate_ms_seq);
}
-double G1Analytics::predict_cost_per_logged_card_ms() const {
- return get_new_prediction(_cost_per_logged_card_ms_seq);
-}
-
-double G1Analytics::predict_scan_hcc_ms() const {
- return get_new_prediction(_cost_scan_hcc_seq);
+double G1Analytics::predict_young_card_merge_to_scan_ratio() const {
+ return get_new_prediction(_young_card_merge_to_scan_ratio_seq);
}
-double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const {
- return pending_cards * predict_cost_per_logged_card_ms() + predict_scan_hcc_ms();
-}
-
-double G1Analytics::predict_young_cards_per_entry_ratio() const {
- return get_new_prediction(_young_cards_per_entry_ratio_seq);
-}
-
-double G1Analytics::predict_mixed_cards_per_entry_ratio() const {
- if (_mixed_cards_per_entry_ratio_seq->num() < 2) {
- return predict_young_cards_per_entry_ratio();
+size_t G1Analytics::predict_scan_card_num(size_t rs_length, bool for_young_gc) const {
+ if (for_young_gc || !enough_samples_available(_mixed_card_merge_to_scan_ratio_seq)) {
+ return (size_t) (rs_length * predict_young_card_merge_to_scan_ratio());
} else {
- return get_new_prediction(_mixed_cards_per_entry_ratio_seq);
+ return (size_t) (rs_length * get_new_prediction(_mixed_card_merge_to_scan_ratio_seq));
}
}
-size_t G1Analytics::predict_card_num(size_t rs_length, bool for_young_gc) const {
- if (for_young_gc) {
- return (size_t) (rs_length * predict_young_cards_per_entry_ratio());
+double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const {
+ if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_merge_ms_seq)) {
+ return card_num * get_new_prediction(_young_cost_per_card_merge_ms_seq);
} else {
- return (size_t) (rs_length * predict_mixed_cards_per_entry_ratio());
+ return card_num * get_new_prediction(_mixed_cost_per_card_merge_ms_seq);
}
}
-double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const {
- if (for_young_gc) {
- return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
+double G1Analytics::predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const {
+ if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_scan_ms_seq)) {
+ return card_num * get_new_prediction(_young_cost_per_card_scan_ms_seq);
} else {
- return predict_mixed_rs_scan_time_ms(card_num);
- }
-}
-
-double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const {
- if (_mixed_cost_per_remset_card_ms_seq->num() < 3) {
- return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
- } else {
- return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq);
+ return card_num * get_new_prediction(_mixed_cost_per_card_scan_ms_seq);
}
}
double G1Analytics::predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const {
- if (_cost_per_byte_ms_during_cm_seq->num() < 3) {
- return (1.1 * bytes_to_copy) * get_new_prediction(_cost_per_byte_ms_seq);
+ if (!enough_samples_available(_cost_per_byte_ms_during_cm_seq)) {
+ return (1.1 * bytes_to_copy) * get_new_prediction(_copy_cost_per_byte_ms_seq);
} else {
return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_during_cm_seq);
}
@@ -294,14 +276,10 @@
if (during_concurrent_mark) {
return predict_object_copy_time_ms_during_cm(bytes_to_copy);
} else {
- return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_seq);
+ return bytes_to_copy * get_new_prediction(_copy_cost_per_byte_ms_seq);
}
}
-double G1Analytics::predict_cost_per_byte_ms() const {
- return get_new_prediction(_cost_per_byte_ms_seq);
-}
-
double G1Analytics::predict_constant_other_time_ms() const {
return get_new_prediction(_constant_other_time_ms_seq);
}
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp Fri Nov 29 10:20:14 2019 +0100
@@ -48,13 +48,21 @@
TruncatedSeq* _rs_length_diff_seq;
TruncatedSeq* _concurrent_refine_rate_ms_seq;
TruncatedSeq* _logged_cards_rate_ms_seq;
- TruncatedSeq* _cost_per_logged_card_ms_seq;
- TruncatedSeq* _cost_scan_hcc_seq;
- TruncatedSeq* _young_cards_per_entry_ratio_seq;
- TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
- TruncatedSeq* _young_only_cost_per_remset_card_ms_seq;
- TruncatedSeq* _mixed_cost_per_remset_card_ms_seq;
- TruncatedSeq* _cost_per_byte_ms_seq;
+ // The ratio between the number of merged cards and actually scanned cards, for
+ // young-only and mixed gcs.
+ TruncatedSeq* _young_card_merge_to_scan_ratio_seq;
+ TruncatedSeq* _mixed_card_merge_to_scan_ratio_seq;
+
+ // The cost to scan a card during young-only and mixed gcs in ms.
+ TruncatedSeq* _young_cost_per_card_scan_ms_seq;
+ TruncatedSeq* _mixed_cost_per_card_scan_ms_seq;
+
+ // The cost to merge a card during young-only and mixed gcs in ms.
+ TruncatedSeq* _young_cost_per_card_merge_ms_seq;
+ TruncatedSeq* _mixed_cost_per_card_merge_ms_seq;
+
+ // The cost to copy a byte in ms.
+ TruncatedSeq* _copy_cost_per_byte_ms_seq;
TruncatedSeq* _constant_other_time_ms_seq;
TruncatedSeq* _young_other_cost_per_region_ms_seq;
TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
@@ -72,6 +80,10 @@
double _recent_avg_pause_time_ratio;
double _last_pause_time_ratio;
+ // Returns whether the sequence has enough samples to get a "good" prediction.
+ // The threshold used is arbitrary but "small".
+ bool enough_samples_available(TruncatedSeq const* seq) const;
+
double get_new_prediction(TruncatedSeq const* seq) const;
size_t get_new_size_prediction(TruncatedSeq const* seq) const;
@@ -103,10 +115,9 @@
void report_alloc_rate_ms(double alloc_rate);
void report_concurrent_refine_rate_ms(double cards_per_ms);
void report_logged_cards_rate_ms(double cards_per_ms);
- void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
- void report_cost_scan_hcc(double cost_scan_hcc);
- void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
- void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc);
+ void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc);
+ void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc);
+ void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc);
void report_rs_length_diff(double rs_length_diff);
void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
@@ -120,21 +131,14 @@
double predict_concurrent_refine_rate_ms() const;
double predict_logged_cards_rate_ms() const;
- double predict_cost_per_logged_card_ms() const;
-
- double predict_scan_hcc_ms() const;
+ double predict_young_card_merge_to_scan_ratio() const;
- double predict_rs_update_time_ms(size_t pending_cards) const;
-
- double predict_young_cards_per_entry_ratio() const;
+ double predict_mixed_card_merge_to_scan_ratio() const;
- double predict_mixed_cards_per_entry_ratio() const;
-
- size_t predict_card_num(size_t rs_length, bool for_young_gc) const;
+ size_t predict_scan_card_num(size_t rs_length, bool for_young_gc) const;
- double predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const;
-
- double predict_mixed_rs_scan_time_ms(size_t card_num) const;
+ double predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const;
+ double predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const;
double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const;
@@ -153,8 +157,6 @@
size_t predict_rs_length() const;
size_t predict_pending_cards() const;
- double predict_cost_per_byte_ms() const;
-
// Add a new GC of the given duration and end time to the record.
void update_recent_gc_times(double end_time_sec, double elapsed_ms);
void compute_pause_time_ratio(double interval_ms, double pause_time_ms);
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CardTable.hpp Fri Nov 29 10:20:14 2019 +0100
@@ -92,12 +92,16 @@
return pointer_delta(p, _byte_map, sizeof(CardValue));
}
- // Mark the given card as Dirty if it is Clean.
- inline void mark_clean_as_dirty(size_t card_index);
+ // Mark the given card as Dirty if it is Clean. Returns 1 if the card was Clean
+ // and has been marked Dirty, 0 otherwise. This result may be inaccurate as the
+ // check and the store are not performed atomically.
+ inline size_t mark_clean_as_dirty(size_t card_index);
// Change Clean cards in a (large) area on the card table as Dirty, preserving
// already scanned cards. Assumes that most cards in that area are Clean.
- inline void mark_region_dirty(size_t start_card_index, size_t num_cards);
+ // Returns the number of cards changed from Clean to Dirty. This result may
+ // be inaccurate as the dirtying is not performed atomically.
+ inline size_t mark_region_dirty(size_t start_card_index, size_t num_cards);
// Mark the given range of cards as Scanned. All of these cards must be Dirty.
inline void mark_as_scanned(size_t start_card_index, size_t num_cards);
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Fri Nov 29 10:20:14 2019 +0100
@@ -33,17 +33,21 @@
return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift));
}
-inline void G1CardTable::mark_clean_as_dirty(size_t card_index) {
+inline size_t G1CardTable::mark_clean_as_dirty(size_t card_index) {
CardValue value = _byte_map[card_index];
if (value == clean_card_val()) {
_byte_map[card_index] = dirty_card_val();
+ return 1;
}
+ return 0;
}
-inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
+inline size_t G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");
+ size_t result = 0;
+
size_t const num_chunks = num_cards / sizeof(size_t);
size_t* cur_word = (size_t*)&_byte_map[start_card_index];
@@ -52,6 +56,7 @@
size_t value = *cur_word;
if (value == WordAllClean) {
*cur_word = WordAllDirty;
+ result += sizeof(value);
} else if (value == WordAllDirty) {
// do nothing.
} else {
@@ -61,12 +66,15 @@
CardValue value = *cur;
if (value == clean_card_val()) {
*cur = dirty_card_val();
+ result++;
}
cur++;
}
}
cur_word++;
}
+
+ return result;
}
inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) {
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp Fri Nov 29 10:20:14 2019 +0100
@@ -27,6 +27,7 @@
#include "gc/g1/g1CollectionSet.hpp"
#include "gc/g1/g1CollectionSetCandidates.hpp"
#include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1HotCardCache.hpp"
#include "gc/g1/g1ParScanThreadState.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/heapRegion.inline.hpp"
@@ -410,7 +411,7 @@
guarantee(target_pause_time_ms > 0.0,
"target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
- size_t pending_cards = _policy->pending_cards_at_gc_start();
+ size_t pending_cards = _policy->pending_cards_at_gc_start() + _g1h->hot_card_cache()->num_entries();
double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards);
double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Fri Nov 29 10:20:14 2019 +0100
@@ -72,6 +72,8 @@
_gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine);
_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
_gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse);
+ _merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+ _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_dirty_cards, MergeRSDirtyCards);
_gc_par_phases[OptMergeRS] = new WorkerDataArray<double>(max_gc_threads, "Optional Remembered Sets (ms):");
_opt_merge_rs_merged_sparse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Sparse:");
@@ -80,6 +82,8 @@
_gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine);
_opt_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
_gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse);
+ _opt_merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+ _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_dirty_cards, MergeRSDirtyCards);
_gc_par_phases[MergeLB] = new WorkerDataArray<double>(max_gc_threads, "Log Buffers (ms):");
if (G1HotCardCache::default_use_cache()) {
@@ -304,10 +308,16 @@
// return the average time for a phase in milliseconds
double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+ if (_gc_par_phases[phase] == NULL) {
+ return 0.0;
+ }
return _gc_par_phases[phase]->average() * 1000.0;
}
size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) {
+ if (_gc_par_phases[phase] == NULL) {
+ return 0;
+ }
assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count");
return _gc_par_phases[phase]->thread_work_items(index)->sum();
}
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Fri Nov 29 10:20:14 2019 +0100
@@ -88,7 +88,8 @@
enum GCMergeRSWorkTimes {
MergeRSMergedSparse,
MergeRSMergedFine,
- MergeRSMergedCoarse
+ MergeRSMergedCoarse,
+ MergeRSDirtyCards
};
enum GCScanHRWorkItems {
@@ -124,6 +125,7 @@
WorkerDataArray<size_t>* _merge_rs_merged_sparse;
WorkerDataArray<size_t>* _merge_rs_merged_fine;
WorkerDataArray<size_t>* _merge_rs_merged_coarse;
+ WorkerDataArray<size_t>* _merge_rs_dirty_cards;
WorkerDataArray<size_t>* _merge_hcc_dirty_cards;
WorkerDataArray<size_t>* _merge_hcc_skipped_cards;
@@ -138,6 +140,7 @@
WorkerDataArray<size_t>* _opt_merge_rs_merged_sparse;
WorkerDataArray<size_t>* _opt_merge_rs_merged_fine;
WorkerDataArray<size_t>* _opt_merge_rs_merged_coarse;
+ WorkerDataArray<size_t>* _opt_merge_rs_dirty_cards;
WorkerDataArray<size_t>* _opt_scan_hr_scanned_cards;
WorkerDataArray<size_t>* _opt_scan_hr_scanned_blocks;
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp Fri Nov 29 10:20:14 2019 +0100
@@ -32,7 +32,7 @@
G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h):
_g1h(g1h), _use_cache(false), _card_counts(g1h),
_hot_cache(NULL), _hot_cache_size(0), _hot_cache_par_chunk_size(0),
- _hot_cache_idx(0), _hot_cache_par_claimed_idx(0)
+ _hot_cache_idx(0), _hot_cache_par_claimed_idx(0), _cache_wrapped_around(false)
{}
void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) {
@@ -48,6 +48,8 @@
_hot_cache_par_chunk_size = ClaimChunkSize;
_hot_cache_par_claimed_idx = 0;
+ _cache_wrapped_around = false;
+
_card_counts.initialize(card_counts_storage);
}
}
@@ -69,6 +71,11 @@
}
// Otherwise, the card is hot.
size_t index = Atomic::add(&_hot_cache_idx, 1u) - 1;
+ if (index == _hot_cache_size) {
+ // Can use relaxed store because all racing threads are writing the same
+ // value and there aren't any concurrent readers.
+ Atomic::store(&_cache_wrapped_around, true);
+ }
size_t masked_index = index & (_hot_cache_size - 1);
CardValue* current_ptr = _hot_cache[masked_index];
--- a/src/hotspot/share/gc/g1/g1HotCardCache.hpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.hpp Fri Nov 29 10:20:14 2019 +0100
@@ -81,6 +81,11 @@
char _pad_after[DEFAULT_CACHE_LINE_SIZE];
+ // Records whether insertion overflowed the hot card cache at least once. This
+ // avoids the need for a separate atomic counter of how many valid entries are
+ // in the HCC.
+ volatile bool _cache_wrapped_around;
+
// The number of cached cards a thread claims when flushing the cache
static const int ClaimChunkSize = 32;
@@ -125,13 +130,17 @@
assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
if (default_use_cache()) {
- reset_hot_cache_internal();
+ reset_hot_cache_internal();
}
}
// Zeros the values in the card counts table for the given region
void reset_card_counts(HeapRegion* hr);
+ // Number of entries in the HCC.
+ size_t num_entries() const {
+ return _cache_wrapped_around ? _hot_cache_size : _hot_cache_idx + 1;
+ }
private:
void reset_hot_cache_internal() {
assert(_hot_cache != NULL, "Logic");
@@ -139,6 +148,7 @@
for (size_t i = 0; i < _hot_cache_size; i++) {
_hot_cache[i] = NULL;
}
+ _cache_wrapped_around = false;
}
};
--- a/src/hotspot/share/gc/g1/g1Policy.cpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp Fri Nov 29 10:20:14 2019 +0100
@@ -329,9 +329,8 @@
const double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
const double survivor_regions_evac_time = predict_survivor_regions_evac_time();
const size_t pending_cards = _analytics->predict_pending_cards();
- const size_t scanned_cards = _analytics->predict_card_num(rs_length, true /* for_young_gc */);
const double base_time_ms =
- predict_base_elapsed_time_ms(pending_cards, scanned_cards) +
+ predict_base_elapsed_time_ms(pending_cards, rs_length) +
survivor_regions_evac_time;
const uint available_free_regions = _free_regions_at_end_of_collection;
const uint base_free_regions =
@@ -713,67 +712,58 @@
}
_short_lived_surv_rate_group->start_adding_regions();
- // Do that for any other surv rate groups
- double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0;
-
+ double merge_hcc_time_ms = average_time_ms(G1GCPhaseTimes::MergeHCC);
if (update_stats) {
- double cost_per_logged_card = 0.0;
- size_t const pending_logged_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
- if (pending_logged_cards > 0) {
- cost_per_logged_card = logged_cards_processing_time() / pending_logged_cards;
- _analytics->report_cost_per_logged_card_ms(cost_per_logged_card);
- }
- _analytics->report_cost_scan_hcc(scan_hcc_time_ms);
+ size_t const total_log_buffer_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeHCC, G1GCPhaseTimes::MergeHCCDirtyCards) +
+ p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
+ // Update prediction for card merge; MergeRSDirtyCards includes the cards from the Eager Reclaim phase.
+ size_t const total_cards_merged = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+ p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+ total_log_buffer_cards;
- size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
- p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
- size_t remset_cards_scanned = 0;
- // There might have been duplicate log buffer entries in the queues which could
- // increase this value beyond the cards scanned. In this case attribute all cards
- // to the log buffers.
- if (pending_logged_cards <= total_cards_scanned) {
- remset_cards_scanned = total_cards_scanned - pending_logged_cards;
+ // The threshold for the number of cards in a given sampling which we consider
+ // large enough so that the impact from setup and other costs is negligible.
+ size_t const CardsNumSamplingThreshold = 10;
+
+ if (total_cards_merged > CardsNumSamplingThreshold) {
+ double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
+ average_time_ms(G1GCPhaseTimes::MergeRS) +
+ average_time_ms(G1GCPhaseTimes::MergeHCC) +
+ average_time_ms(G1GCPhaseTimes::MergeLB) +
+ average_time_ms(G1GCPhaseTimes::OptMergeRS);
+ _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, this_pause_was_young_only);
}
- double cost_per_remset_card_ms = 0.0;
- if (remset_cards_scanned > 10) {
- double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) *
- remset_cards_scanned / total_cards_scanned) +
- average_time_ms(G1GCPhaseTimes::MergeER) +
- average_time_ms(G1GCPhaseTimes::MergeRS) +
- average_time_ms(G1GCPhaseTimes::OptMergeRS);
+ // Update prediction for card scan
+ size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
+ p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
- cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned;
- _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only);
- }
+ if (total_cards_scanned > CardsNumSamplingThreshold) {
+ double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
+ average_time_ms(G1GCPhaseTimes::OptScanHR);
- if (_rs_length > 0) {
- double cards_per_entry_ratio =
- (double) remset_cards_scanned / (double) _rs_length;
- _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only);
+ _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, this_pause_was_young_only);
}
- // This is defensive. For a while _rs_length could get
- // smaller than _recorded_rs_length which was causing
- // rs_length_diff to get very large and mess up the RSet length
- // predictions. The reason was unsafe concurrent updates to the
- // _inc_cset_recorded_rs_length field which the code below guards
- // against (see CR 7118202). This bug has now been fixed (see CR
- // 7119027). However, I'm still worried that
- // _inc_cset_recorded_rs_length might still end up somewhat
- // inaccurate. The concurrent refinement thread calculates an
- // RSet's length concurrently with other CR threads updating it
- // which might cause it to calculate the length incorrectly (if,
- // say, it's in mid-coarsening). So I'll leave in the defensive
- // conditional below just in case.
- size_t rs_length_diff = 0;
- size_t recorded_rs_length = _collection_set->recorded_rs_length();
- if (_rs_length > recorded_rs_length) {
- rs_length_diff = _rs_length - recorded_rs_length;
+ // Update prediction for the merge-to-scan ratio: the fraction of all scanned
+ // cards that is attributed to the remembered sets.
+ // Cards attributed to the remembered sets are the scanned cards minus the
+ // cards from the log buffers (which here include the hot card cache cards).
+ // Due to duplicates in the log buffers, the number of actually scanned cards
+ // can be smaller than the cards in the log buffers; the count is then zero.
+ const size_t from_rs_length_cards = (total_cards_scanned > total_log_buffer_cards) ? total_cards_scanned - total_log_buffer_cards : 0;
+ double merge_to_scan_ratio = 0.0;
+ if (total_cards_scanned > 0) {
+ merge_to_scan_ratio = (double) from_rs_length_cards / total_cards_scanned;
}
- _analytics->report_rs_length_diff((double) rs_length_diff);
+ _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, this_pause_was_young_only);
+ const size_t recorded_rs_length = _collection_set->recorded_rs_length();
+ const size_t rs_length_diff = _rs_length > recorded_rs_length ? _rs_length - recorded_rs_length : 0;
+ _analytics->report_rs_length_diff(rs_length_diff);
+
+ // Update prediction for copy cost per byte
size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
if (copied_bytes > 0) {
@@ -842,21 +832,21 @@
// Note that _mmu_tracker->max_gc_time() returns the time in seconds.
double scan_logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
- if (scan_logged_cards_time_goal_ms < scan_hcc_time_ms) {
+ if (scan_logged_cards_time_goal_ms < merge_hcc_time_ms) {
log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)."
"Logged Cards Scan time goal: %1.2fms Scan HCC time: %1.2fms",
- scan_logged_cards_time_goal_ms, scan_hcc_time_ms);
+ scan_logged_cards_time_goal_ms, merge_hcc_time_ms);
scan_logged_cards_time_goal_ms = 0;
} else {
- scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
+ scan_logged_cards_time_goal_ms -= merge_hcc_time_ms;
}
_pending_cards_at_prev_gc_end = _g1h->pending_card_num();
double const logged_cards_time = logged_cards_processing_time();
log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
- scan_logged_cards_time_goal_ms, logged_cards_time, scan_hcc_time_ms);
+ scan_logged_cards_time_goal_ms, logged_cards_time, merge_hcc_time_ms);
_g1h->concurrent_refine()->adjust(logged_cards_time,
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards),
@@ -936,17 +926,17 @@
}
double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards,
- size_t scanned_cards) const {
+ size_t rs_length) const {
+ size_t effective_scanned_cards = _analytics->predict_scan_card_num(rs_length, collector_state()->in_young_only_phase());
return
- _analytics->predict_rs_update_time_ms(pending_cards) +
- _analytics->predict_rs_scan_time_ms(scanned_cards, collector_state()->in_young_only_phase()) +
+ _analytics->predict_card_merge_time_ms(pending_cards + rs_length, collector_state()->in_young_only_phase()) +
+ _analytics->predict_card_scan_time_ms(effective_scanned_cards, collector_state()->in_young_only_phase()) +
_analytics->predict_constant_other_time_ms();
}
double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards) const {
size_t rs_length = _analytics->predict_rs_length();
- size_t card_num = _analytics->predict_card_num(rs_length, collector_state()->in_young_only_phase());
- return predict_base_elapsed_time_ms(pending_cards, card_num);
+ return predict_base_elapsed_time_ms(pending_cards, rs_length);
}
size_t G1Policy::predict_bytes_to_copy(HeapRegion* hr) const {
@@ -965,13 +955,13 @@
double G1Policy::predict_region_elapsed_time_ms(HeapRegion* hr,
bool for_young_gc) const {
size_t rs_length = hr->rem_set()->occupied();
- // Predicting the number of cards is based on which type of GC
- // we're predicting for.
- size_t card_num = _analytics->predict_card_num(rs_length, for_young_gc);
+ size_t scan_card_num = _analytics->predict_scan_card_num(rs_length, for_young_gc);
+
size_t bytes_to_copy = predict_bytes_to_copy(hr);
double region_elapsed_time_ms =
- _analytics->predict_rs_scan_time_ms(card_num, collector_state()->in_young_only_phase()) +
+ _analytics->predict_card_merge_time_ms(rs_length, collector_state()->in_young_only_phase()) +
+ _analytics->predict_card_scan_time_ms(scan_card_num, collector_state()->in_young_only_phase()) +
_analytics->predict_object_copy_time_ms(bytes_to_copy, collector_state()->mark_or_rebuild_in_progress());
// The prediction of the "other" time for this region is based
--- a/src/hotspot/share/gc/g1/g1Policy.hpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp Fri Nov 29 10:20:14 2019 +0100
@@ -140,9 +140,9 @@
_rs_length = rs_length;
}
- double predict_base_elapsed_time_ms(size_t pending_cards) const;
- double predict_base_elapsed_time_ms(size_t pending_cards,
- size_t scanned_cards) const;
+ double predict_base_elapsed_time_ms(size_t num_pending_cards) const;
+ double predict_base_elapsed_time_ms(size_t num_pending_cards,
+ size_t rs_length) const;
size_t predict_bytes_to_copy(HeapRegion* hr) const;
double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const;
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Fri Nov 29 10:20:14 2019 +0100
@@ -920,6 +920,8 @@
uint _merged_fine;
uint _merged_coarse;
+ size_t _cards_dirty;
+
// Returns if the region contains cards we need to scan. If so, remember that
// region in the current set of dirty regions.
bool remember_if_interesting(uint const region_idx) {
@@ -935,7 +937,8 @@
_ct(G1CollectedHeap::heap()->card_table()),
_merged_sparse(0),
_merged_fine(0),
- _merged_coarse(0) { }
+ _merged_coarse(0),
+ _cards_dirty(0) { }
void next_coarse_prt(uint const region_idx) {
if (!remember_if_interesting(region_idx)) {
@@ -945,7 +948,7 @@
_merged_coarse++;
size_t region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
- _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion);
+ _cards_dirty += _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion);
_scan_state->set_chunk_region_dirty(region_base_idx);
}
@@ -959,7 +962,7 @@
size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
BitMap::idx_t cur = bm->get_next_one_offset(0);
while (cur != bm->size()) {
- _ct->mark_clean_as_dirty(region_base_idx + cur);
+ _cards_dirty += _ct->mark_clean_as_dirty(region_base_idx + cur);
_scan_state->set_chunk_dirty(region_base_idx + cur);
cur = bm->get_next_one_offset(cur + 1);
}
@@ -975,7 +978,7 @@
size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
for (uint i = 0; i < num_cards; i++) {
size_t card_idx = region_base_idx + cards[i];
- _ct->mark_clean_as_dirty(card_idx);
+ _cards_dirty += _ct->mark_clean_as_dirty(card_idx);
_scan_state->set_chunk_dirty(card_idx);
}
}
@@ -994,6 +997,8 @@
size_t merged_sparse() const { return _merged_sparse; }
size_t merged_fine() const { return _merged_fine; }
size_t merged_coarse() const { return _merged_coarse; }
+
+ size_t cards_dirty() const { return _cards_dirty; }
};
// Visitor for the remembered sets of humongous candidate regions to merge their
@@ -1039,6 +1044,8 @@
size_t merged_sparse() const { return _cl.merged_sparse(); }
size_t merged_fine() const { return _cl.merged_fine(); }
size_t merged_coarse() const { return _cl.merged_coarse(); }
+
+ size_t cards_dirty() const { return _cl.cards_dirty(); }
};
// Visitor for the log buffer entries to merge them into the card table.
@@ -1140,6 +1147,7 @@
p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse);
p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine);
p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse);
+ p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards);
}
// Merge remembered sets of current candidates.
@@ -1151,6 +1159,7 @@
p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse);
p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine);
p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse);
+ p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards);
}
// Apply closure to log entries in the HCC.