# HG changeset patch
# User tschatzl
# Date 1561628912 -7200
# Node ID 3e31a8beaae45072bdade73d4ff7842ca09a4c85
# Parent  d58442b8abc1f27c5dc4631e457875ede48872ef
8213108: Improve work distribution during remembered set scan
Summary: Before scanning the heap for roots into the collection set, merge them into a single remembered set (card table) and do work distribution based on location like other collectors do.
Reviewed-by: kbarrett, lkorinth

diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1Analytics.cpp
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp Thu Jun 27 03:33:44 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp Thu Jun 27 11:48:32 2019 +0200
@@ -38,7 +38,7 @@
   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 };
 
-static double cost_per_card_ms_defaults[] = {
+static double cost_per_log_buffer_entry_ms_defaults[] = {
   0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
 };
 
@@ -47,7 +47,7 @@
   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };
 
-static double cost_per_entry_ms_defaults[] = {
+static double young_only_cost_per_remset_card_ms_defaults[] = {
   0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
 };
 
@@ -77,12 +77,12 @@
   _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _prev_collection_pause_end_ms(0.0),
   _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
-  _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_per_log_buffer_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
   _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
   _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-  _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-  _mixed_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -101,10 +101,10 @@
   int index = MIN2(ParallelGCThreads - 1, 7u);
 
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
-  _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
+  _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms_defaults[index]);
   _cost_scan_hcc_seq->add(0.0);
   _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
-  _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]);
+  _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]);
   _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
   _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
   _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]);
@@ -158,19 +158,19 @@
     (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms;
 }
 
-void G1Analytics::report_cost_per_card_ms(double cost_per_card_ms) {
-  _cost_per_card_ms_seq->add(cost_per_card_ms);
+void G1Analytics::report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms) {
+  _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms);
 }
 
 void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) {
   _cost_scan_hcc_seq->add(cost_scan_hcc);
 }
 
-void G1Analytics::report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc) {
+void
G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) { if (for_young_gc) { - _cost_per_entry_ms_seq->add(cost_per_entry_ms); + _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); } else { - _mixed_cost_per_entry_ms_seq->add(cost_per_entry_ms); + _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); } } @@ -222,8 +222,8 @@ return get_new_prediction(_alloc_rate_ms_seq); } -double G1Analytics::predict_cost_per_card_ms() const { - return get_new_prediction(_cost_per_card_ms_seq); +double G1Analytics::predict_cost_per_log_buffer_entry_ms() const { + return get_new_prediction(_cost_per_log_buffer_entry_ms_seq); } double G1Analytics::predict_scan_hcc_ms() const { @@ -231,7 +231,7 @@ } double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const { - return pending_cards * predict_cost_per_card_ms() + predict_scan_hcc_ms(); + return pending_cards * predict_cost_per_log_buffer_entry_ms() + predict_scan_hcc_ms(); } double G1Analytics::predict_young_cards_per_entry_ratio() const { @@ -256,17 +256,17 @@ double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const { if (for_young_gc) { - return card_num * get_new_prediction(_cost_per_entry_ms_seq); + return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); } else { return predict_mixed_rs_scan_time_ms(card_num); } } double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const { - if (_mixed_cost_per_entry_ms_seq->num() < 3) { - return card_num * get_new_prediction(_cost_per_entry_ms_seq); + if (_mixed_cost_per_remset_card_ms_seq->num() < 3) { + return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); } else { - return card_num * get_new_prediction(_mixed_cost_per_entry_ms_seq); + return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq); } } diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1Analytics.hpp --- a/src/hotspot/share/gc/g1/g1Analytics.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1Analytics.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -46,12 +46,12 @@ double _prev_collection_pause_end_ms; TruncatedSeq* _rs_length_diff_seq; - TruncatedSeq* _cost_per_card_ms_seq; + TruncatedSeq* _cost_per_log_buffer_entry_ms_seq; TruncatedSeq* _cost_scan_hcc_seq; TruncatedSeq* _young_cards_per_entry_ratio_seq; TruncatedSeq* _mixed_cards_per_entry_ratio_seq; - TruncatedSeq* _cost_per_entry_ms_seq; - TruncatedSeq* _mixed_cost_per_entry_ms_seq; + TruncatedSeq* _young_only_cost_per_remset_card_ms_seq; + TruncatedSeq* _mixed_cost_per_remset_card_ms_seq; TruncatedSeq* _cost_per_byte_ms_seq; TruncatedSeq* _constant_other_time_ms_seq; TruncatedSeq* _young_other_cost_per_region_ms_seq; @@ -99,9 +99,9 @@ void report_concurrent_mark_remark_times_ms(double ms); void report_concurrent_mark_cleanup_times_ms(double ms); void report_alloc_rate_ms(double alloc_rate); - void report_cost_per_card_ms(double cost_per_card_ms); + void report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms); void report_cost_scan_hcc(double cost_scan_hcc); - void report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc); + void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc); void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc); void report_rs_length_diff(double rs_length_diff); void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress); @@ -116,7 +116,7 @@ double 
predict_alloc_rate_ms() const; int num_alloc_rate_ms() const; - double predict_cost_per_card_ms() const; + double predict_cost_per_log_buffer_entry_ms() const; double predict_scan_hcc_ms() const; diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CardTable.cpp --- a/src/hotspot/share/gc/g1/g1CardTable.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CardTable.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -30,28 +30,6 @@ #include "runtime/atomic.hpp" #include "runtime/orderAccess.hpp" -bool G1CardTable::mark_card_deferred(size_t card_index) { - CardValue val = _byte_map[card_index]; - // It's already processed - if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) { - return false; - } - - // Cached bit can be installed either on a clean card or on a claimed card. - CardValue new_val = val; - if (val == clean_card_val()) { - new_val = deferred_card_val(); - } else { - if (val & claimed_card_val()) { - new_val = val | deferred_card_val(); - } - } - if (new_val != val) { - Atomic::cmpxchg(new_val, &_byte_map[card_index], val); - } - return true; -} - void G1CardTable::g1_mark_as_young(const MemRegion& mr) { CardValue *const first = byte_for(mr.start()); CardValue *const last = byte_after(mr.last()); diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CardTable.hpp --- a/src/hotspot/share/gc/g1/g1CardTable.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CardTable.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -44,55 +44,65 @@ virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled); }; -class G1CardTable: public CardTable { +class G1CardTable : public CardTable { friend class VMStructs; friend class G1CardTableChangedListener; G1CardTableChangedListener _listener; +public: enum G1CardValues { - g1_young_gen = CT_MR_BS_last_reserved << 1 + g1_young_gen = CT_MR_BS_last_reserved << 1, + + // During evacuation we use the card table to consolidate the cards we need to + // scan for roots onto the card table from the various sources. Further it is + // used to record already completely scanned cards to avoid re-scanning them + // when incrementally evacuating the old gen regions of a collection set. + // This means that already scanned cards should be preserved. + // + // The merge at the start of each evacuation round simply sets cards to dirty + // that are clean; scanned cards are set to 0x1. + // + // This means that the LSB determines what to do with the card during evacuation + // given the following possible values: + // + // 11111111 - clean, do not scan + // 00000001 - already scanned, do not scan + // 00000000 - dirty, needs to be scanned. + // + g1_card_already_scanned = 0x1 }; -public: + static const size_t WordAllClean = SIZE_MAX; + static const size_t WordAllDirty = 0; + + STATIC_ASSERT(BitsPerByte == 8); + static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned; + G1CardTable(MemRegion whole_heap): CardTable(whole_heap, /* scanned concurrently */ true), _listener() { _listener.set_card_table(this); } - bool is_card_dirty(size_t card_index) { - return _byte_map[card_index] == dirty_card_val(); - } static CardValue g1_young_card_val() { return g1_young_gen; } -/* - Claimed and deferred bits are used together in G1 during the evacuation - pause. These bits can have the following state transitions: - 1. The claimed bit can be put over any other card state. Except that - the "dirty -> dirty and claimed" transition is checked for in - G1 code and is not used. - 2. 
Deferred bit can be set only if the previous state of the card - was either clean or claimed. mark_card_deferred() is wait-free. - We do not care if the operation is be successful because if - it does not it will only result in duplicate entry in the update - buffer because of the "cache-miss". So it's not worth spinning. - */ - - bool is_card_claimed(size_t card_index) { - CardValue val = _byte_map[card_index]; - return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val(); - } - - inline void set_card_claimed(size_t card_index); - void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN; void g1_mark_as_young(const MemRegion& mr); - bool mark_card_deferred(size_t card_index); + size_t index_for_cardvalue(CardValue const* p) const { + return pointer_delta(p, _byte_map, sizeof(CardValue)); + } + + // Mark the given card as Dirty if it is Clean. + inline void mark_clean_as_dirty(size_t card_index); - bool is_card_deferred(size_t card_index) { - CardValue val = _byte_map[card_index]; - return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val(); - } + // Change Clean cards in a (large) area on the card table as Dirty, preserving + // already scanned cards. Assumes that most cards in that area are Clean. + inline void mark_region_dirty(size_t start_card_index, size_t num_cards); + + // Mark the given range of cards as Scanned. All of these cards must be Dirty. + inline void mark_as_scanned(size_t start_card_index, size_t num_cards); + + inline uint region_idx_for(CardValue* p); static size_t compute_size(size_t mem_region_size_in_words) { size_t number_of_slots = (mem_region_size_in_words / card_size_in_words); diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CardTable.inline.hpp --- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -26,15 +26,58 @@ #define SHARE_GC_G1_G1CARDTABLE_INLINE_HPP #include "gc/g1/g1CardTable.hpp" +#include "gc/g1/heapRegion.hpp" -void G1CardTable::set_card_claimed(size_t card_index) { - jbyte val = _byte_map[card_index]; - if (val == clean_card_val()) { - val = (jbyte)claimed_card_val(); - } else { - val |= (jbyte)claimed_card_val(); +inline uint G1CardTable::region_idx_for(CardValue* p) { + size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue)); + return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift)); +} + +inline void G1CardTable::mark_clean_as_dirty(size_t card_index) { + CardValue value = _byte_map[card_index]; + if (value == clean_card_val()) { + _byte_map[card_index] = dirty_card_val(); } - _byte_map[card_index] = val; } -#endif // SHARE_GC_G1_G1CARDTABLE_INLINE_HPP +inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) { + assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned."); + assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible."); + + size_t const num_chunks = num_cards / sizeof(size_t); + + size_t* cur_word = (size_t*)&_byte_map[start_card_index]; + size_t* const end_word_map = cur_word + num_chunks; + while (cur_word < end_word_map) { + size_t value = *cur_word; + if (value == WordAllClean) { + *cur_word = WordAllDirty; + } else if (value == WordAllDirty) { + // do nothing. + } else { + // There is a mix of cards in there. Tread slowly. 
+ CardValue* cur = (CardValue*)cur_word; + for (size_t i = 0; i < sizeof(size_t); i++) { + CardValue value = *cur; + if (value == clean_card_val()) { + *cur = dirty_card_val(); + } + cur++; + } + } + cur_word++; + } +} + +inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) { + CardValue* start = &_byte_map[start_card_index]; + CardValue* const end = start + num_cards; + while (start < end) { + CardValue value = *start; + assert(value == dirty_card_val(), + "Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start), p2i(end)); + *start++ = g1_card_already_scanned; + } +} + +#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */ diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CollectedHeap.cpp --- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -1954,7 +1954,7 @@ n_completed_buffers++; } assert(dcqs.completed_buffers_num() == 0, "Completed buffers exist!"); - phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers, G1GCPhaseTimes::UpdateRSProcessedBuffers); + phase_times()->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_i, n_completed_buffers, G1GCPhaseTimes::MergeLBProcessedBuffers); } // Computes the sum of the storage used by the various regions. @@ -2238,8 +2238,8 @@ _collection_set.iterate(cl); } -void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, uint worker_id) { - _collection_set.iterate_incremental_part_from(cl, worker_id, workers()->active_workers()); +void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, HeapRegionClaimer* hr_claimer, uint worker_id) { + _collection_set.iterate_incremental_part_from(cl, hr_claimer, worker_id, workers()->active_workers()); } HeapWord* G1CollectedHeap::block_start(const void* addr) const { @@ -2630,8 +2630,6 @@ size_t _total_humongous; size_t _candidate_humongous; - G1DirtyCardQueue _dcq; - bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const { assert(region->is_starts_humongous(), "Must start a humongous object"); @@ -2691,8 +2689,7 @@ public: RegisterRegionsWithRegionAttrTableClosure() : _total_humongous(0), - _candidate_humongous(0), - _dcq(&G1BarrierSet::dirty_card_queue_set()) { + _candidate_humongous(0) { } virtual bool do_heap_region(HeapRegion* r) { @@ -2707,49 +2704,9 @@ uint rindex = r->hrm_index(); g1h->set_humongous_reclaim_candidate(rindex, is_candidate); if (is_candidate) { + g1h->register_humongous_region_with_region_attr(rindex); _candidate_humongous++; - g1h->register_humongous_region_with_region_attr(rindex); - // Is_candidate already filters out humongous object with large remembered sets. - // If we have a humongous object with a few remembered sets, we simply flush these - // remembered set entries into the DCQS. That will result in automatic - // re-evaluation of their remembered set entries during the following evacuation - // phase. - if (!r->rem_set()->is_empty()) { - guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries), - "Found a not-small remembered set here. 
This is inconsistent with previous assumptions."); - G1CardTable* ct = g1h->card_table(); - HeapRegionRemSetIterator hrrs(r->rem_set()); - size_t card_index; - while (hrrs.has_next(card_index)) { - CardTable::CardValue* card_ptr = ct->byte_for_index(card_index); - // The remembered set might contain references to already freed - // regions. Filter out such entries to avoid failing card table - // verification. - if (g1h->is_in(ct->addr_for(card_ptr))) { - if (*card_ptr != G1CardTable::dirty_card_val()) { - *card_ptr = G1CardTable::dirty_card_val(); - _dcq.enqueue(card_ptr); - } - } - } - assert(hrrs.n_yielded() == r->rem_set()->occupied(), - "Remembered set hash maps out of sync, cur: " SIZE_FORMAT " entries, next: " SIZE_FORMAT " entries", - hrrs.n_yielded(), r->rem_set()->occupied()); - // We should only clear the card based remembered set here as we will not - // implicitly rebuild anything else during eager reclaim. Note that at the moment - // (and probably never) we do not enter this path if there are other kind of - // remembered sets for this region. - r->rem_set()->clear_locked(true /* only_cardset */); - // Clear_locked() above sets the state to Empty. However we want to continue - // collecting remembered set entries for humongous regions that were not - // reclaimed. - r->rem_set()->set_state_complete(); -#ifdef ASSERT - G1HeapRegionAttr region_attr = g1h->region_attr(oop(r->bottom())); - assert(region_attr.needs_remset_update(), "must be"); -#endif - } - assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty."); + // We will later handle the remembered sets of these regions. } else { g1h->register_region_with_region_attr(r); } @@ -2760,8 +2717,6 @@ size_t total_humongous() const { return _total_humongous; } size_t candidate_humongous() const { return _candidate_humongous; } - - void flush_rem_set_entries() { _dcq.flush(); } }; void G1CollectedHeap::register_regions_with_region_attr() { @@ -2774,9 +2729,6 @@ cl.total_humongous(), cl.candidate_humongous()); _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0; - - // Finally flush all remembered set entries to re-check into the global DCQS. - cl.flush_rem_set_entries(); } #ifndef PRODUCT @@ -3071,7 +3023,7 @@ workers()->active_workers(), collection_set()->young_region_length(), collection_set()->optional_region_length()); - pre_evacuate_collection_set(evacuation_info); + pre_evacuate_collection_set(evacuation_info, &per_thread_states); // Actually do the work... 
evacuate_initial_collection_set(&per_thread_states); @@ -3104,9 +3056,7 @@ double sample_end_time_sec = os::elapsedTime(); double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS; - size_t total_cards_scanned = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScanRSScannedCards) + - phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::ScanRSScannedCards); - policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc); + policy()->record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc); } verify_after_young_collection(verify_type); @@ -3580,7 +3530,7 @@ phase_times()->record_merge_pss_time_ms((os::elapsedTime() - merge_pss_time_start) * 1000.0); } -void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info) { +void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) { _expand_heap_after_alloc_failure = true; _evacuation_failed = false; @@ -3591,10 +3541,15 @@ // Initialize the GC alloc regions. _allocator->init_gc_alloc_regions(evacuation_info); + { + Ticks start = Ticks::now(); + rem_set()->prepare_for_scan_heap_roots(); + phase_times()->record_prepare_heap_roots_time_ms((Ticks::now() - start).seconds() * 1000.0); + } + register_regions_with_region_attr(); assert(_verifier->check_region_attr_table(), "Inconsistency in the region attributes table."); - rem_set()->prepare_for_scan_rem_set(); _preserved_marks_set.assert_empty(); #if COMPILER2_OR_JVMCI @@ -3696,8 +3651,8 @@ void scan_roots(G1ParScanThreadState* pss, uint worker_id) { _root_processor->evacuate_roots(pss, worker_id); - _g1h->rem_set()->update_rem_set(pss, worker_id); - _g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ObjCopy, G1GCPhaseTimes::CodeRoots); + _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ObjCopy); + _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::ObjCopy); } void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) { @@ -3724,6 +3679,14 @@ }; void G1CollectedHeap::evacuate_initial_collection_set(G1ParScanThreadStateSet* per_thread_states) { + G1GCPhaseTimes* p = phase_times(); + + { + Ticks start = Ticks::now(); + rem_set()->merge_heap_roots(false /* remset_only */, G1GCPhaseTimes::MergeRS); + p->record_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0); + } + Tickspan task_time; const uint num_workers = workers()->active_workers(); @@ -3738,7 +3701,6 @@ } Tickspan total_processing = Ticks::now() - start_processing; - G1GCPhaseTimes* p = phase_times(); p->record_initial_evac_time(task_time.seconds() * 1000.0); p->record_or_add_code_root_fixup_time((total_processing - task_time).seconds() * 1000.0); } @@ -3746,7 +3708,8 @@ class G1EvacuateOptionalRegionsTask : public G1EvacuateRegionsBaseTask { void scan_roots(G1ParScanThreadState* pss, uint worker_id) { - _g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::OptObjCopy, G1GCPhaseTimes::OptCodeRoots); + _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptObjCopy); + _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::OptObjCopy); } void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) 
{ @@ -3782,8 +3745,6 @@ void G1CollectedHeap::evacuate_optional_collection_set(G1ParScanThreadStateSet* per_thread_states) { const double gc_start_time_ms = phase_times()->cur_collection_start_sec() * 1000.0; - Ticks start = Ticks::now(); - while (!evacuation_failed() && _collection_set.optional_region_length() > 0) { double time_used_ms = os::elapsedTime() * 1000.0 - gc_start_time_ms; @@ -3796,18 +3757,24 @@ break; } - evacuate_next_optional_regions(per_thread_states); + { + Ticks start = Ticks::now(); + rem_set()->merge_heap_roots(true /* remset_only */, G1GCPhaseTimes::OptMergeRS); + phase_times()->record_or_add_optional_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0); + } + + { + Ticks start = Ticks::now(); + evacuate_next_optional_regions(per_thread_states); + phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0); + } } _collection_set.abandon_optional_collection_set(per_thread_states); - - phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0); } void G1CollectedHeap::post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) { - // Also cleans the card table from temporary duplicate detection information used - // during UpdateRS/ScanRS. - rem_set()->cleanup_after_scan_rem_set(); + rem_set()->cleanup_after_scan_heap_roots(); // Process any discovered reference objects - we have // to do this _before_ we retire the GC alloc regions diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CollectedHeap.hpp --- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -78,7 +78,6 @@ class G1HotCardCache; class G1RemSet; class G1YoungRemSetSamplingThread; -class HeapRegionRemSetIterator; class G1ConcurrentMark; class G1ConcurrentMarkThread; class G1ConcurrentRefine; @@ -757,7 +756,7 @@ void evacuate_next_optional_regions(G1ParScanThreadStateSet* per_thread_states); public: - void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info); + void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss); void post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss); void expand_heap_after_young_collection(); @@ -1115,7 +1114,8 @@ public: - inline G1HeapRegionAttr region_attr(const void* obj); + inline G1HeapRegionAttr region_attr(const void* obj) const; + inline G1HeapRegionAttr region_attr(uint idx) const; // Return "TRUE" iff the given object address is in the reserved // region of g1. @@ -1182,7 +1182,12 @@ // Starts the iteration so that the start regions of a given worker id over the // set active_workers are evenly spread across the set of collection set regions // to be iterated. - void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id); + // The variant with the HeapRegionClaimer guarantees that the closure will be + // applied to a particular region exactly once. + void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id) { + collection_set_iterate_increment_from(blk, NULL, worker_id); + } + void collection_set_iterate_increment_from(HeapRegionClosure *blk, HeapRegionClaimer* hr_claimer, uint worker_id); // Returns the HeapRegion that contains addr. addr must not be NULL. 
template diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp --- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -163,10 +163,14 @@ return _region_attr.is_in_cset_or_humongous((HeapWord*)obj); } -G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) { +G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) const { return _region_attr.at((HeapWord*)addr); } +G1HeapRegionAttr G1CollectedHeap::region_attr(uint idx) const { + return _region_attr.get_by_index(idx); +} + void G1CollectedHeap::register_humongous_region_with_region_attr(uint index) { _region_attr.set_humongous(index, region_at(index)->rem_set()->is_tracked()); } @@ -177,7 +181,7 @@ void G1CollectedHeap::register_old_region_with_region_attr(HeapRegion* r) { _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked()); - _rem_set->prepare_for_scan_rem_set(r->hrm_index()); + _rem_set->prepare_for_scan_heap_roots(r->hrm_index()); } void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) { diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CollectionSet.cpp --- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -217,10 +217,13 @@ } } -void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const { +void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, + HeapRegionClaimer* hr_claimer, + uint worker_id, + uint total_workers) const { assert_at_safepoint(); - size_t len = _collection_set_cur_length - _inc_part_start; + size_t len = increment_length(); if (len == 0) { return; } @@ -229,9 +232,12 @@ size_t cur_pos = start_pos; do { - HeapRegion* r = _g1h->region_at(_collection_set_regions[cur_pos + _inc_part_start]); - bool result = cl->do_heap_region(r); - guarantee(!result, "Must not cancel iteration"); + uint region_idx = _collection_set_regions[cur_pos + _inc_part_start]; + if (hr_claimer == NULL || hr_claimer->claim_region(region_idx)) { + HeapRegion* r = _g1h->region_at(region_idx); + bool result = cl->do_heap_region(r); + guarantee(!result, "Must not cancel iteration"); + } cur_pos++; if (cur_pos == len) { diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1CollectionSet.hpp --- a/src/hotspot/share/gc/g1/g1CollectionSet.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -36,6 +36,7 @@ class G1Policy; class G1SurvivorRegions; class HeapRegion; +class HeapRegionClaimer; class HeapRegionClosure; // The collection set. @@ -279,7 +280,12 @@ // Iterate over the current collection set increment applying the given HeapRegionClosure // from a starting position determined by the given worker id. - void iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const; + void iterate_incremental_part_from(HeapRegionClosure* cl, HeapRegionClaimer* hr_claimer, uint worker_id, uint total_workers) const; + + // Returns the length of the current increment in number of regions. 
+ size_t increment_length() const { return _collection_set_cur_length - _inc_part_start; } + // Returns the length of the whole current collection set in number of regions + size_t cur_length() const { return _collection_set_cur_length; } // Iterate over the entire collection set (all increments calculated so far), applying // the given HeapRegionClosure on all of them. diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp --- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -206,7 +206,7 @@ // available buffers near green_zone value. When yellow_size is // large we don't want to allow a full step to accumulate before // doing any processing, as that might lead to significantly more - // than green_zone buffers to be processed by update_rs. + // than green_zone buffers to be processed during scanning. step = MIN2(step, ParallelGCThreads / 2.0); } size_t activate_offset = static_cast(ceil(step * (worker_i + 1))); @@ -322,18 +322,18 @@ } static size_t calc_new_green_zone(size_t green, - double update_rs_time, - size_t update_rs_processed_buffers, + double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { // Adjust green zone based on whether we're meeting the time goal. // Limit to max_green_zone. const double inc_k = 1.1, dec_k = 0.9; - if (update_rs_time > goal_ms) { + if (log_buffer_scan_time > goal_ms) { if (green > 0) { green = static_cast(green * dec_k); } - } else if (update_rs_time < goal_ms && - update_rs_processed_buffers > green) { + } else if (log_buffer_scan_time < goal_ms && + processed_log_buffers > green) { green = static_cast(MAX2(green * inc_k, green + 1.0)); green = MIN2(green, max_green_zone); } @@ -350,20 +350,20 @@ return MIN2(yellow + (yellow - green), max_red_zone); } -void G1ConcurrentRefine::update_zones(double update_rs_time, - size_t update_rs_processed_buffers, +void G1ConcurrentRefine::update_zones(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { log_trace( CTRL_TAGS )("Updating Refinement Zones: " - "update_rs time: %.3fms, " - "update_rs buffers: " SIZE_FORMAT ", " - "update_rs goal time: %.3fms", - update_rs_time, - update_rs_processed_buffers, + "log buffer scan time: %.3fms, " + "processed buffers: " SIZE_FORMAT ", " + "goal time: %.3fms", + log_buffer_scan_time, + processed_log_buffers, goal_ms); _green_zone = calc_new_green_zone(_green_zone, - update_rs_time, - update_rs_processed_buffers, + log_buffer_scan_time, + processed_log_buffers, goal_ms); _yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size); _red_zone = calc_new_red_zone(_green_zone, _yellow_zone); @@ -376,13 +376,13 @@ _green_zone, _yellow_zone, _red_zone); } -void G1ConcurrentRefine::adjust(double update_rs_time, - size_t update_rs_processed_buffers, +void G1ConcurrentRefine::adjust(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); if (G1UseAdaptiveConcRefinement) { - update_zones(update_rs_time, update_rs_processed_buffers, goal_ms); + update_zones(log_buffer_scan_time, processed_log_buffers, goal_ms); // Change the barrier params if (max_num_threads() == 0) { diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp --- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Thu Jun 
27 11:48:32 2019 +0200 @@ -97,8 +97,8 @@ size_t min_yellow_zone_size); // Update green/yellow/red zone values based on how well goals are being met. - void update_zones(double update_rs_time, - size_t update_rs_processed_buffers, + void update_zones(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms); static uint worker_id_offset(); @@ -115,7 +115,7 @@ void stop(); // Adjust refinement thresholds based on work done during the pause and the goal time. - void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms); + void adjust(double log_buffer_scan_time, size_t processed_log_buffers, double goal_ms); size_t activation_threshold(uint worker_id) const; size_t deactivation_threshold(uint worker_id) const; diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1EvacFailure.cpp --- a/src/hotspot/share/gc/g1/g1EvacFailure.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1EvacFailure.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -37,15 +37,19 @@ #include "oops/compressedOops.inline.hpp" #include "oops/oop.inline.hpp" -class UpdateRSetDeferred : public BasicOopIterateClosure { +class UpdateLogBuffersDeferred : public BasicOopIterateClosure { private: G1CollectedHeap* _g1h; G1DirtyCardQueue* _dcq; G1CardTable* _ct; + // Remember the last enqueued card to avoid enqueuing the same card over and over; + // since we only ever handle a card once, this is sufficient. + size_t _last_enqueued_card; + public: - UpdateRSetDeferred(G1DirtyCardQueue* dcq) : - _g1h(G1CollectedHeap::heap()), _dcq(dcq), _ct(_g1h->card_table()) {} + UpdateLogBuffersDeferred(G1DirtyCardQueue* dcq) : + _g1h(G1CollectedHeap::heap()), _dcq(dcq), _ct(_g1h->card_table()), _last_enqueued_card(SIZE_MAX) {} virtual void do_oop(narrowOop* p) { do_oop_work(p); } virtual void do_oop( oop* p) { do_oop_work(p); } @@ -62,8 +66,9 @@ return; } size_t card_index = _ct->index_for(p); - if (_ct->mark_card_deferred(card_index)) { + if (card_index != _last_enqueued_card) { _dcq->enqueue(_ct->byte_for_index(card_index)); + _last_enqueued_card = card_index; } } }; @@ -73,21 +78,21 @@ G1ConcurrentMark* _cm; HeapRegion* _hr; size_t _marked_bytes; - UpdateRSetDeferred* _update_rset_cl; + UpdateLogBuffersDeferred* _log_buffer_cl; bool _during_initial_mark; uint _worker_id; HeapWord* _last_forwarded_object_end; public: RemoveSelfForwardPtrObjClosure(HeapRegion* hr, - UpdateRSetDeferred* update_rset_cl, + UpdateLogBuffersDeferred* log_buffer_cl, bool during_initial_mark, uint worker_id) : _g1h(G1CollectedHeap::heap()), _cm(_g1h->concurrent_mark()), _hr(hr), _marked_bytes(0), - _update_rset_cl(update_rset_cl), + _log_buffer_cl(log_buffer_cl), _during_initial_mark(during_initial_mark), _worker_id(worker_id), _last_forwarded_object_end(hr->bottom()) { } @@ -144,7 +149,7 @@ // The problem is that, if evacuation fails, we might have // remembered set entries missing given that we skipped cards on // the collection set. So, we'll recreate such entries now. 
- obj->oop_iterate(_update_rset_cl); + obj->oop_iterate(_log_buffer_cl); HeapWord* obj_end = obj_addr + obj_size; _last_forwarded_object_end = obj_end; @@ -193,25 +198,22 @@ class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure { G1CollectedHeap* _g1h; uint _worker_id; - HeapRegionClaimer* _hrclaimer; G1DirtyCardQueue _dcq; - UpdateRSetDeferred _update_rset_cl; + UpdateLogBuffersDeferred _log_buffer_cl; public: - RemoveSelfForwardPtrHRClosure(uint worker_id, - HeapRegionClaimer* hrclaimer) : + RemoveSelfForwardPtrHRClosure(uint worker_id) : _g1h(G1CollectedHeap::heap()), _worker_id(worker_id), - _hrclaimer(hrclaimer), _dcq(&_g1h->dirty_card_queue_set()), - _update_rset_cl(&_dcq){ + _log_buffer_cl(&_dcq) { } size_t remove_self_forward_ptr_by_walking_hr(HeapRegion* hr, bool during_initial_mark) { RemoveSelfForwardPtrObjClosure rspc(hr, - &_update_rset_cl, + &_log_buffer_cl, during_initial_mark, _worker_id); hr->object_iterate(&rspc); @@ -225,26 +227,24 @@ assert(!hr->is_pinned(), "Unexpected pinned region at index %u", hr->hrm_index()); assert(hr->in_collection_set(), "bad CS"); - if (_hrclaimer->claim_region(hr->hrm_index())) { - if (hr->evacuation_failed()) { - hr->clear_index_in_opt_cset(); + if (hr->evacuation_failed()) { + hr->clear_index_in_opt_cset(); - bool during_initial_mark = _g1h->collector_state()->in_initial_mark_gc(); - bool during_conc_mark = _g1h->collector_state()->mark_or_rebuild_in_progress(); + bool during_initial_mark = _g1h->collector_state()->in_initial_mark_gc(); + bool during_conc_mark = _g1h->collector_state()->mark_or_rebuild_in_progress(); - hr->note_self_forwarding_removal_start(during_initial_mark, + hr->note_self_forwarding_removal_start(during_initial_mark, during_conc_mark); - _g1h->verifier()->check_bitmaps("Self-Forwarding Ptr Removal", hr); + _g1h->verifier()->check_bitmaps("Self-Forwarding Ptr Removal", hr); - hr->reset_bot(); - - size_t live_bytes = remove_self_forward_ptr_by_walking_hr(hr, during_initial_mark); + hr->reset_bot(); - hr->rem_set()->clean_strong_code_roots(hr); - hr->rem_set()->clear_locked(true); + size_t live_bytes = remove_self_forward_ptr_by_walking_hr(hr, during_initial_mark); - hr->note_self_forwarding_removal_end(live_bytes); - } + hr->rem_set()->clean_strong_code_roots(hr); + hr->rem_set()->clear_locked(true); + + hr->note_self_forwarding_removal_end(live_bytes); } return false; } @@ -256,7 +256,7 @@ _hrclaimer(_g1h->workers()->active_workers()) { } void G1ParRemoveSelfForwardPtrsTask::work(uint worker_id) { - RemoveSelfForwardPtrHRClosure rsfp_cl(worker_id, &_hrclaimer); + RemoveSelfForwardPtrHRClosure rsfp_cl(worker_id); - _g1h->collection_set_iterate_increment_from(&rsfp_cl, worker_id); + _g1h->collection_set_iterate_increment_from(&rsfp_cl, &_hrclaimer, worker_id); } diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -66,14 +66,30 @@ _gc_par_phases[WaitForStrongCLD] = new WorkerDataArray(max_gc_threads, "Wait For Strong CLD (ms):"); _gc_par_phases[WeakCLDRoots] = new WorkerDataArray(max_gc_threads, "Weak CLD Roots (ms):"); - _gc_par_phases[UpdateRS] = new WorkerDataArray(max_gc_threads, "Update RS (ms):"); + _gc_par_phases[MergeRS] = new WorkerDataArray(max_gc_threads, "Remembered Sets (ms):"); + _merge_rs_merged_sparse = new WorkerDataArray(max_gc_threads, "Merged Sparse:"); + 
_gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_sparse, MergeRSMergedSparse); + _merge_rs_merged_fine = new WorkerDataArray(max_gc_threads, "Merged Fine:"); + _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine); + _merge_rs_merged_coarse = new WorkerDataArray(max_gc_threads, "Merged Coarse:"); + _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse); + + _gc_par_phases[OptMergeRS] = new WorkerDataArray(max_gc_threads, "Optional Remembered Sets (ms):"); + _opt_merge_rs_merged_sparse = new WorkerDataArray(max_gc_threads, "Merged Sparse:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_sparse, MergeRSMergedSparse); + _opt_merge_rs_merged_fine = new WorkerDataArray(max_gc_threads, "Merged Fine:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine); + _opt_merge_rs_merged_coarse = new WorkerDataArray(max_gc_threads, "Merged Coarse:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse); + + _gc_par_phases[MergeLB] = new WorkerDataArray(max_gc_threads, "Log Buffers (ms):"); if (G1HotCardCache::default_use_cache()) { - _gc_par_phases[ScanHCC] = new WorkerDataArray(max_gc_threads, "Scan HCC (ms):"); + _gc_par_phases[MergeHCC] = new WorkerDataArray(max_gc_threads, "Hot Card Cache (ms):"); } else { - _gc_par_phases[ScanHCC] = NULL; + _gc_par_phases[MergeHCC] = NULL; } - _gc_par_phases[ScanRS] = new WorkerDataArray(max_gc_threads, "Scan RS (ms):"); - _gc_par_phases[OptScanRS] = new WorkerDataArray(max_gc_threads, "Optional Scan RS (ms):"); + _gc_par_phases[ScanHR] = new WorkerDataArray(max_gc_threads, "Scan Heap Roots (ms):"); + _gc_par_phases[OptScanHR] = new WorkerDataArray(max_gc_threads, "Optional Scan Heap Roots (ms):"); _gc_par_phases[CodeRoots] = new WorkerDataArray(max_gc_threads, "Code Root Scan (ms):"); _gc_par_phases[OptCodeRoots] = new WorkerDataArray(max_gc_threads, "Optional Code Root Scan (ms):"); _gc_par_phases[ObjCopy] = new WorkerDataArray(max_gc_threads, "Object Copy (ms):"); @@ -84,30 +100,30 @@ _gc_par_phases[GCWorkerEnd] = new WorkerDataArray(max_gc_threads, "GC Worker End (ms):"); _gc_par_phases[Other] = new WorkerDataArray(max_gc_threads, "GC Worker Other (ms):"); - _scan_rs_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); - _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_scanned_cards, ScanRSScannedCards); - _scan_rs_claimed_cards = new WorkerDataArray(max_gc_threads, "Claimed Cards:"); - _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_claimed_cards, ScanRSClaimedCards); - _scan_rs_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); - _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_skipped_cards, ScanRSSkippedCards); + _scan_hr_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); + _gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_scanned_cards, ScanHRScannedCards); + _scan_hr_scanned_blocks = new WorkerDataArray(max_gc_threads, "Scanned Blocks:"); + _gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_scanned_blocks, ScanHRScannedBlocks); + _scan_hr_claimed_chunks = new WorkerDataArray(max_gc_threads, "Claimed Chunks:"); + _gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_claimed_chunks, ScanHRClaimedChunks); - _opt_scan_rs_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); - 
_gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_scanned_cards, ScanRSScannedCards); - _opt_scan_rs_claimed_cards = new WorkerDataArray(max_gc_threads, "Claimed Cards:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_claimed_cards, ScanRSClaimedCards); - _opt_scan_rs_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_skipped_cards, ScanRSSkippedCards); - _opt_scan_rs_scanned_opt_refs = new WorkerDataArray(max_gc_threads, "Scanned Refs:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_scanned_opt_refs, ScanRSScannedOptRefs); - _opt_scan_rs_used_memory = new WorkerDataArray(max_gc_threads, "Used Memory:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_used_memory, ScanRSUsedMemory); + _opt_scan_hr_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_cards, ScanHRScannedCards); + _opt_scan_hr_scanned_blocks = new WorkerDataArray(max_gc_threads, "Scanned Blocks:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_blocks, ScanHRScannedBlocks); + _opt_scan_hr_claimed_chunks = new WorkerDataArray(max_gc_threads, "Claimed Chunks:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_claimed_chunks, ScanHRClaimedChunks); + _opt_scan_hr_scanned_opt_refs = new WorkerDataArray(max_gc_threads, "Scanned Refs:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_opt_refs, ScanHRScannedOptRefs); + _opt_scan_hr_used_memory = new WorkerDataArray(max_gc_threads, "Used Memory:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_used_memory, ScanHRUsedMemory); - _update_rs_processed_buffers = new WorkerDataArray(max_gc_threads, "Processed Buffers:"); - _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers, UpdateRSProcessedBuffers); - _update_rs_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); - _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_scanned_cards, UpdateRSScannedCards); - _update_rs_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); - _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_skipped_cards, UpdateRSSkippedCards); + _merge_lb_processed_buffers = new WorkerDataArray(max_gc_threads, "Processed Buffers:"); + _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_processed_buffers, MergeLBProcessedBuffers); + _merge_lb_dirty_cards = new WorkerDataArray(max_gc_threads, "Dirty Cards:"); + _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_dirty_cards, MergeLBDirtyCards); + _merge_lb_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); + _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_skipped_cards, MergeLBSkippedCards); _obj_copy_lab_waste = new WorkerDataArray(max_gc_threads, "LAB Waste"); _gc_par_phases[ObjCopy]->link_thread_work_items(_obj_copy_lab_waste, ObjCopyLABWaste); @@ -148,6 +164,8 @@ _cur_optional_evac_ms = 0.0; _cur_collection_code_root_fixup_time_ms = 0.0; _cur_strong_code_root_purge_time_ms = 0.0; + _cur_merge_heap_roots_time_ms = 0.0; + _cur_optional_merge_heap_roots_time_ms = 0.0; _cur_evac_fail_recalc_used = 0.0; _cur_evac_fail_remove_self_forwards = 0.0; _cur_string_deduplication_time_ms = 0.0; @@ -160,6 +178,7 @@ _cur_collection_start_sec = 0.0; _root_region_scan_wait_time_ms = 0.0; _external_accounted_time_ms = 0.0; + 
_recorded_prepare_heap_roots_time_ms = 0.0; _recorded_clear_claimed_marks_time_ms = 0.0; _recorded_young_cset_choice_time_ms = 0.0; _recorded_non_young_cset_choice_time_ms = 0.0; @@ -219,9 +238,7 @@ record_time_secs(GCWorkerTotal, i , total_worker_time); double worker_known_time = worker_time(ExtRootScan, i) + - worker_time(ScanHCC, i) + - worker_time(UpdateRS, i) + - worker_time(ScanRS, i) + + worker_time(ScanHR, i) + worker_time(CodeRoots, i) + worker_time(ObjCopy, i) + worker_time(Termination, i); @@ -231,11 +248,15 @@ // Make sure all slots are uninitialized since this thread did not seem to have been started ASSERT_PHASE_UNINITIALIZED(GCWorkerEnd); ASSERT_PHASE_UNINITIALIZED(ExtRootScan); - ASSERT_PHASE_UNINITIALIZED(ScanHCC); - ASSERT_PHASE_UNINITIALIZED(UpdateRS); - ASSERT_PHASE_UNINITIALIZED(ScanRS); + ASSERT_PHASE_UNINITIALIZED(MergeHCC); + ASSERT_PHASE_UNINITIALIZED(MergeRS); + ASSERT_PHASE_UNINITIALIZED(OptMergeRS); + ASSERT_PHASE_UNINITIALIZED(MergeLB); + ASSERT_PHASE_UNINITIALIZED(ScanHR); ASSERT_PHASE_UNINITIALIZED(CodeRoots); + ASSERT_PHASE_UNINITIALIZED(OptCodeRoots); ASSERT_PHASE_UNINITIALIZED(ObjCopy); + ASSERT_PHASE_UNINITIALIZED(OptObjCopy); ASSERT_PHASE_UNINITIALIZED(Termination); } } @@ -365,6 +386,7 @@ _recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms + _cur_region_register_time + + _recorded_prepare_heap_roots_time_ms + _recorded_clear_claimed_marks_time_ms; info_time("Pre Evacuate Collection Set", sum_ms); @@ -380,6 +402,7 @@ trace_count("Humongous Candidate", _cur_fast_reclaim_humongous_candidates); } + debug_time("Prepare Heap Roots", _recorded_prepare_heap_roots_time_ms); if (_recorded_clear_claimed_marks_time_ms > 0.0) { debug_time("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms); } @@ -387,10 +410,13 @@ } double G1GCPhaseTimes::print_evacuate_optional_collection_set() const { - const double sum_ms = _cur_optional_evac_ms; + const double sum_ms = _cur_optional_evac_ms + _cur_optional_merge_heap_roots_time_ms; if (sum_ms > 0) { - info_time("Evacuate Optional Collection Set", sum_ms); - debug_phase(_gc_par_phases[OptScanRS]); + info_time("Merge Optional Heap Roots", _cur_optional_merge_heap_roots_time_ms); + debug_phase(_gc_par_phases[OptMergeRS]); + + info_time("Evacuate Optional Collection Set", _cur_optional_evac_ms); + debug_phase(_gc_par_phases[OptScanHR]); debug_phase(_gc_par_phases[OptObjCopy]); debug_phase(_gc_par_phases[OptCodeRoots]); debug_phase(_gc_par_phases[OptTermination]); @@ -398,21 +424,23 @@ return sum_ms; } -double G1GCPhaseTimes::print_evacuate_collection_set() const { - const double sum_ms = _cur_collection_initial_evac_time_ms; +double G1GCPhaseTimes::print_evacuate_initial_collection_set() const { + info_time("Merge Heap Roots", _cur_merge_heap_roots_time_ms); - info_time("Evacuate Collection Set", sum_ms); + debug_phase(_gc_par_phases[MergeRS]); + if (G1HotCardCache::default_use_cache()) { + debug_phase(_gc_par_phases[MergeHCC]); + } + debug_phase(_gc_par_phases[MergeLB]); + + info_time("Evacuate Collection Set", _cur_collection_initial_evac_time_ms); trace_phase(_gc_par_phases[GCWorkerStart], false); debug_phase(_gc_par_phases[ExtRootScan]); for (int i = ExtRootScanSubPhasesFirst; i <= ExtRootScanSubPhasesLast; i++) { trace_phase(_gc_par_phases[i]); } - if (G1HotCardCache::default_use_cache()) { - debug_phase(_gc_par_phases[ScanHCC]); - } - debug_phase(_gc_par_phases[UpdateRS]); - debug_phase(_gc_par_phases[ScanRS]); + debug_phase(_gc_par_phases[ScanHR]); debug_phase(_gc_par_phases[CodeRoots]); 
debug_phase(_gc_par_phases[ObjCopy]); debug_phase(_gc_par_phases[Termination]); @@ -420,7 +448,7 @@ debug_phase(_gc_par_phases[GCWorkerTotal]); trace_phase(_gc_par_phases[GCWorkerEnd], false); - return sum_ms; + return _cur_collection_initial_evac_time_ms + _cur_merge_heap_roots_time_ms; } double G1GCPhaseTimes::print_post_evacuate_collection_set() const { @@ -503,7 +531,7 @@ double accounted_ms = 0.0; accounted_ms += print_pre_evacuate_collection_set(); - accounted_ms += print_evacuate_collection_set(); + accounted_ms += print_evacuate_initial_collection_set(); accounted_ms += print_evacuate_optional_collection_set(); accounted_ms += print_post_evacuate_collection_set(); print_other(accounted_ms); @@ -530,10 +558,12 @@ "CMRefRoots", "WaitForStrongCLD", "WeakCLDRoots", - "UpdateRS", - "ScanHCC", - "ScanRS", - "OptScanRS", + "MergeRS", + "OptMergeRS", + "MergeLB", + "MergeHCC", + "ScanHR", + "OptScanHR", "CodeRoots", "OptCodeRoots", "ObjCopy", @@ -580,8 +610,8 @@ _stopped = true; } -G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) : - _start_time(), _phase(phase), _phase_times(phase_times), _worker_id(worker_id), _event() { +G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id, bool must_record) : + _start_time(), _phase(phase), _phase_times(phase_times), _worker_id(worker_id), _event(), _must_record(must_record) { if (_phase_times != NULL) { _start_time = Ticks::now(); } @@ -589,7 +619,11 @@ G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() { if (_phase_times != NULL) { - _phase_times->record_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds()); + if (_must_record) { + _phase_times->record_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds()); + } else { + _phase_times->record_or_add_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds()); + } _event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(_phase)); } } diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -60,10 +60,12 @@ CMRefRoots, WaitForStrongCLD, WeakCLDRoots, - UpdateRS, - ScanHCC, - ScanRS, - OptScanRS, + MergeRS, + OptMergeRS, + MergeLB, + MergeHCC, + ScanHR, + OptScanHR, CodeRoots, OptCodeRoots, ObjCopy, @@ -84,18 +86,24 @@ static const GCParPhases ExtRootScanSubPhasesFirst = ThreadRoots; static const GCParPhases ExtRootScanSubPhasesLast = WeakCLDRoots; - enum GCScanRSWorkItems { - ScanRSScannedCards, - ScanRSClaimedCards, - ScanRSSkippedCards, - ScanRSScannedOptRefs, - ScanRSUsedMemory + enum GCMergeRSWorkTimes { + MergeRSMergedSparse, + MergeRSMergedFine, + MergeRSMergedCoarse }; - enum GCUpdateRSWorkItems { - UpdateRSProcessedBuffers, - UpdateRSScannedCards, - UpdateRSSkippedCards + enum GCScanHRWorkItems { + ScanHRScannedCards, + ScanHRScannedBlocks, + ScanHRClaimedChunks, + ScanHRScannedOptRefs, + ScanHRUsedMemory + }; + + enum GCMergeLBWorkItems { + MergeLBProcessedBuffers, + MergeLBDirtyCards, + MergeLBSkippedCards }; enum GCObjCopyWorkItems { @@ -109,19 +117,27 @@ WorkerDataArray* _gc_par_phases[GCParPhasesSentinel]; - WorkerDataArray* _update_rs_processed_buffers; - WorkerDataArray* _update_rs_scanned_cards; - WorkerDataArray* _update_rs_skipped_cards; + WorkerDataArray* _merge_rs_merged_sparse; + 
WorkerDataArray* _merge_rs_merged_fine; + WorkerDataArray* _merge_rs_merged_coarse; + + WorkerDataArray* _merge_lb_processed_buffers; + WorkerDataArray* _merge_lb_dirty_cards; + WorkerDataArray* _merge_lb_skipped_cards; - WorkerDataArray* _scan_rs_scanned_cards; - WorkerDataArray* _scan_rs_claimed_cards; - WorkerDataArray* _scan_rs_skipped_cards; + WorkerDataArray* _scan_hr_scanned_cards; + WorkerDataArray* _scan_hr_scanned_blocks; + WorkerDataArray* _scan_hr_claimed_chunks; - WorkerDataArray* _opt_scan_rs_scanned_cards; - WorkerDataArray* _opt_scan_rs_claimed_cards; - WorkerDataArray* _opt_scan_rs_skipped_cards; - WorkerDataArray* _opt_scan_rs_scanned_opt_refs; - WorkerDataArray* _opt_scan_rs_used_memory; + WorkerDataArray* _opt_merge_rs_merged_sparse; + WorkerDataArray* _opt_merge_rs_merged_fine; + WorkerDataArray* _opt_merge_rs_merged_coarse; + + WorkerDataArray* _opt_scan_hr_scanned_cards; + WorkerDataArray* _opt_scan_hr_scanned_blocks; + WorkerDataArray* _opt_scan_hr_claimed_chunks; + WorkerDataArray* _opt_scan_hr_scanned_opt_refs; + WorkerDataArray* _opt_scan_hr_used_memory; WorkerDataArray* _obj_copy_lab_waste; WorkerDataArray* _obj_copy_lab_undo_waste; @@ -145,6 +161,9 @@ double _cur_string_deduplication_time_ms; + double _cur_merge_heap_roots_time_ms; + double _cur_optional_merge_heap_roots_time_ms; + double _cur_prepare_tlab_time_ms; double _cur_resize_tlab_time_ms; @@ -159,6 +178,8 @@ double _external_accounted_time_ms; + double _recorded_prepare_heap_roots_time_ms; + double _recorded_clear_claimed_marks_time_ms; double _recorded_young_cset_choice_time_ms; @@ -208,7 +229,8 @@ void trace_count(const char* name, size_t value) const; double print_pre_evacuate_collection_set() const; - double print_evacuate_collection_set() const; + double print_merge_heap_roots_time() const; + double print_evacuate_initial_collection_set() const; double print_evacuate_optional_collection_set() const; double print_post_evacuate_collection_set() const; void print_other(double accounted_ms) const; @@ -278,6 +300,14 @@ _cur_strong_code_root_purge_time_ms = ms; } + void record_merge_heap_roots_time(double ms) { + _cur_merge_heap_roots_time_ms += ms; + } + + void record_or_add_optional_merge_heap_roots_time(double ms) { + _cur_optional_merge_heap_roots_time_ms += ms; + } + void record_evac_fail_recalc_used_time(double ms) { _cur_evac_fail_recalc_used = ms; } @@ -357,6 +387,10 @@ _external_accounted_time_ms += time_ms; } + void record_prepare_heap_roots_time_ms(double recorded_prepare_heap_roots_time_ms) { + _recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms; + } + void record_clear_claimed_marks_time_ms(double recorded_clear_claimed_marks_time_ms) { _recorded_clear_claimed_marks_time_ms = recorded_clear_claimed_marks_time_ms; } @@ -397,6 +431,10 @@ return _cur_fast_reclaim_humongous_time_ms; } + size_t fast_reclaim_humongous_candidates() const { + return _cur_fast_reclaim_humongous_candidates; + } + ReferenceProcessorPhaseTimes* ref_phase_times() { return &_ref_phase_times; } WeakProcessorPhaseTimes* weak_phase_times() { return &_weak_phase_times; } @@ -424,8 +462,10 @@ G1GCPhaseTimes* _phase_times; uint _worker_id; EventGCPhaseParallel _event; + bool _must_record; + public: - G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id); + G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id, bool must_record = true); virtual ~G1GCParPhaseTimesTracker(); }; diff -r d58442b8abc1 -r 
3e31a8beaae4 src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp --- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -39,8 +39,8 @@ } // After a collection pause, young list target length is updated. So we need to make sure we have enough regions in dram for young gen. -void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc) { - G1Policy::record_collection_pause_end(pause_time_ms, cards_scanned, heap_used_bytes_before_gc); +void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) { + G1Policy::record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc); _manager->adjust_dram_regions((uint)young_list_target_length(), G1CollectedHeap::heap()->workers()); } diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp --- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -38,7 +38,7 @@ // initialize policy virtual void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set); // Record end of an evacuation pause. - virtual void record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc); + virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc); // Record the end of full collection. virtual void record_full_collection_end(); diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1ParScanThreadState.cpp --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -51,6 +51,7 @@ _tenuring_threshold(g1h->policy()->tenuring_threshold()), _scanner(g1h, this), _worker_id(worker_id), + _last_enqueued_card(SIZE_MAX), _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1), _stack_trim_lower_threshold(GCDrainStackTargetSize), _trim_ticks(), @@ -371,7 +372,7 @@ } size_t used_memory = pss->oops_into_optional_region(hr)->used_memory(); - _g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanRS, worker_index, used_memory, G1GCPhaseTimes::ScanRSUsedMemory); + _g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanHR, worker_index, used_memory, G1GCPhaseTimes::ScanHRUsedMemory); } } diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1ParScanThreadState.hpp --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -60,6 +60,10 @@ uint _worker_id; + // Remember the last enqueued card to avoid enqueuing the same card over and over; + // since we only ever scan a card once, this is sufficient. + size_t _last_enqueued_card; + // Upper and lower threshold to start and end work queue draining. uint const _stack_trim_upper_threshold; uint const _stack_trim_lower_threshold; @@ -128,8 +132,9 @@ } size_t card_index = ct()->index_for(p); // If the card hasn't been added to the buffer, do it. 
- if (ct()->mark_card_deferred(card_index)) { + if (_last_enqueued_card != card_index) { dirty_card_queue().enqueue(ct()->byte_for_index(card_index)); + _last_enqueued_card = card_index; } } diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1Policy.cpp --- a/src/hotspot/share/gc/g1/g1Policy.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1Policy.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -572,10 +572,24 @@ return result; } +double G1Policy::log_buffer_processing_time() const { + double all_cards_processing_time = average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR); + size_t log_buffer_dirty_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); + size_t scan_heap_roots_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) + + phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards); + // This may happen if there are duplicate cards in different log buffers. + if (log_buffer_dirty_cards > scan_heap_roots_cards) { + return all_cards_processing_time + average_time_ms(G1GCPhaseTimes::MergeLB); + } + return (all_cards_processing_time * log_buffer_dirty_cards / scan_heap_roots_cards) + average_time_ms(G1GCPhaseTimes::MergeLB); +} + // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1Policy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc) { +void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) { + G1GCPhaseTimes* p = phase_times(); + double end_time_sec = os::elapsedTime(); assert_used_and_recalculate_used_equal(_g1h); @@ -645,29 +659,40 @@ _short_lived_surv_rate_group->start_adding_regions(); // Do that for any other surv rate groups - double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::ScanHCC) : 0.0; + double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? 
average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0; if (update_stats) { - double cost_per_card_ms = 0.0; - if (_pending_cards > 0) { - cost_per_card_ms = (average_time_ms(G1GCPhaseTimes::UpdateRS)) / (double) _pending_cards; - _analytics->report_cost_per_card_ms(cost_per_card_ms); + double cost_per_log_buffer_entry = 0.0; + size_t const pending_log_buffer_entries = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); + if (pending_log_buffer_entries > 0) { + cost_per_log_buffer_entry = log_buffer_processing_time() / pending_log_buffer_entries; + _analytics->report_cost_per_log_buffer_entry_ms(cost_per_log_buffer_entry); } _analytics->report_cost_scan_hcc(scan_hcc_time_ms); - double cost_per_entry_ms = 0.0; - if (cards_scanned > 10) { - double avg_time_scan_rs = average_time_ms(G1GCPhaseTimes::ScanRS); - if (this_pause_was_young_only) { - avg_time_scan_rs += average_time_ms(G1GCPhaseTimes::OptScanRS); - } - cost_per_entry_ms = avg_time_scan_rs / cards_scanned; - _analytics->report_cost_per_entry_ms(cost_per_entry_ms, this_pause_was_young_only); + size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) + + p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards); + size_t remset_cards_scanned = 0; + // There might have been duplicate log buffer entries in the queues which could + // increase this value beyond the cards scanned. In this case attribute all cards + // to the log buffers. + if (pending_log_buffer_entries <= total_cards_scanned) { + remset_cards_scanned = total_cards_scanned - pending_log_buffer_entries; + } + + double cost_per_remset_card_ms = 0.0; + if (remset_cards_scanned > 10) { + double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) * + remset_cards_scanned / total_cards_scanned) + + average_time_ms(G1GCPhaseTimes::MergeRS); + + cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned; + _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only); } if (_max_rs_lengths > 0) { double cards_per_entry_ratio = - (double) cards_scanned / (double) _max_rs_lengths; + (double) remset_cards_scanned / (double) _max_rs_lengths; _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only); } @@ -759,20 +784,26 @@ } // Note that _mmu_tracker->max_gc_time() returns the time in seconds. - double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; + double scan_log_buffer_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; - if (update_rs_time_goal_ms < scan_hcc_time_ms) { + if (scan_log_buffer_time_goal_ms < scan_hcc_time_ms) { log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)." 
- "Update RS time goal: %1.2fms Scan HCC time: %1.2fms", - update_rs_time_goal_ms, scan_hcc_time_ms); + "Log Buffer Scan time goal: %1.2fms Scan HCC time: %1.2fms", + scan_log_buffer_time_goal_ms, scan_hcc_time_ms); - update_rs_time_goal_ms = 0; + scan_log_buffer_time_goal_ms = 0; } else { - update_rs_time_goal_ms -= scan_hcc_time_ms; + scan_log_buffer_time_goal_ms -= scan_hcc_time_ms; } - _g1h->concurrent_refine()->adjust(average_time_ms(G1GCPhaseTimes::UpdateRS), - phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), - update_rs_time_goal_ms); + + double const log_buffer_time = log_buffer_processing_time(); + + log_debug(gc, ergo, refine)("Concurrent refinement times: Log Buffer Scan time goal: %1.2fms Log Buffer Scan time: %1.2fms HCC time: %1.2fms", + scan_log_buffer_time_goal_ms, log_buffer_time, scan_hcc_time_ms); + + _g1h->concurrent_refine()->adjust(log_buffer_time, + phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBProcessedBuffers), + scan_log_buffer_time_goal_ms); } G1IHOPControl* G1Policy::create_ihop_control(const G1Predictions* predictor){ diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1Policy.hpp --- a/src/hotspot/share/gc/g1/g1Policy.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1Policy.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -111,6 +111,8 @@ bool should_update_surv_rate_group_predictors() { return collector_state()->in_young_only_phase() && !collector_state()->mark_or_rebuild_in_progress(); } + + double log_buffer_processing_time() const; public: const G1Predictions& predictor() const { return _predictor; } const G1Analytics* analytics() const { return const_cast(_analytics); } @@ -311,7 +313,7 @@ // Record the start and end of an evacuation pause. void record_collection_pause_start(double start_time_sec); - virtual void record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc); + virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc); // Record the start and end of a full collection. void record_full_collection_start(); diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1RemSet.cpp --- a/src/hotspot/share/gc/g1/g1RemSet.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -38,7 +38,8 @@ #include "gc/g1/g1SharedDirtyCardQueue.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/g1/heapRegionManager.inline.hpp" -#include "gc/g1/heapRegionRemSet.hpp" +#include "gc/g1/heapRegionRemSet.inline.hpp" +#include "gc/g1/sparsePRT.hpp" #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/suspendibleThreadSet.hpp" #include "jfr/jfrEvents.hpp" @@ -52,40 +53,453 @@ #include "utilities/stack.inline.hpp" #include "utilities/ticks.hpp" -// Collects information about the overall remembered set scan progress during an evacuation. +// Collects information about the overall heap root scan progress during an evacuation. +// +// Scanning the remembered sets works by first merging all sources of cards to be +// scanned (log buffers, hcc, remembered sets) into a single data structure to remove +// duplicates and simplify work distribution. +// +// During the following card scanning we not only scan this combined set of cards, but +// also remember that these were completely scanned. The following evacuation passes +// do not scan these cards again, and so need to be preserved across increments. 
+// +// The representation for all the cards to scan is the card table: cards can have +// one of three states during GC: +// - clean: these cards will not be scanned in this pass +// - dirty: these cards will be scanned in this pass +// - scanned: these cards have already been scanned in a previous pass +// +// After all evacuation is done, we reset the card table to clean. +// +// Work distribution occurs on a "chunk" basis, i.e. contiguous ranges of cards. As an +// additional optimization, during card merging we remember which regions and which +// chunks actually contain cards to be scanned. Threads iterate only across these +// regions, and only compete for chunks containing any cards. +// +// Within these chunks, a worker scans the card table in "blocks" of cards, i.e. +// contiguous ranges of dirty cards to be scanned. These blocks are converted to actual +// memory ranges and then passed on to actual scanning. class G1RemSetScanState : public CHeapObj { + class G1DirtyRegions; + + size_t _max_regions; + + // Whether a region that is part of the collection set has been processed yet. + typedef bool G1RemsetIterState; + + G1RemsetIterState volatile* _collection_set_iter_state; + + // Card table iteration claim for each heap region, from 0 (completely unscanned) + // to (>=) HeapRegion::CardsPerRegion (completely scanned). + uint volatile* _card_table_scan_state; + + // Random power of two number of cards we want to claim per thread. This corresponds + // to a 64k memory work chunk area per thread. + // We use the same claim size as Parallel GC. No particular measurements have been + // performed to determine an optimal number. + static const uint CardsPerChunk = 128; + + uint _scan_chunks_per_region; + bool* _region_scan_chunks; + uint8_t _scan_chunks_shift; +public: + uint scan_chunk_size() const { return (uint)1 << _scan_chunks_shift; } + + // Returns whether the chunk corresponding to the given region/card in region contains a + // dirty card, i.e. actually needs scanning. + bool chunk_needs_scan(uint const region_idx, uint const card_in_region) const { + size_t const idx = (size_t)region_idx * _scan_chunks_per_region + (card_in_region >> _scan_chunks_shift); + assert(idx < (_max_regions * _scan_chunks_per_region), "Index " SIZE_FORMAT " out of bounds " SIZE_FORMAT, + idx, _max_regions * _scan_chunks_per_region); + return _region_scan_chunks[idx]; + } + private: + // The complete set of regions whose card table needs to be cleared at the end of GC because + // we scribbled all over them. + G1DirtyRegions* _all_dirty_regions; + // The set of regions whose card table needs to be scanned for new dirty cards + // in the current evacuation pass. + G1DirtyRegions* _next_dirty_regions; + + // Set of (unique) regions that can be added to concurrently.
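For sizing intuition: CardsPerChunk is 128 cards and a G1 card covers 512 bytes, so one claim chunk corresponds to 128 * 512 = 64 KiB of heap, which is the "64k work chunk area" the comment refers to, and the claim shift used below is log2(128) = 7. The following standalone sketch works through that arithmetic and the chunk-index mapping; the 4 MB region size and all names are assumptions made for the example only (G1 picks the region size ergonomically), not values taken from the patch.

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative constants: 512-byte cards, 128 cards per claim chunk, 4 MB regions.
  const size_t card_size_bytes  = 512;
  const size_t cards_per_chunk  = 128;
  const size_t cards_per_region = 4 * 1024 * 1024 / card_size_bytes;  // 8192 cards per 4 MB region

  const size_t chunk_bytes       = cards_per_chunk * card_size_bytes; // 64 KiB of heap per chunk
  const size_t chunks_per_region = cards_per_region / cards_per_chunk; // 64 chunks per region
  const unsigned chunk_shift     = 7;                                 // log2(cards_per_chunk)

  // Chunk index for a card, following the shape of chunk_needs_scan() above.
  const size_t region_idx = 3, card_in_region = 1000;
  const size_t chunk_idx = region_idx * chunks_per_region + (card_in_region >> chunk_shift);

  printf("chunk covers %zu bytes, region has %zu chunks, card maps to chunk %zu\n",
         chunk_bytes, chunks_per_region, chunk_idx);
  return 0;
}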
+ class G1DirtyRegions : public CHeapObj { + uint* _buffer; + uint _cur_idx; + size_t _max_regions; + + bool* _contains; + + public: + G1DirtyRegions(size_t max_regions) : + _buffer(NEW_C_HEAP_ARRAY(uint, max_regions, mtGC)), + _cur_idx(0), + _max_regions(max_regions), + _contains(NEW_C_HEAP_ARRAY(bool, max_regions, mtGC)) { + + reset(); + } + + static size_t chunk_size() { return M; } + + ~G1DirtyRegions() { + FREE_C_HEAP_ARRAY(uint, _buffer); + FREE_C_HEAP_ARRAY(bool, _contains); + } + + void reset() { + _cur_idx = 0; + ::memset(_contains, false, _max_regions * sizeof(bool)); + } + + uint size() const { return _cur_idx; } + + uint at(uint idx) const { + assert(idx < _cur_idx, "Index %u beyond valid regions", idx); + return _buffer[idx]; + } + + void add_dirty_region(uint region) { + if (_contains[region]) { + return; + } + + bool marked_as_dirty = Atomic::cmpxchg(true, &_contains[region], false) == false; + if (marked_as_dirty) { + uint allocated = Atomic::add(1u, &_cur_idx) - 1; + _buffer[allocated] = region; + } + } + + // Creates the union of this and the other G1DirtyRegions. + void merge(const G1DirtyRegions* other) { + for (uint i = 0; i < other->size(); i++) { + uint region = other->at(i); + if (!_contains[region]) { + _buffer[_cur_idx++] = region; + _contains[region] = true; + } + } + } + }; + + // Returns whether the given region contains cards we need to scan. The remembered + // set and other sources may contain cards that + // - are in uncommitted regions + // - are located in the collection set + // - are located in free regions + // as we do not clean up remembered sets before merging heap roots. + bool contains_cards_to_process(uint const region_idx) const { + HeapRegion* hr = G1CollectedHeap::heap()->region_at_or_null(region_idx); + return (hr != NULL && !hr->in_collection_set() && hr->is_old_or_humongous_or_archive()); + } + + class G1MergeCardSetClosure : public HeapRegionClosure { + G1RemSetScanState* _scan_state; + G1CardTable* _ct; + + uint _merged_sparse; + uint _merged_fine; + uint _merged_coarse; + + // Returns if the region contains cards we need to scan. If so, remember that + // region in the current set of dirty regions. 
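The add_dirty_region() path in G1DirtyRegions above stays lock-free by splitting the insert into two steps: a compare-and-swap on the per-region _contains flag picks exactly one winner, and only that winner bumps _cur_idx and appends to _buffer. Below is a minimal standalone sketch of that claiming pattern; it uses std::atomic in place of HotSpot's Atomic wrappers and made-up names, so it illustrates the idea rather than reproducing the patch's code.

#include <atomic>
#include <cstdio>
#include <memory>

class DirtyRegions {
  std::unique_ptr<unsigned[]> _buffer;             // append-only list of unique region indices
  std::unique_ptr<std::atomic<bool>[]> _contains;  // per-region "already added" flag
  std::atomic<unsigned> _cur_idx{0};
public:
  explicit DirtyRegions(unsigned max_regions)
    : _buffer(new unsigned[max_regions]),
      _contains(new std::atomic<bool>[max_regions]) {
    for (unsigned i = 0; i < max_regions; i++) {
      _contains[i].store(false, std::memory_order_relaxed);
    }
  }

  void add_dirty_region(unsigned region) {
    if (_contains[region].load(std::memory_order_relaxed)) {
      return;                                      // cheap pre-check, as in the patch
    }
    bool expected = false;
    // Exactly one thread wins the flag transition and appends the region.
    if (_contains[region].compare_exchange_strong(expected, true)) {
      unsigned slot = _cur_idx.fetch_add(1, std::memory_order_relaxed);
      _buffer[slot] = region;
    }
  }

  unsigned size() const { return _cur_idx.load(); }
  unsigned at(unsigned idx) const { return _buffer[idx]; }
};

int main() {
  DirtyRegions regions(16);
  regions.add_dirty_region(5);
  regions.add_dirty_region(5);   // duplicate add is filtered out
  regions.add_dirty_region(9);
  printf("%u unique dirty regions, first is %u\n", regions.size(), regions.at(0));
  return 0;
}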
+ bool remember_if_interesting(uint const region_idx) { + if (!_scan_state->contains_cards_to_process(region_idx)) { + return false; + } + _scan_state->add_dirty_region(region_idx); + return true; + } + public: + G1MergeCardSetClosure(G1RemSetScanState* scan_state) : + _scan_state(scan_state), + _ct(G1CollectedHeap::heap()->card_table()), + _merged_sparse(0), + _merged_fine(0), + _merged_coarse(0) { } + + void next_coarse_prt(uint const region_idx) { + if (!remember_if_interesting(region_idx)) { + return; + } + + _merged_coarse++; + + size_t region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; + _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion); + _scan_state->set_chunk_region_dirty(region_base_idx); + } + + void next_fine_prt(uint const region_idx, BitMap* bm) { + if (!remember_if_interesting(region_idx)) { + return; + } + + _merged_fine++; + + size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; + BitMap::idx_t cur = bm->get_next_one_offset(0); + while (cur != bm->size()) { + _ct->mark_clean_as_dirty(region_base_idx + cur); + _scan_state->set_chunk_dirty(region_base_idx + cur); + cur = bm->get_next_one_offset(cur + 1); + } + } + + void next_sparse_prt(uint const region_idx, SparsePRTEntry::card_elem_t* cards, uint const num_cards) { + if (!remember_if_interesting(region_idx)) { + return; + } + + _merged_sparse++; + + size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; + for (uint i = 0; i < num_cards; i++) { + size_t card_idx = region_base_idx + cards[i]; + _ct->mark_clean_as_dirty(card_idx); + _scan_state->set_chunk_dirty(card_idx); + } + } + + virtual bool do_heap_region(HeapRegion* r) { + assert(r->in_collection_set() || r->is_starts_humongous(), "must be"); + + HeapRegionRemSet* rem_set = r->rem_set(); + if (!rem_set->is_empty()) { + rem_set->iterate_prts(*this); + } + + return false; + } + + size_t merged_sparse() const { return _merged_sparse; } + size_t merged_fine() const { return _merged_fine; } + size_t merged_coarse() const { return _merged_coarse; } + }; + + // Visitor for the remembered sets of humongous candidate regions to merge their + // remembered set into the card table. + class G1FlushHumongousCandidateRemSets : public HeapRegionClosure { + G1MergeCardSetClosure _cl; + + public: + G1FlushHumongousCandidateRemSets(G1RemSetScanState* scan_state) : _cl(scan_state) { } + + virtual bool do_heap_region(HeapRegion* r) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + if (!r->is_starts_humongous() || + !g1h->region_attr(r->hrm_index()).is_humongous() || + r->rem_set()->is_empty()) { + return false; + } + + guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries), + "Found a not-small remembered set here. This is inconsistent with previous assumptions."); + + _cl.do_heap_region(r); + + // We should only clear the card based remembered set here as we will not + // implicitly rebuild anything else during eager reclaim. Note that at the moment + // (and probably never) we do not enter this path if there are other kind of + // remembered sets for this region. + r->rem_set()->clear_locked(true /* only_cardset */); + // Clear_locked() above sets the state to Empty. However we want to continue + // collecting remembered set entries for humongous regions that were not + // reclaimed. 
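G1MergeCardSetClosure above flattens the three remembered set granularities into one card-based representation: a coarse entry dirties every card of the source region, a fine entry walks a per-region card bitmap, and a sparse entry is a short explicit list of card indices. The standalone sketch below shows that flattening over a toy card table; the geometry (8 cards per region, 4 regions) and all names are invented for the example.

#include <bitset>
#include <cstdio>
#include <vector>

const unsigned kCardsPerRegion = 8;   // toy geometry, not G1's
const unsigned kRegions = 4;

// The merge target: one "dirty" flag per card, standing in for the card table.
std::vector<bool> card_table(kCardsPerRegion * kRegions, false);

void merge_coarse(unsigned region) {
  // Coarse entry: any card of the region may hold a reference, dirty them all.
  for (unsigned c = 0; c < kCardsPerRegion; c++) {
    card_table[region * kCardsPerRegion + c] = true;
  }
}

void merge_fine(unsigned region, const std::bitset<kCardsPerRegion>& bm) {
  // Fine entry: a bitmap with one bit per card of the region.
  for (unsigned c = 0; c < kCardsPerRegion; c++) {
    if (bm[c]) card_table[region * kCardsPerRegion + c] = true;
  }
}

void merge_sparse(unsigned region, const std::vector<unsigned>& cards) {
  // Sparse entry: a short explicit list of card indices within the region.
  for (unsigned c : cards) {
    card_table[region * kCardsPerRegion + c] = true;
  }
}

int main() {
  merge_coarse(0);                                          // the whole of region 0
  merge_fine(1, std::bitset<kCardsPerRegion>("00010010"));  // two cards of region 1
  merge_sparse(2, {3, 5});                                  // two cards of region 2

  unsigned dirty = 0;
  for (bool d : card_table) dirty += d;
  printf("%u cards to scan after merging\n", dirty);        // 8 + 2 + 2 = 12
  return 0;
}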
+ r->rem_set()->set_state_complete(); +#ifdef ASSERT + G1HeapRegionAttr region_attr = g1h->region_attr(r->hrm_index()); + assert(region_attr.needs_remset_update(), "must be"); +#endif + assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty."); + + return false; + } + + size_t merged_sparse() const { return _cl.merged_sparse(); } + size_t merged_fine() const { return _cl.merged_fine(); } + size_t merged_coarse() const { return _cl.merged_coarse(); } + }; + + // Visitor for the log buffer entries to merge them into the card table. + class G1MergeLogBufferCardsClosure : public G1CardTableEntryClosure { + G1RemSetScanState* _scan_state; + G1CardTable* _ct; + + size_t _cards_dirty; + size_t _cards_skipped; + public: + G1MergeLogBufferCardsClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state) : + _scan_state(scan_state), _ct(g1h->card_table()), _cards_dirty(0), _cards_skipped(0) + {} + + bool do_card_ptr(CardValue* card_ptr, uint worker_i) { + // The only time we care about recording cards that + // contain references that point into the collection set + // is during RSet updating within an evacuation pause. + // In this case worker_i should be the id of a GC worker thread. + assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); + + uint const region_idx = _ct->region_idx_for(card_ptr); + + // The second clause must come after - the log buffers might contain cards to uncommitted + // regions. + // This code may count duplicate entries in the log buffers (even if rare) multiple + // times. + if (_scan_state->contains_cards_to_process(region_idx) && (*card_ptr == G1CardTable::dirty_card_val())) { + _scan_state->add_dirty_region(region_idx); + _scan_state->set_chunk_dirty(_ct->index_for_cardvalue(card_ptr)); + _cards_dirty++; + } else { + // We may have had dirty cards in the (initial) collection set (or the + // young regions which are always in the initial collection set). We do + // not fix their cards here: we already added these regions to the set of + // regions to clear the card table at the end during the prepare() phase. + _cards_skipped++; + } + return true; + } + + size_t cards_dirty() const { return _cards_dirty; } + size_t cards_skipped() const { return _cards_skipped; } + }; + + class G1MergeHeapRootsTask : public AbstractGangTask { + HeapRegionClaimer _hr_claimer; + G1RemSetScanState* _scan_state; + bool _remembered_set_only; + + G1GCPhaseTimes::GCParPhases _merge_phase; + + volatile bool _fast_reclaim_handled; + + public: + G1MergeHeapRootsTask(G1RemSetScanState* scan_state, uint num_workers, bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase) : + AbstractGangTask("G1 Merge Heap Roots"), + _hr_claimer(num_workers), + _scan_state(scan_state), + _remembered_set_only(remembered_set_only), + _merge_phase(merge_phase), + _fast_reclaim_handled(false) { } + + virtual void work(uint worker_id) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1GCPhaseTimes* p = g1h->phase_times(); + + // We schedule flushing the remembered sets of humongous fast reclaim candidates + // onto the card table first to allow the remaining parallelized tasks to hide it.
+ if (!_remembered_set_only && + p->fast_reclaim_humongous_candidates() > 0 && + !_fast_reclaim_handled && + !Atomic::cmpxchg(true, &_fast_reclaim_handled, false)) { + + G1FlushHumongousCandidateRemSets cl(_scan_state); + g1h->heap_region_iterate(&cl); + + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse); + } + + // Merge remembered sets of current candidates. + { + G1GCParPhaseTimesTracker x(p, _merge_phase, worker_id, !_remembered_set_only /* must_record */); + G1MergeCardSetClosure cl(_scan_state); + g1h->collection_set_iterate_increment_from(&cl, &_hr_claimer, worker_id); + + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse); + } + + // Apply closure to log entries in the HCC. + if (!_remembered_set_only && G1HotCardCache::default_use_cache()) { + assert(_merge_phase == G1GCPhaseTimes::MergeRS, "Wrong merge phase"); + G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeHCC, worker_id); + G1MergeLogBufferCardsClosure cl(g1h, _scan_state); + g1h->iterate_hcc_closure(&cl, worker_id); + } + + // Now apply the closure to all remaining log entries. + if (!_remembered_set_only) { + assert(_merge_phase == G1GCPhaseTimes::MergeRS, "Wrong merge phase"); + G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeLB, worker_id); + + G1MergeLogBufferCardsClosure cl(g1h, _scan_state); + g1h->iterate_dirty_card_closure(&cl, worker_id); + + p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeLBDirtyCards); + p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_skipped(), G1GCPhaseTimes::MergeLBSkippedCards); + } + } + }; + + // Creates a snapshot of the current _top values at the start of collection to + // filter out card marks that we do not want to scan. + class G1ResetScanTopClosure : public HeapRegionClosure { + G1RemSetScanState* _scan_state; + + public: + G1ResetScanTopClosure(G1RemSetScanState* scan_state) : _scan_state(scan_state) { } + + virtual bool do_heap_region(HeapRegion* r) { + uint hrm_index = r->hrm_index(); + if (r->in_collection_set()) { + // Young regions had their card table marked as young at their allocation; + // we need to make sure that these marks are cleared at the end of GC, *but* + // they should not be scanned for cards. + // So directly add them to the "all_dirty_regions". + // Same for regions in the (initial) collection set: they may contain cards from + // the log buffers, make sure they are cleaned. + _scan_state->add_all_dirty_region(hrm_index); + } else if (r->is_old_or_humongous_or_archive()) { + _scan_state->set_scan_top(hrm_index, r->top()); + } + return false; + } + }; + // For each region, contains the maximum top() value to be used during this garbage + // collection. Subsumes common checks like filtering out everything but old and + // humongous regions outside the collection set. + // This is valid because we are not interested in scanning stray remembered set + // entries from free or archive regions. 
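The _scan_top snapshot motivated above is what later bounds every claimed card range: a range is clamped to the top() value recorded at the start of the pause, and skipped entirely if it begins at or above it, so card marks for memory beyond the snapshot are never scanned. A minimal standalone sketch of that clamping follows; the addresses and names are invented stand-ins for the BOT-derived card start and the recorded top, not code from the patch.

#include <algorithm>
#include <cstdio>

// One entry per region: the top() value snapshotted at the start of the pause,
// or nullptr for regions that must not be scanned at all.
struct RegionScanTops {
  const char* tops[4] = {nullptr, nullptr, nullptr, nullptr};
};

// Returns how many bytes of the claimed card range should actually be scanned.
size_t clamp_to_scan_top(const RegionScanTops& s, unsigned region,
                         const char* card_start, size_t card_range_bytes) {
  const char* top = s.tops[region];
  if (top == nullptr || card_start >= top) {
    return 0;                                   // nothing to scan in this range
  }
  return std::min(card_range_bytes, (size_t)(top - card_start));
}

int main() {
  static char heap[4096];                       // toy backing storage for a "region"
  RegionScanTops s;
  s.tops[1] = heap + 1000;                      // region 1 was filled up to offset 1000

  printf("%zu\n", clamp_to_scan_top(s, 1, heap + 512, 512));   // clamped to 488 bytes
  printf("%zu\n", clamp_to_scan_top(s, 1, heap + 1024, 512));  // 0: beyond the snapshot
  printf("%zu\n", clamp_to_scan_top(s, 2, heap, 512));         // 0: region not scannable
  return 0;
}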
+ HeapWord** _scan_top; + class G1ClearCardTableTask : public AbstractGangTask { G1CollectedHeap* _g1h; - uint* _dirty_region_list; - size_t _num_dirty_regions; - size_t _chunk_length; + G1DirtyRegions* _regions; + uint _chunk_length; - size_t volatile _cur_dirty_regions; + uint volatile _cur_dirty_regions; + + G1RemSetScanState* _scan_state; + public: G1ClearCardTableTask(G1CollectedHeap* g1h, - uint* dirty_region_list, - size_t num_dirty_regions, - size_t chunk_length) : + G1DirtyRegions* regions, + uint chunk_length, + G1RemSetScanState* scan_state) : AbstractGangTask("G1 Clear Card Table Task"), _g1h(g1h), - _dirty_region_list(dirty_region_list), - _num_dirty_regions(num_dirty_regions), + _regions(regions), _chunk_length(chunk_length), - _cur_dirty_regions(0) { + _cur_dirty_regions(0), + _scan_state(scan_state) { assert(chunk_length > 0, "must be"); } - static size_t chunk_size() { return M; } + static uint chunk_size() { return M; } void work(uint worker_id) { - while (_cur_dirty_regions < _num_dirty_regions) { - size_t next = Atomic::add(_chunk_length, &_cur_dirty_regions) - _chunk_length; - size_t max = MIN2(next + _chunk_length, _num_dirty_regions); + while (_cur_dirty_regions < _regions->size()) { + uint next = Atomic::add(_chunk_length, &_cur_dirty_regions) - _chunk_length; + uint max = MIN2(next + _chunk_length, _regions->size()); - for (size_t i = next; i < max; i++) { - HeapRegion* r = _g1h->region_at(_dirty_region_list[i]); + for (uint i = next; i < max; i++) { + HeapRegion* r = _g1h->region_at(_regions->at(i)); if (!r->is_survivor()) { r->clear_cardtable(); } @@ -94,159 +508,222 @@ } }; - size_t _max_regions; - - // Scan progress for the remembered set of a single region. Transitions from - // Unclaimed -> Claimed -> Complete. - // At each of the transitions the thread that does the transition needs to perform - // some special action once. This is the reason for the extra "Claimed" state. - typedef jint G1RemsetIterState; - - static const G1RemsetIterState Unclaimed = 0; // The remembered set has not been scanned yet. - static const G1RemsetIterState Claimed = 1; // The remembered set is currently being scanned. - static const G1RemsetIterState Complete = 2; // The remembered set has been completely scanned. + // Clear the card table of "dirty" regions. + void clear_card_table(WorkGang* workers) { + uint num_regions = _all_dirty_regions->size(); - G1RemsetIterState volatile* _iter_states; - // The current location where the next thread should continue scanning in a region's - // remembered set. - size_t volatile* _iter_claims; + if (num_regions == 0) { + return; + } - // Temporary buffer holding the regions we used to store remembered set scan duplicate - // information. These are also called "dirty". Valid entries are from [0.._cur_dirty_region) - uint* _dirty_region_buffer; - - // Flag for every region whether it is in the _dirty_region_buffer already - // to avoid duplicates. - bool volatile* _in_dirty_region_buffer; - size_t _cur_dirty_region; + uint const num_chunks = (uint)(align_up((size_t)num_regions << HeapRegion::LogCardsPerRegion, G1ClearCardTableTask::chunk_size()) / G1ClearCardTableTask::chunk_size()); + uint const num_workers = MIN2(num_chunks, workers->active_workers()); + uint const chunk_length = G1ClearCardTableTask::chunk_size() / (uint)HeapRegion::CardsPerRegion; - // Creates a snapshot of the current _top values at the start of collection to - // filter out card marks that we do not want to scan. 
- class G1ResetScanTopClosure : public HeapRegionClosure { - private: - HeapWord** _scan_top; - public: - G1ResetScanTopClosure(HeapWord** scan_top) : _scan_top(scan_top) { } + // Iterate over the dirty cards region list. + G1ClearCardTableTask cl(G1CollectedHeap::heap(), _all_dirty_regions, chunk_length, this); - virtual bool do_heap_region(HeapRegion* r) { - uint hrm_index = r->hrm_index(); - if (!r->in_collection_set() && r->is_old_or_humongous_or_archive() && !r->is_empty()) { - _scan_top[hrm_index] = r->top(); - } else { - _scan_top[hrm_index] = NULL; - } - return false; - } - }; + log_debug(gc, ergo)("Running %s using %u workers for %u " + "units of work for %u regions.", + cl.name(), num_workers, num_chunks, num_regions); + workers->run_task(&cl, num_workers); - // For each region, contains the maximum top() value to be used during this garbage - // collection. Subsumes common checks like filtering out everything but old and - // humongous regions outside the collection set. - // This is valid because we are not interested in scanning stray remembered set - // entries from free or archive regions. - HeapWord** _scan_top; +#ifndef PRODUCT + G1CollectedHeap::heap()->verifier()->verify_card_table_cleanup(); +#endif + } + public: G1RemSetScanState() : _max_regions(0), - _iter_states(NULL), - _iter_claims(NULL), - _dirty_region_buffer(NULL), - _in_dirty_region_buffer(NULL), - _cur_dirty_region(0), + _collection_set_iter_state(NULL), + _card_table_scan_state(NULL), + _scan_chunks_per_region((uint)(HeapRegion::CardsPerRegion / CardsPerChunk)), + _region_scan_chunks(NULL), + _scan_chunks_shift(0), + _all_dirty_regions(NULL), + _next_dirty_regions(NULL), _scan_top(NULL) { } ~G1RemSetScanState() { - if (_iter_states != NULL) { - FREE_C_HEAP_ARRAY(G1RemsetIterState, _iter_states); - } - if (_iter_claims != NULL) { - FREE_C_HEAP_ARRAY(size_t, _iter_claims); - } - if (_dirty_region_buffer != NULL) { - FREE_C_HEAP_ARRAY(uint, _dirty_region_buffer); - } - if (_in_dirty_region_buffer != NULL) { - FREE_C_HEAP_ARRAY(bool, _in_dirty_region_buffer); - } - if (_scan_top != NULL) { - FREE_C_HEAP_ARRAY(HeapWord*, _scan_top); - } + FREE_C_HEAP_ARRAY(G1RemsetIterState, _collection_set_iter_state); + FREE_C_HEAP_ARRAY(uint, _card_table_scan_state); + FREE_C_HEAP_ARRAY(bool, _region_scan_chunks); + FREE_C_HEAP_ARRAY(HeapWord*, _scan_top); } - void initialize(uint max_regions) { - assert(_iter_states == NULL, "Must not be initialized twice"); - assert(_iter_claims == NULL, "Must not be initialized twice"); + void initialize(size_t max_regions) { + assert(_collection_set_iter_state == NULL, "Must not be initialized twice"); _max_regions = max_regions; - _iter_states = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC); - _iter_claims = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); - _dirty_region_buffer = NEW_C_HEAP_ARRAY(uint, max_regions, mtGC); - _in_dirty_region_buffer = NEW_C_HEAP_ARRAY(bool, max_regions, mtGC); + _collection_set_iter_state = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC); + _card_table_scan_state = NEW_C_HEAP_ARRAY(uint, max_regions, mtGC); + _region_scan_chunks = NEW_C_HEAP_ARRAY(bool, max_regions * _scan_chunks_per_region, mtGC); + + _scan_chunks_shift = (uint8_t)log2_intptr(HeapRegion::CardsPerRegion / _scan_chunks_per_region); _scan_top = NEW_C_HEAP_ARRAY(HeapWord*, max_regions, mtGC); } - void reset() { - for (uint i = 0; i < _max_regions; i++) { - _iter_states[i] = Unclaimed; - clear_scan_top(i); + void prepare() { + for (size_t i = 0; i < _max_regions; i++) { + 
_collection_set_iter_state[i] = false; + clear_scan_top((uint)i); } - G1ResetScanTopClosure cl(_scan_top); + _all_dirty_regions = new G1DirtyRegions(_max_regions); + + G1ResetScanTopClosure cl(this); G1CollectedHeap::heap()->heap_region_iterate(&cl); - memset((void*)_iter_claims, 0, _max_regions * sizeof(size_t)); - memset((void*)_in_dirty_region_buffer, false, _max_regions * sizeof(bool)); - _cur_dirty_region = 0; + _next_dirty_regions = new G1DirtyRegions(_max_regions); } - // Attempt to claim the remembered set of the region for iteration. Returns true - // if this call caused the transition from Unclaimed to Claimed. - inline bool claim_iter(uint region) { - assert(region < _max_regions, "Tried to access invalid region %u", region); - if (_iter_states[region] != Unclaimed) { - return false; + void print_merge_heap_roots_stats() { + size_t num_scan_chunks = 0; + for (uint i = 0; i < _max_regions * _scan_chunks_per_region; i++) { + if (_region_scan_chunks[i]) { + num_scan_chunks++; + } } - G1RemsetIterState res = Atomic::cmpxchg(Claimed, &_iter_states[region], Unclaimed); - return (res == Unclaimed); + size_t num_visited_cards = num_scan_chunks * CardsPerChunk; + size_t total_dirty_region_cards = _next_dirty_regions->size() * HeapRegion::CardsPerRegion; + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t total_old_region_cards = + (g1h->num_regions() - (g1h->num_free_regions() - g1h->collection_set()->cur_length())) * HeapRegion::CardsPerRegion; + + log_debug(gc,remset)("Visited cards " SIZE_FORMAT " Total dirty " SIZE_FORMAT " (%.2lf%%) Total old " SIZE_FORMAT " (%.2lf%%)", + num_visited_cards, + total_dirty_region_cards, + percent_of(num_visited_cards, total_dirty_region_cards), + total_old_region_cards, + percent_of(num_visited_cards, total_old_region_cards)); } - // Try to atomically sets the iteration state to "complete". Returns true for the - // thread that caused the transition. - inline bool set_iter_complete(uint region) { - if (iter_is_complete(region)) { - return false; + void merge_heap_roots(WorkGang* workers, bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase) { + { + _all_dirty_regions->merge(_next_dirty_regions); + _next_dirty_regions->reset(); + for (size_t i = 0; i < _max_regions; i++) { + _card_table_scan_state[i] = 0; + } + + ::memset(_region_scan_chunks, false, _max_regions * _scan_chunks_per_region * sizeof(*_region_scan_chunks)); } - G1RemsetIterState res = Atomic::cmpxchg(Complete, &_iter_states[region], Claimed); - return (res == Claimed); + + size_t const increment_length = G1CollectedHeap::heap()->collection_set()->increment_length(); + + uint const num_workers = !remembered_set_only ? workers->active_workers() : + MIN2(workers->active_workers(), (uint)increment_length); + + { + G1MergeHeapRootsTask cl(this, num_workers, remembered_set_only, merge_phase); + log_debug(gc, ergo)("Running %s using %u workers for " SIZE_FORMAT " regions", + cl.name(), num_workers, increment_length); + workers->run_task(&cl, num_workers); + } + + if (log_is_enabled(Debug, gc, remset)) { + print_merge_heap_roots_stats(); + } } - // Returns true if the region's iteration is complete. 
- inline bool iter_is_complete(uint region) const { - assert(region < _max_regions, "Tried to access invalid region %u", region); - return _iter_states[region] == Complete; + void set_chunk_region_dirty(size_t const region_card_idx) { + size_t chunk_idx = region_card_idx >> _scan_chunks_shift; + for (uint i = 0; i < _scan_chunks_per_region; i++) { + _region_scan_chunks[chunk_idx++] = true; + } + } + + void set_chunk_dirty(size_t const card_idx) { + assert((card_idx >> _scan_chunks_shift) < (_max_regions * _scan_chunks_per_region), + "Trying to access index " SIZE_FORMAT " out of bounds " SIZE_FORMAT, + card_idx >> _scan_chunks_shift, _max_regions * _scan_chunks_per_region); + size_t const chunk_idx = card_idx >> _scan_chunks_shift; + if (!_region_scan_chunks[chunk_idx]) { + _region_scan_chunks[chunk_idx] = true; + } } - // The current position within the remembered set of the given region. - inline size_t iter_claimed(uint region) const { - assert(region < _max_regions, "Tried to access invalid region %u", region); - return _iter_claims[region]; + void cleanup(WorkGang* workers) { + _all_dirty_regions->merge(_next_dirty_regions); + + clear_card_table(workers); + + delete _all_dirty_regions; + _all_dirty_regions = NULL; + + delete _next_dirty_regions; + _next_dirty_regions = NULL; } - // Claim the next block of cards within the remembered set of the region with - // step size. - inline size_t iter_claimed_next(uint region, size_t step) { - return Atomic::add(step, &_iter_claims[region]) - step; - } + void iterate_dirty_regions_from(HeapRegionClosure* cl, uint worker_id) { + uint num_regions = _next_dirty_regions->size(); - void add_dirty_region(uint region) { - if (_in_dirty_region_buffer[region]) { + if (num_regions == 0) { return; } - if (!Atomic::cmpxchg(true, &_in_dirty_region_buffer[region], false)) { - size_t allocated = Atomic::add(1u, &_cur_dirty_region) - 1; - _dirty_region_buffer[allocated] = region; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + WorkGang* workers = g1h->workers(); + uint const max_workers = workers->active_workers(); + + uint const start_pos = num_regions * worker_id / max_workers; + uint cur = start_pos; + + do { + bool result = cl->do_heap_region(g1h->region_at(_next_dirty_regions->at(cur))); + guarantee(!result, "Not allowed to ask for early termination."); + cur++; + if (cur == _next_dirty_regions->size()) { + cur = 0; + } + } while (cur != start_pos); + } + + // Attempt to claim the given region in the collection set for iteration. Returns true + // if this call caused the transition from Unclaimed to Claimed. 
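iterate_dirty_regions_from() above spreads workers over the dirty region list by giving each worker a start offset proportional to its id and letting it wrap around, so workers begin on different regions and only start competing for the same region's chunks once they catch up with one another. A small standalone sketch of the start-offset computation and the wrap-around walk, with names invented for the example:

#include <cstdio>

// Visit all 'num_regions' dirty-region slots, starting at a per-worker offset and wrapping.
void iterate_from(unsigned worker_id, unsigned max_workers, unsigned num_regions) {
  if (num_regions == 0) return;
  unsigned start = (unsigned)((unsigned long long)num_regions * worker_id / max_workers);
  unsigned cur = start;
  do {
    printf("worker %u visits dirty-region slot %u\n", worker_id, cur);
    if (++cur == num_regions) cur = 0;          // wrap around to the beginning of the list
  } while (cur != start);
}

int main() {
  // Two workers over five dirty regions: worker 0 starts at slot 0, worker 1 at slot 2.
  iterate_from(0, 2, 5);
  iterate_from(1, 2, 5);
  return 0;
}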
+ inline bool claim_collection_set_region(uint region) { + assert(region < _max_regions, "Tried to access invalid region %u", region); + if (_collection_set_iter_state[region]) { + return false; } + return !Atomic::cmpxchg(true, &_collection_set_iter_state[region], false); + } + + bool has_cards_to_scan(uint region) { + assert(region < _max_regions, "Tried to access invalid region %u", region); + return _card_table_scan_state[region] < HeapRegion::CardsPerRegion; + } + + uint claim_cards_to_scan(uint region, uint increment) { + assert(region < _max_regions, "Tried to access invalid region %u", region); + return Atomic::add(increment, &_card_table_scan_state[region]) - increment; + } + + void add_dirty_region(uint const region) { +#ifdef ASSERT + HeapRegion* hr = G1CollectedHeap::heap()->region_at(region); + assert(!hr->in_collection_set() && hr->is_old_or_humongous_or_archive(), + "Region %u is not suitable for scanning, is %sin collection set or %s", + hr->hrm_index(), hr->in_collection_set() ? "" : "not ", hr->get_short_type_str()); +#endif + _next_dirty_regions->add_dirty_region(region); + } + + void add_all_dirty_region(uint region) { +#ifdef ASSERT + HeapRegion* hr = G1CollectedHeap::heap()->region_at(region); + assert(hr->in_collection_set(), + "Only add young regions to all dirty regions directly but %u is %s", + hr->hrm_index(), hr->get_short_type_str()); +#endif + _all_dirty_regions->add_dirty_region(region); + } + + void set_scan_top(uint region_idx, HeapWord* value) { + _scan_top[region_idx] = value; } HeapWord* scan_top(uint region_idx) const { @@ -254,30 +731,7 @@ } void clear_scan_top(uint region_idx) { - _scan_top[region_idx] = NULL; - } - - // Clear the card table of "dirty" regions. - void clear_card_table(WorkGang* workers) { - if (_cur_dirty_region == 0) { - return; - } - - size_t const num_chunks = align_up(_cur_dirty_region * HeapRegion::CardsPerRegion, G1ClearCardTableTask::chunk_size()) / G1ClearCardTableTask::chunk_size(); - uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); - size_t const chunk_length = G1ClearCardTableTask::chunk_size() / HeapRegion::CardsPerRegion; - - // Iterate over the dirty cards region list. - G1ClearCardTableTask cl(G1CollectedHeap::heap(), _dirty_region_buffer, _cur_dirty_region, chunk_length); - - log_debug(gc, ergo)("Running %s using %u workers for " SIZE_FORMAT " " - "units of work for " SIZE_FORMAT " regions.", - cl.name(), num_workers, num_chunks, _cur_dirty_region); - workers->run_task(&cl, num_workers); - -#ifndef PRODUCT - G1CollectedHeap::heap()->verifier()->verify_card_table_cleanup(); -#endif + set_scan_top(region_idx, NULL); } }; @@ -294,9 +748,7 @@ } G1RemSet::~G1RemSet() { - if (_scan_state != NULL) { - delete _scan_state; - } + delete _scan_state; } uint G1RemSet::num_par_rem_sets() { @@ -308,181 +760,252 @@ _scan_state->initialize(max_regions); } -class G1ScanRSForRegionClosure : public HeapRegionClosure { +// Helper class to scan and detect ranges of cards that need to be scanned on the +// card table. 
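claim_cards_to_scan() above hands out work with a single atomic add per claim: the previous value returned by the add is the first card of the claimed chunk, and a result at or beyond HeapRegion::CardsPerRegion means the region is exhausted. The scanner class that follows then walks only such a claimed chunk. Below is a minimal standalone sketch of fetch-add claiming, using std::atomic instead of HotSpot's Atomic and invented constants.

#include <atomic>
#include <cstdio>

const unsigned kCardsPerRegion = 1024;   // toy values for the example
const unsigned kChunkSize = 128;

std::atomic<unsigned> claim_cursor{0};   // per-region claim cursor, monotonically increasing

// Returns the first card of the claimed chunk, or kCardsPerRegion if nothing is left.
unsigned claim_chunk() {
  unsigned first = claim_cursor.fetch_add(kChunkSize);
  return first < kCardsPerRegion ? first : kCardsPerRegion;
}

int main() {
  // Single-threaded demo; with several threads each claim still yields a disjoint
  // [first, first + kChunkSize) card range because fetch_add is atomic.
  for (unsigned first = claim_chunk(); first < kCardsPerRegion; first = claim_chunk()) {
    printf("claimed cards [%u, %u)\n", first, first + kChunkSize);
  }
  return 0;
}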
+class G1CardTableScanner : public StackObj { +public: + typedef CardTable::CardValue CardValue; + +private: + CardValue* const _base_addr; + + CardValue* _cur_addr; + CardValue* const _end_addr; + + static const size_t ToScanMask = G1CardTable::g1_card_already_scanned; + static const size_t ExpandedToScanMask = G1CardTable::WordAlreadyScanned; + + bool cur_addr_aligned() const { + return ((uintptr_t)_cur_addr) % sizeof(size_t) == 0; + } + + bool cur_card_is_dirty() const { + CardValue value = *_cur_addr; + return (value & ToScanMask) == 0; + } + + bool cur_word_of_cards_contains_any_dirty_card() const { + assert(cur_addr_aligned(), "Current address should be aligned"); + size_t const value = *(size_t*)_cur_addr; + return (~value & ExpandedToScanMask) != 0; + } + + bool cur_word_of_cards_all_dirty_cards() const { + size_t const value = *(size_t*)_cur_addr; + return value == G1CardTable::WordAllDirty; + } + + size_t get_and_advance_pos() { + _cur_addr++; + return pointer_delta(_cur_addr, _base_addr, sizeof(CardValue)) - 1; + } + +public: + G1CardTableScanner(CardValue* start_card, size_t size) : + _base_addr(start_card), + _cur_addr(start_card), + _end_addr(start_card + size) { + + assert(is_aligned(start_card, sizeof(size_t)), "Unaligned start addr " PTR_FORMAT, p2i(start_card)); + assert(is_aligned(size, sizeof(size_t)), "Unaligned size " SIZE_FORMAT, size); + } + + size_t find_next_dirty() { + while (!cur_addr_aligned()) { + if (cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + + assert(cur_addr_aligned(), "Current address should be aligned now."); + while (_cur_addr != _end_addr) { + if (cur_word_of_cards_contains_any_dirty_card()) { + for (size_t i = 0; i < sizeof(size_t); i++) { + if (cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + assert(false, "Should not reach here given we detected a dirty card in the word."); + } + _cur_addr += sizeof(size_t); + } + return get_and_advance_pos(); + } + + size_t find_next_non_dirty() { + assert(_cur_addr <= _end_addr, "Not allowed to search for marks after area."); + + while (!cur_addr_aligned()) { + if (!cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + + assert(cur_addr_aligned(), "Current address should be aligned now."); + while (_cur_addr != _end_addr) { + if (!cur_word_of_cards_all_dirty_cards()) { + for (size_t i = 0; i < sizeof(size_t); i++) { + if (!cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + assert(false, "Should not reach here given we detected a non-dirty card in the word."); + } + _cur_addr += sizeof(size_t); + } + return get_and_advance_pos(); + } +}; + +// Helper class to claim dirty chunks within the card table. +class G1CardTableChunkClaimer { + G1RemSetScanState* _scan_state; + uint _region_idx; + uint _cur_claim; + +public: + G1CardTableChunkClaimer(G1RemSetScanState* scan_state, uint region_idx) : + _scan_state(scan_state), + _region_idx(region_idx), + _cur_claim(0) { + guarantee(size() <= HeapRegion::CardsPerRegion, "Should not claim more space than possible."); + } + + bool has_next() { + while (true) { + _cur_claim = _scan_state->claim_cards_to_scan(_region_idx, size()); + if (_cur_claim >= HeapRegion::CardsPerRegion) { + return false; + } + if (_scan_state->chunk_needs_scan(_region_idx, _cur_claim)) { + return true; + } + } + } + + uint value() const { return _cur_claim; } + uint size() const { return _scan_state->scan_chunk_size(); } +}; + +// Scans a heap region for dirty cards. 
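G1CardTableScanner above avoids touching cards one byte at a time: once the cursor is word aligned it loads eight card bytes as a single size_t and tests the whole word against a mask, only falling back to per-card checks inside a word that shows a card of interest. The sketch below applies the same idea to a plain byte array and then reports contiguous dirty runs as blocks; the card values (0x00 dirty, 0x01 already scanned) and names are simplifications chosen for the example rather than G1's exact encoding.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Toy card values: 0x00 = dirty (needs scanning), 0x01 = already scanned.
const uint8_t kScanned = 0x01;
const uint64_t kScannedMask = 0x0101010101010101ULL;   // kScanned replicated into every byte

// True if any of the 8 cards starting at 'p' still needs scanning.
bool word_has_dirty_card(const uint8_t* p) {
  uint64_t word;
  memcpy(&word, p, sizeof(word));                      // one load covers 8 card bytes
  return (~word & kScannedMask) != 0;                  // some byte lacks the scanned bit
}

int main() {
  // 32 cards, all scanned except a dirty run at [18, 21).
  uint8_t cards[32];
  memset(cards, kScanned, sizeof(cards));
  cards[18] = cards[19] = cards[20] = 0x00;

  size_t cur = 0;
  while (cur < sizeof(cards)) {
    // Skip whole words of already-scanned cards with a single 64-bit check.
    if ((cur % 8) == 0 && !word_has_dirty_card(&cards[cur])) {
      cur += 8;
      continue;
    }
    if (cards[cur] == kScanned) { cur++; continue; }
    // Found a dirty card; extend it to a contiguous block, as the scanner does.
    size_t first = cur;
    while (cur < sizeof(cards) && cards[cur] != kScanned) cur++;
    printf("dirty block [%zu, %zu)\n", first, cur);
  }
  return 0;
}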
+class G1ScanHRForRegionClosure : public HeapRegionClosure { G1CollectedHeap* _g1h; - G1CardTable *_ct; + G1CardTable* _ct; + G1BlockOffsetTable* _bot; G1ParScanThreadState* _pss; - G1ScanCardClosure* _scan_objs_on_card_cl; G1RemSetScanState* _scan_state; G1GCPhaseTimes::GCParPhases _phase; - uint _worker_i; - - size_t _opt_refs_scanned; - size_t _opt_refs_memory_used; + uint _worker_id; size_t _cards_scanned; - size_t _cards_claimed; - size_t _cards_skipped; + size_t _blocks_scanned; + size_t _chunks_claimed; Tickspan _rem_set_root_scan_time; Tickspan _rem_set_trim_partially_time; - Tickspan _strong_code_root_scan_time; - Tickspan _strong_code_trim_partially_time; - - void claim_card(size_t card_index, const uint region_idx_for_card) { - _ct->set_card_claimed(card_index); - _scan_state->add_dirty_region(region_idx_for_card); - } - - void scan_card(MemRegion mr, uint region_idx_for_card) { + void scan_memregion(uint region_idx_for_card, MemRegion mr) { HeapRegion* const card_region = _g1h->region_at(region_idx_for_card); - assert(!card_region->is_young(), "Should not scan card in young region %u", region_idx_for_card); - card_region->oops_on_card_seq_iterate_careful(mr, _scan_objs_on_card_cl); - _scan_objs_on_card_cl->trim_queue_partially(); - _cards_scanned++; + G1ScanCardClosure card_cl(_g1h, _pss); + card_region->oops_on_card_seq_iterate_careful(mr, &card_cl); + _pss->trim_queue_partially(); } - void scan_opt_rem_set_roots(HeapRegion* r) { - EventGCPhaseParallel event; - - G1OopStarChunkedList* opt_rem_set_list = _pss->oops_into_optional_region(r); - - G1ScanCardClosure scan_cl(_g1h, _pss); - G1ScanRSForOptionalClosure cl(_g1h, &scan_cl); - _opt_refs_scanned += opt_rem_set_list->oops_do(&cl, _pss->closures()->raw_strong_oops()); - _opt_refs_memory_used += opt_rem_set_list->used_memory(); - - event.commit(GCId::current(), _worker_i, G1GCPhaseTimes::phase_name(_phase)); - } - - void scan_rem_set_roots(HeapRegion* r) { - EventGCPhaseParallel event; - uint const region_idx = r->hrm_index(); - - if (_scan_state->claim_iter(region_idx)) { - // If we ever free the collection set concurrently, we should also - // clear the card table concurrently therefore we won't need to - // add regions of the collection set to the dirty cards region. - _scan_state->add_dirty_region(region_idx); - } - - if (r->rem_set()->cardset_is_empty()) { + void do_claimed_block(uint const region_idx_for_card, size_t const first_card, size_t const num_cards) { + HeapWord* const card_start = _bot->address_for_index_raw(first_card); +#ifdef ASSERT + HeapRegion* hr = _g1h->region_at_or_null(region_idx_for_card); + assert(hr == NULL || hr->is_in_reserved(card_start), + "Card start " PTR_FORMAT " to scan outside of region %u", p2i(card_start), _g1h->region_at(region_idx_for_card)->hrm_index()); +#endif + HeapWord* const top = _scan_state->scan_top(region_idx_for_card); + if (card_start >= top) { return; } - // We claim cards in blocks so as to reduce the contention. 
- size_t const block_size = G1RSetScanBlockSize; - - HeapRegionRemSetIterator iter(r->rem_set()); - size_t card_index; - - size_t claimed_card_block = _scan_state->iter_claimed_next(region_idx, block_size); - for (size_t current_card = 0; iter.has_next(card_index); current_card++) { - if (current_card >= claimed_card_block + block_size) { - claimed_card_block = _scan_state->iter_claimed_next(region_idx, block_size); - } - if (current_card < claimed_card_block) { - _cards_skipped++; - continue; - } - _cards_claimed++; - - HeapWord* const card_start = _g1h->bot()->address_for_index_raw(card_index); - uint const region_idx_for_card = _g1h->addr_to_region(card_start); + MemRegion mr(card_start, MIN2(card_start + ((size_t)num_cards << BOTConstants::LogN_words), top)); + scan_memregion(region_idx_for_card, mr); -#ifdef ASSERT - HeapRegion* hr = _g1h->region_at_or_null(region_idx_for_card); - assert(hr == NULL || hr->is_in_reserved(card_start), - "Card start " PTR_FORMAT " to scan outside of region %u", p2i(card_start), _g1h->region_at(region_idx_for_card)->hrm_index()); -#endif - HeapWord* const top = _scan_state->scan_top(region_idx_for_card); - if (card_start >= top) { - continue; - } + _cards_scanned += num_cards; + } - // If the card is dirty, then G1 will scan it during Update RS. - if (_ct->is_card_claimed(card_index) || _ct->is_card_dirty(card_index)) { - continue; - } - - // We claim lazily (so races are possible but they're benign), which reduces the - // number of duplicate scans (the rsets of the regions in the cset can intersect). - // Claim the card after checking bounds above: the remembered set may contain - // random cards into current survivor, and we would then have an incorrectly - // claimed card in survivor space. Card table clear does not reset the card table - // of survivor space regions. - claim_card(card_index, region_idx_for_card); - - MemRegion const mr(card_start, MIN2(card_start + BOTConstants::N_words, top)); - - scan_card(mr, region_idx_for_card); - } - event.commit(GCId::current(), _worker_i, G1GCPhaseTimes::phase_name(_phase)); + ALWAYSINLINE void do_card_block(uint const region_idx, size_t const first_card, size_t const num_cards) { + _ct->mark_as_scanned(first_card, num_cards); + do_claimed_block(region_idx, first_card, num_cards); + _blocks_scanned++; } - void scan_strong_code_roots(HeapRegion* r) { + void scan_heap_roots(HeapRegion* r) { EventGCPhaseParallel event; - // We pass a weak code blobs closure to the remembered set scanning because we want to avoid - // treating the nmethods visited to act as roots for concurrent marking. - // We only want to make sure that the oops in the nmethods are adjusted with regard to the - // objects copied by the current evacuation. 
- r->strong_code_roots_do(_pss->closures()->weak_codeblobs()); - event.commit(GCId::current(), _worker_i, G1GCPhaseTimes::phase_name(G1GCPhaseTimes::CodeRoots)); + uint const region_idx = r->hrm_index(); + + ResourceMark rm; + + G1CardTableChunkClaimer claim(_scan_state, region_idx); + + while (claim.has_next()) { + size_t const region_card_base_idx = ((size_t)region_idx << HeapRegion::LogCardsPerRegion) + claim.value(); + CardTable::CardValue* const base_addr = _ct->byte_for_index(region_card_base_idx); + + G1CardTableScanner scan(base_addr, claim.size()); + + size_t first_scan_idx = scan.find_next_dirty(); + while (first_scan_idx != claim.size()) { + assert(*_ct->byte_for_index(region_card_base_idx + first_scan_idx) <= 0x1, "is %d at region %u idx " SIZE_FORMAT, *_ct->byte_for_index(region_card_base_idx + first_scan_idx), region_idx, first_scan_idx); + + size_t const last_scan_idx = scan.find_next_non_dirty(); + size_t const len = last_scan_idx - first_scan_idx; + + do_card_block(region_idx, region_card_base_idx + first_scan_idx, len); + + if (last_scan_idx == claim.size()) { + break; + } + + first_scan_idx = scan.find_next_dirty(); + } + _chunks_claimed++; + } + + event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(G1GCPhaseTimes::ScanHR)); } public: - G1ScanRSForRegionClosure(G1RemSetScanState* scan_state, - G1ScanCardClosure* scan_obj_on_card, + G1ScanHRForRegionClosure(G1RemSetScanState* scan_state, G1ParScanThreadState* pss, - G1GCPhaseTimes::GCParPhases phase, - uint worker_i) : + uint worker_id, + G1GCPhaseTimes::GCParPhases phase) : _g1h(G1CollectedHeap::heap()), _ct(_g1h->card_table()), + _bot(_g1h->bot()), _pss(pss), - _scan_objs_on_card_cl(scan_obj_on_card), _scan_state(scan_state), _phase(phase), - _worker_i(worker_i), - _opt_refs_scanned(0), - _opt_refs_memory_used(0), + _worker_id(worker_id), _cards_scanned(0), - _cards_claimed(0), - _cards_skipped(0), + _blocks_scanned(0), + _chunks_claimed(0), _rem_set_root_scan_time(), - _rem_set_trim_partially_time(), - _strong_code_root_scan_time(), - _strong_code_trim_partially_time() { } + _rem_set_trim_partially_time() { + } bool do_heap_region(HeapRegion* r) { - assert(r->in_collection_set(), "Region %u is not in the collection set.", r->hrm_index()); + assert(!r->in_collection_set() && r->is_old_or_humongous_or_archive(), + "Should only be called on old gen non-collection set regions but region %u is not.", + r->hrm_index()); uint const region_idx = r->hrm_index(); - // The individual references for the optional remembered set are per-worker, so we - // always need to scan them. - if (r->has_index_in_opt_cset()) { + if (_scan_state->has_cards_to_scan(region_idx)) { G1EvacPhaseWithTrimTimeTracker timer(_pss, _rem_set_root_scan_time, _rem_set_trim_partially_time); - scan_opt_rem_set_roots(r); - } - - // Do an early out if we know we are complete. 
- if (_scan_state->iter_is_complete(region_idx)) { - return false; - } - - { - G1EvacPhaseWithTrimTimeTracker timer(_pss, _rem_set_root_scan_time, _rem_set_trim_partially_time); - scan_rem_set_roots(r); - } - - if (_scan_state->set_iter_complete(region_idx)) { - G1EvacPhaseWithTrimTimeTracker timer(_pss, _strong_code_root_scan_time, _strong_code_trim_partially_time); - // Scan the strong code root list attached to the current region - scan_strong_code_roots(r); + scan_heap_roots(r); } return false; } @@ -490,120 +1013,156 @@ Tickspan rem_set_root_scan_time() const { return _rem_set_root_scan_time; } Tickspan rem_set_trim_partially_time() const { return _rem_set_trim_partially_time; } + size_t cards_scanned() const { return _cards_scanned; } + size_t blocks_scanned() const { return _blocks_scanned; } + size_t chunks_claimed() const { return _chunks_claimed; } +}; + +void G1RemSet::scan_heap_roots(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase) { + G1ScanHRForRegionClosure cl(_scan_state, pss, worker_id, scan_phase); + _scan_state->iterate_dirty_regions_from(&cl, worker_id); + + G1GCPhaseTimes* p = _g1p->phase_times(); + + p->record_or_add_time_secs(objcopy_phase, worker_id, cl.rem_set_trim_partially_time().seconds()); + + p->record_or_add_time_secs(scan_phase, worker_id, cl.rem_set_root_scan_time().seconds()); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.cards_scanned(), G1GCPhaseTimes::ScanHRScannedCards); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.blocks_scanned(), G1GCPhaseTimes::ScanHRScannedBlocks); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.chunks_claimed(), G1GCPhaseTimes::ScanHRClaimedChunks); +} + +// Heap region closure to be applied to all regions in the current collection set +// increment to fix up non-card related roots. 
+class G1ScanCollectionSetRegionClosure : public HeapRegionClosure { + G1ParScanThreadState* _pss; + G1RemSetScanState* _scan_state; + + G1GCPhaseTimes::GCParPhases _scan_phase; + G1GCPhaseTimes::GCParPhases _code_roots_phase; + + uint _worker_id; + + size_t _opt_refs_scanned; + size_t _opt_refs_memory_used; + + Tickspan _strong_code_root_scan_time; + Tickspan _strong_code_trim_partially_time; + + Tickspan _rem_set_opt_root_scan_time; + Tickspan _rem_set_opt_trim_partially_time; + + void scan_opt_rem_set_roots(HeapRegion* r) { + EventGCPhaseParallel event; + + G1OopStarChunkedList* opt_rem_set_list = _pss->oops_into_optional_region(r); + + G1ScanCardClosure scan_cl(G1CollectedHeap::heap(), _pss); + G1ScanRSForOptionalClosure cl(G1CollectedHeap::heap(), &scan_cl); + _opt_refs_scanned += opt_rem_set_list->oops_do(&cl, _pss->closures()->raw_strong_oops()); + _opt_refs_memory_used += opt_rem_set_list->used_memory(); + + event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(_scan_phase)); + } + +public: + G1ScanCollectionSetRegionClosure(G1RemSetScanState* scan_state, + G1ParScanThreadState* pss, + uint worker_i, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases code_roots_phase) : + _pss(pss), + _scan_state(scan_state), + _scan_phase(scan_phase), + _code_roots_phase(code_roots_phase), + _worker_id(worker_i), + _opt_refs_scanned(0), + _opt_refs_memory_used(0), + _strong_code_root_scan_time(), + _strong_code_trim_partially_time(), + _rem_set_opt_root_scan_time(), + _rem_set_opt_trim_partially_time() { } + + bool do_heap_region(HeapRegion* r) { + uint const region_idx = r->hrm_index(); + + // The individual references for the optional remembered set are per-worker, so we + // always need to scan them. + if (r->has_index_in_opt_cset()) { + G1EvacPhaseWithTrimTimeTracker timer(_pss, _rem_set_opt_root_scan_time, _rem_set_opt_trim_partially_time); + scan_opt_rem_set_roots(r); + } + + if (_scan_state->claim_collection_set_region(region_idx)) { + EventGCPhaseParallel event; + + G1EvacPhaseWithTrimTimeTracker timer(_pss, _strong_code_root_scan_time, _strong_code_trim_partially_time); + // Scan the strong code root list attached to the current region + r->strong_code_roots_do(_pss->closures()->weak_codeblobs()); + + event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(_code_roots_phase)); + } + + return false; + } + Tickspan strong_code_root_scan_time() const { return _strong_code_root_scan_time; } Tickspan strong_code_root_trim_partially_time() const { return _strong_code_trim_partially_time; } - size_t cards_scanned() const { return _cards_scanned; } - size_t cards_claimed() const { return _cards_claimed; } - size_t cards_skipped() const { return _cards_skipped; } + Tickspan rem_set_opt_root_scan_time() const { return _rem_set_opt_root_scan_time; } + Tickspan rem_set_opt_trim_partially_time() const { return _rem_set_opt_trim_partially_time; } size_t opt_refs_scanned() const { return _opt_refs_scanned; } size_t opt_refs_memory_used() const { return _opt_refs_memory_used; } }; -void G1RemSet::scan_rem_set(G1ParScanThreadState* pss, - uint worker_i, - G1GCPhaseTimes::GCParPhases scan_phase, - G1GCPhaseTimes::GCParPhases objcopy_phase, - G1GCPhaseTimes::GCParPhases coderoots_phase) { - assert(pss->trim_ticks().value() == 0, "Queues must have been trimmed before entering."); - - G1ScanCardClosure scan_cl(_g1h, pss); - G1ScanRSForRegionClosure cl(_scan_state, &scan_cl, pss, scan_phase, worker_i); - _g1h->collection_set_iterate_increment_from(&cl, 
worker_i); - - G1GCPhaseTimes* p = _g1p->phase_times(); - - p->record_or_add_time_secs(objcopy_phase, worker_i, cl.rem_set_trim_partially_time().seconds()); +void G1RemSet::scan_collection_set_regions(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases coderoots_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase) { + G1ScanCollectionSetRegionClosure cl(_scan_state, pss, worker_id, scan_phase, coderoots_phase); + _g1h->collection_set_iterate_increment_from(&cl, worker_id); - p->record_or_add_time_secs(scan_phase, worker_i, cl.rem_set_root_scan_time().seconds()); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.cards_scanned(), G1GCPhaseTimes::ScanRSScannedCards); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.cards_claimed(), G1GCPhaseTimes::ScanRSClaimedCards); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.cards_skipped(), G1GCPhaseTimes::ScanRSSkippedCards); - // At this time we only record some metrics for the optional remembered set. - if (scan_phase == G1GCPhaseTimes::OptScanRS) { - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.opt_refs_scanned(), G1GCPhaseTimes::ScanRSScannedOptRefs); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.opt_refs_memory_used(), G1GCPhaseTimes::ScanRSUsedMemory); - } - - p->record_or_add_time_secs(coderoots_phase, worker_i, cl.strong_code_root_scan_time().seconds()); - p->add_time_secs(objcopy_phase, worker_i, cl.strong_code_root_trim_partially_time().seconds()); -} - -// Closure used for updating rem sets. Only called during an evacuation pause. -class G1RefineCardClosure: public G1CardTableEntryClosure { - G1RemSet* _g1rs; - G1ScanCardClosure* _update_rs_cl; - - size_t _cards_scanned; - size_t _cards_skipped; -public: - G1RefineCardClosure(G1CollectedHeap* g1h, G1ScanCardClosure* update_rs_cl) : - _g1rs(g1h->rem_set()), _update_rs_cl(update_rs_cl), _cards_scanned(0), _cards_skipped(0) - {} + G1GCPhaseTimes* p = _g1h->phase_times(); - bool do_card_ptr(CardValue* card_ptr, uint worker_i) { - // The only time we care about recording cards that - // contain references that point into the collection set - // is during RSet updating within an evacuation pause. - // In this case worker_i should be the id of a GC worker thread. - assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); - - bool card_scanned = _g1rs->refine_card_during_gc(card_ptr, _update_rs_cl); - - if (card_scanned) { - _update_rs_cl->trim_queue_partially(); - _cards_scanned++; - } else { - _cards_skipped++; - } - return true; - } - - size_t cards_scanned() const { return _cards_scanned; } - size_t cards_skipped() const { return _cards_skipped; } -}; + p->record_or_add_time_secs(scan_phase, worker_id, cl.rem_set_opt_root_scan_time().seconds()); + p->record_or_add_time_secs(scan_phase, worker_id, cl.rem_set_opt_trim_partially_time().seconds()); -void G1RemSet::update_rem_set(G1ParScanThreadState* pss, uint worker_i) { - G1GCPhaseTimes* p = _g1p->phase_times(); - - // Apply closure to log entries in the HCC. 
- if (G1HotCardCache::default_use_cache()) { - G1EvacPhaseTimesTracker x(p, pss, G1GCPhaseTimes::ScanHCC, worker_i); + p->record_or_add_time_secs(coderoots_phase, worker_id, cl.strong_code_root_scan_time().seconds()); + p->add_time_secs(objcopy_phase, worker_id, cl.strong_code_root_trim_partially_time().seconds()); - G1ScanCardClosure scan_hcc_cl(_g1h, pss); - G1RefineCardClosure refine_card_cl(_g1h, &scan_hcc_cl); - _g1h->iterate_hcc_closure(&refine_card_cl, worker_i); - } - - // Now apply the closure to all remaining log entries. - { - G1EvacPhaseTimesTracker x(p, pss, G1GCPhaseTimes::UpdateRS, worker_i); - - G1ScanCardClosure update_rs_cl(_g1h, pss); - G1RefineCardClosure refine_card_cl(_g1h, &update_rs_cl); - _g1h->iterate_dirty_card_closure(&refine_card_cl, worker_i); - - p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_scanned(), G1GCPhaseTimes::UpdateRSScannedCards); - p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_skipped(), G1GCPhaseTimes::UpdateRSSkippedCards); + // At this time we record some metrics only for the evacuations after the initial one. + if (scan_phase == G1GCPhaseTimes::OptScanHR) { + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.opt_refs_scanned(), G1GCPhaseTimes::ScanHRScannedOptRefs); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.opt_refs_memory_used(), G1GCPhaseTimes::ScanHRUsedMemory); } } -void G1RemSet::prepare_for_scan_rem_set() { - G1BarrierSet::dirty_card_queue_set().concatenate_logs(); - _scan_state->reset(); +void G1RemSet::prepare_for_scan_heap_roots() { + G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); + dcqs.concatenate_logs(); + + _scan_state->prepare(); } -void G1RemSet::prepare_for_scan_rem_set(uint region_idx) { +void G1RemSet::merge_heap_roots(bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase) { + _scan_state->merge_heap_roots(_g1h->workers(), remembered_set_only, merge_phase); +} + +void G1RemSet::prepare_for_scan_heap_roots(uint region_idx) { _scan_state->clear_scan_top(region_idx); } -void G1RemSet::cleanup_after_scan_rem_set() { +void G1RemSet::cleanup_after_scan_heap_roots() { G1GCPhaseTimes* phase_times = _g1h->phase_times(); // Set all cards back to clean. double start = os::elapsedTime(); - _scan_state->clear_card_table(_g1h->workers()); + _scan_state->cleanup(_g1h->workers()); phase_times->record_clear_ct_time((os::elapsedTime() - start) * 1000.0); } @@ -759,53 +1318,6 @@ G1BarrierSet::shared_dirty_card_queue().enqueue(card_ptr); } -bool G1RemSet::refine_card_during_gc(CardValue* card_ptr, - G1ScanCardClosure* update_rs_cl) { - assert(_g1h->is_gc_active(), "Only call during GC"); - - // Construct the region representing the card. - HeapWord* card_start = _ct->addr_for(card_ptr); - // And find the region containing it. - uint const card_region_idx = _g1h->addr_to_region(card_start); - - HeapWord* scan_limit = _scan_state->scan_top(card_region_idx); - if (scan_limit == NULL) { - // This is a card into an uncommitted region. We need to bail out early as we - // should not access the corresponding card table entry. - return false; - } - - check_card_ptr(card_ptr, _ct); - - // If the card is no longer dirty, nothing to do. This covers cards that were already - // scanned as parts of the remembered sets. 
- if (*card_ptr != G1CardTable::dirty_card_val()) { - return false; - } - - // We claim lazily (so races are possible but they're benign), which reduces the - // number of potential duplicate scans (multiple threads may enqueue the same card twice). - *card_ptr = G1CardTable::clean_card_val() | G1CardTable::claimed_card_val(); - - _scan_state->add_dirty_region(card_region_idx); - if (scan_limit <= card_start) { - // If the card starts above the area in the region containing objects to scan, skip it. - return false; - } - - // Don't use addr_for(card_ptr + 1) which can ask for - // a card beyond the heap. - HeapWord* card_end = card_start + G1CardTable::card_size_in_words; - MemRegion dirty_region(card_start, MIN2(scan_limit, card_end)); - assert(!dirty_region.is_empty(), "sanity"); - - HeapRegion* const card_region = _g1h->region_at(card_region_idx); - assert(!card_region->is_young(), "Should not scan card in young region %u", card_region_idx); - bool card_processed = card_region->oops_on_card_seq_iterate_careful(dirty_region, update_rs_cl); - assert(card_processed, "must be"); - return true; -} - void G1RemSet::print_periodic_summary_info(const char* header, uint period_count) { if ((G1SummarizeRSetStatsPeriod > 0) && log_is_enabled(Trace, gc, remset) && (period_count % G1SummarizeRSetStatsPeriod == 0)) { diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/g1RemSet.hpp --- a/src/hotspot/share/gc/g1/g1RemSet.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSet.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -46,6 +46,7 @@ class G1HotCardCache; class G1RemSetScanState; class G1ParScanThreadState; +class G1ParScanThreadStateSet; class G1Policy; class G1ScanCardClosure; class HeapRegionClaimer; @@ -84,39 +85,39 @@ G1HotCardCache* hot_card_cache); ~G1RemSet(); - // Scan all remembered sets of the collection set for references into the collection - // set. - // Further applies heap_region_codeblobs on the oops of the unmarked nmethods on the strong code - // roots list for each region in the collection set. - void scan_rem_set(G1ParScanThreadState* pss, - uint worker_i, - G1GCPhaseTimes::GCParPhases scan_phase, - G1GCPhaseTimes::GCParPhases objcopy_phase, - G1GCPhaseTimes::GCParPhases coderoots_phase); + // Scan all cards in the non-collection set regions that potentially contain + // references into the current whole collection set. + void scan_heap_roots(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase); + + // Merge cards from various sources (remembered sets, hot card cache, log buffers) + // and calculate the cards that need to be scanned later (via scan_heap_roots()). + // If remembered_set_only is set, only merge remembered set cards. + void merge_heap_roots(bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase); - // Flush remaining refinement buffers for cross-region references to either evacuate references - // into the collection set or update the remembered set. - void update_rem_set(G1ParScanThreadState* pss, uint worker_i); - - // Prepare for and cleanup after scanning the remembered sets. Must be called + // Prepare for and cleanup after scanning the heap roots. Must be called // once before and after in sequential code. - void prepare_for_scan_rem_set(); - void cleanup_after_scan_rem_set(); - // Prepares the given region for remembered set scanning. 
- void prepare_for_scan_rem_set(uint region_idx); + void prepare_for_scan_heap_roots(); + // Cleans the card table from temporary duplicate detection information. + void cleanup_after_scan_heap_roots(); + // Prepares the given region for heap root scanning. + void prepare_for_scan_heap_roots(uint region_idx); - G1RemSetScanState* scan_state() const { return _scan_state; } + // Do work for regions in the current increment of the collection set, scanning + // non-card based (heap) roots. + void scan_collection_set_regions(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases coderoots_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase); // Refine the card corresponding to "card_ptr". Safe to be called concurrently // to the mutator. void refine_card_concurrently(CardValue* card_ptr, uint worker_i); - // Refine the card corresponding to "card_ptr", applying the given closure to - // all references found. Must only be called during gc. - // Returns whether the card has been scanned. - bool refine_card_during_gc(CardValue* card_ptr, G1ScanCardClosure* update_rs_cl); - // Print accumulated summary info from the start of the VM. void print_summary_info(); diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/heapRegion.cpp --- a/src/hotspot/share/gc/g1/heapRegion.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/heapRegion.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -49,6 +49,7 @@ int HeapRegion::LogOfHRGrainBytes = 0; int HeapRegion::LogOfHRGrainWords = 0; +int HeapRegion::LogCardsPerRegion = 0; size_t HeapRegion::GrainBytes = 0; size_t HeapRegion::GrainWords = 0; size_t HeapRegion::CardsPerRegion = 0; @@ -105,6 +106,8 @@ guarantee(CardsPerRegion == 0, "we should only set it once"); CardsPerRegion = GrainBytes >> G1CardTable::card_shift; + LogCardsPerRegion = log2_long((jlong) CardsPerRegion); + if (G1HeapRegionSize != GrainBytes) { FLAG_SET_ERGO(G1HeapRegionSize, GrainBytes); } diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/heapRegion.hpp --- a/src/hotspot/share/gc/g1/heapRegion.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/heapRegion.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -60,7 +60,6 @@ class G1CMBitMap; class G1IsAliveAndApplyClosure; class HeapRegionRemSet; -class HeapRegionRemSetIterator; class HeapRegion; class HeapRegionSetBase; class nmethod; @@ -315,6 +314,7 @@ static int LogOfHRGrainBytes; static int LogOfHRGrainWords; + static int LogCardsPerRegion; static size_t GrainBytes; static size_t GrainWords; diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/heapRegionRemSet.cpp --- a/src/hotspot/share/gc/g1/heapRegionRemSet.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -27,7 +27,7 @@ #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1ConcurrentRefine.hpp" #include "gc/g1/heapRegionManager.inline.hpp" -#include "gc/g1/heapRegionRemSet.hpp" +#include "gc/g1/heapRegionRemSet.inline.hpp" #include "gc/shared/space.inline.hpp" #include "memory/allocation.hpp" #include "memory/padded.inline.hpp" @@ -42,195 +42,21 @@ const char* HeapRegionRemSet::_state_strings[] = {"Untracked", "Updating", "Complete"}; const char* HeapRegionRemSet::_short_state_strings[] = {"UNTRA", "UPDAT", "CMPLT"}; -class PerRegionTable: public CHeapObj { - friend class OtherRegionsTable; - friend class HeapRegionRemSetIterator; - - HeapRegion* _hr; - CHeapBitMap _bm; - jint _occupied; - - // next pointer for free/allocated 
'all' list - PerRegionTable* _next; - - // prev pointer for the allocated 'all' list - PerRegionTable* _prev; - - // next pointer in collision list - PerRegionTable * _collision_list_next; - - // Global free list of PRTs - static PerRegionTable* volatile _free_list; - -protected: - // We need access in order to union things into the base table. - BitMap* bm() { return &_bm; } - - PerRegionTable(HeapRegion* hr) : - _hr(hr), - _bm(HeapRegion::CardsPerRegion, mtGC), - _occupied(0), - _next(NULL), _prev(NULL), - _collision_list_next(NULL) - {} - - void add_card_work(CardIdx_t from_card, bool par) { - if (!_bm.at(from_card)) { - if (par) { - if (_bm.par_at_put(from_card, 1)) { - Atomic::inc(&_occupied); - } - } else { - _bm.at_put(from_card, 1); - _occupied++; - } - } - } - - void add_reference_work(OopOrNarrowOopStar from, bool par) { - // Must make this robust in case "from" is not in "_hr", because of - // concurrency. - - HeapRegion* loc_hr = hr(); - // If the test below fails, then this table was reused concurrently - // with this operation. This is OK, since the old table was coarsened, - // and adding a bit to the new table is never incorrect. - if (loc_hr->is_in_reserved(from)) { - CardIdx_t from_card = OtherRegionsTable::card_within_region(from, loc_hr); - add_card_work(from_card, par); +PerRegionTable* PerRegionTable::alloc(HeapRegion* hr) { + PerRegionTable* fl = _free_list; + while (fl != NULL) { + PerRegionTable* nxt = fl->next(); + PerRegionTable* res = Atomic::cmpxchg(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr, true); + return fl; + } else { + fl = _free_list; } } - -public: - - HeapRegion* hr() const { return OrderAccess::load_acquire(&_hr); } - - jint occupied() const { - // Overkill, but if we ever need it... - // guarantee(_occupied == _bm.count_one_bits(), "Check"); - return _occupied; - } - - void init(HeapRegion* hr, bool clear_links_to_all_list) { - if (clear_links_to_all_list) { - set_next(NULL); - set_prev(NULL); - } - _collision_list_next = NULL; - _occupied = 0; - _bm.clear(); - // Make sure that the bitmap clearing above has been finished before publishing - // this PRT to concurrent threads. - OrderAccess::release_store(&_hr, hr); - } - - void add_reference(OopOrNarrowOopStar from) { - add_reference_work(from, /*parallel*/ true); - } - - void seq_add_reference(OopOrNarrowOopStar from) { - add_reference_work(from, /*parallel*/ false); - } - - void add_card(CardIdx_t from_card_index) { - add_card_work(from_card_index, /*parallel*/ true); - } - - void seq_add_card(CardIdx_t from_card_index) { - add_card_work(from_card_index, /*parallel*/ false); - } - - // (Destructively) union the bitmap of the current table into the given - // bitmap (which is assumed to be of the same size.) - void union_bitmap_into(BitMap* bm) { - bm->set_union(_bm); - } - - // Mem size in bytes. - size_t mem_size() const { - return sizeof(PerRegionTable) + _bm.size_in_words() * HeapWordSize; - } - - // Requires "from" to be in "hr()". - bool contains_reference(OopOrNarrowOopStar from) const { - assert(hr()->is_in_reserved(from), "Precondition."); - size_t card_ind = pointer_delta(from, hr()->bottom(), - G1CardTable::card_size); - return _bm.at(card_ind); - } - - // Bulk-free the PRTs from prt to last, assumes that they are - // linked together using their _next field. 
- static void bulk_free(PerRegionTable* prt, PerRegionTable* last) { - while (true) { - PerRegionTable* fl = _free_list; - last->set_next(fl); - PerRegionTable* res = Atomic::cmpxchg(prt, &_free_list, fl); - if (res == fl) { - return; - } - } - ShouldNotReachHere(); - } - - static void free(PerRegionTable* prt) { - bulk_free(prt, prt); - } - - // Returns an initialized PerRegionTable instance. - static PerRegionTable* alloc(HeapRegion* hr) { - PerRegionTable* fl = _free_list; - while (fl != NULL) { - PerRegionTable* nxt = fl->next(); - PerRegionTable* res = Atomic::cmpxchg(nxt, &_free_list, fl); - if (res == fl) { - fl->init(hr, true); - return fl; - } else { - fl = _free_list; - } - } - assert(fl == NULL, "Loop condition."); - return new PerRegionTable(hr); - } - - PerRegionTable* next() const { return _next; } - void set_next(PerRegionTable* next) { _next = next; } - PerRegionTable* prev() const { return _prev; } - void set_prev(PerRegionTable* prev) { _prev = prev; } - - // Accessor and Modification routines for the pointer for the - // singly linked collision list that links the PRTs within the - // OtherRegionsTable::_fine_grain_regions hash table. - // - // It might be useful to also make the collision list doubly linked - // to avoid iteration over the collisions list during scrubbing/deletion. - // OTOH there might not be many collisions. - - PerRegionTable* collision_list_next() const { - return _collision_list_next; - } - - void set_collision_list_next(PerRegionTable* next) { - _collision_list_next = next; - } - - PerRegionTable** collision_list_next_addr() { - return &_collision_list_next; - } - - static size_t fl_mem_size() { - PerRegionTable* cur = _free_list; - size_t res = 0; - while (cur != NULL) { - res += cur->mem_size(); - cur = cur->next(); - } - return res; - } - - static void test_fl_mem_size(); -}; + assert(fl == NULL, "Loop condition."); + return new PerRegionTable(hr); +} PerRegionTable* volatile PerRegionTable::_free_list = NULL; @@ -696,175 +522,3 @@ size_t HeapRegionRemSet::strong_code_roots_mem_size() { return _code_roots.mem_size(); } - -HeapRegionRemSetIterator:: HeapRegionRemSetIterator(HeapRegionRemSet* hrrs) : - _hrrs(hrrs), - _coarse_map(&hrrs->_other_regions._coarse_map), - _bot(hrrs->_bot), - _g1h(G1CollectedHeap::heap()), - _n_yielded_fine(0), - _n_yielded_coarse(0), - _n_yielded_sparse(0), - _is(Sparse), - _cur_region_card_offset(0), - // Set these values so that we increment to the first region. - _coarse_cur_region_index(-1), - _coarse_cur_region_cur_card(HeapRegion::CardsPerRegion-1), - _fine_cur_prt(NULL), - _cur_card_in_prt(HeapRegion::CardsPerRegion), - _sparse_iter(&hrrs->_other_regions._sparse_table) {} - -bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) { - if (_hrrs->_other_regions._n_coarse_entries == 0) return false; - // Go to the next card. - _coarse_cur_region_cur_card++; - // Was the last the last card in the current region? - if (_coarse_cur_region_cur_card == HeapRegion::CardsPerRegion) { - // Yes: find the next region. This may leave _coarse_cur_region_index - // Set to the last index, in which case there are no more coarse - // regions. 
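PerRegionTable::alloc() and bulk_free() shown above recycle PRTs through a global lock-free free list that is manipulated with compare-and-swap. The standalone sketch below shows the same pop/push pattern using std::atomic; the Node type and function names are made up for illustration and are not HotSpot code.

// Illustrative sketch of a CAS-based global free list (Treiber-stack style).
#include <atomic>

struct Node {
  Node* next;
};

static std::atomic<Node*> g_free_list{nullptr};

// Pop one node, or return nullptr if the list is empty (the caller then allocates new).
Node* pop_free() {
  Node* head = g_free_list.load(std::memory_order_acquire);
  while (head != nullptr) {
    Node* next = head->next;
    // Try to swing the head from 'head' to 'next'; on failure 'head' is reloaded.
    if (g_free_list.compare_exchange_weak(head, next,
                                          std::memory_order_acq_rel,
                                          std::memory_order_acquire)) {
      return head;
    }
  }
  return nullptr;
}

// Push a pre-linked chain [first..last] in one step, like bulk_free(prt, last) above.
void push_free(Node* first, Node* last) {
  Node* head = g_free_list.load(std::memory_order_relaxed);
  do {
    last->next = head;
  } while (!g_free_list.compare_exchange_weak(head, first,
                                              std::memory_order_release,
                                              std::memory_order_relaxed));
}

In the patch, a popped PRT is reinitialized via init() before reuse; a production free list of this shape also has to account for the usual ABA caveats of CAS-based stacks.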
- _coarse_cur_region_index = - (int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1); - if ((size_t)_coarse_cur_region_index < _coarse_map->size()) { - _coarse_cur_region_cur_card = 0; - HeapWord* r_bot = - _g1h->region_at((uint) _coarse_cur_region_index)->bottom(); - _cur_region_card_offset = _bot->index_for_raw(r_bot); - } else { - return false; - } - } - // If we didn't return false above, then we can yield a card. - card_index = _cur_region_card_offset + _coarse_cur_region_cur_card; - return true; -} - -bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) { - if (fine_has_next()) { - _cur_card_in_prt = - _fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1); - } - if (_cur_card_in_prt == HeapRegion::CardsPerRegion) { - // _fine_cur_prt may still be NULL in case if there are not PRTs at all for - // the remembered set. - if (_fine_cur_prt == NULL || _fine_cur_prt->next() == NULL) { - return false; - } - PerRegionTable* next_prt = _fine_cur_prt->next(); - switch_to_prt(next_prt); - _cur_card_in_prt = _fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1); - } - - card_index = _cur_region_card_offset + _cur_card_in_prt; - guarantee(_cur_card_in_prt < HeapRegion::CardsPerRegion, - "Card index " SIZE_FORMAT " must be within the region", _cur_card_in_prt); - return true; -} - -bool HeapRegionRemSetIterator::fine_has_next() { - return _cur_card_in_prt != HeapRegion::CardsPerRegion; -} - -void HeapRegionRemSetIterator::switch_to_prt(PerRegionTable* prt) { - assert(prt != NULL, "Cannot switch to NULL prt"); - _fine_cur_prt = prt; - - HeapWord* r_bot = _fine_cur_prt->hr()->bottom(); - _cur_region_card_offset = _bot->index_for_raw(r_bot); - - // The bitmap scan for the PRT always scans from _cur_region_cur_card + 1. - // To avoid special-casing this start case, and not miss the first bitmap - // entry, initialize _cur_region_cur_card with -1 instead of 0. - _cur_card_in_prt = (size_t)-1; -} - -bool HeapRegionRemSetIterator::has_next(size_t& card_index) { - switch (_is) { - case Sparse: { - if (_sparse_iter.has_next(card_index)) { - _n_yielded_sparse++; - return true; - } - // Otherwise, deliberate fall-through - _is = Fine; - PerRegionTable* initial_fine_prt = _hrrs->_other_regions._first_all_fine_prts; - if (initial_fine_prt != NULL) { - switch_to_prt(_hrrs->_other_regions._first_all_fine_prts); - } - } - case Fine: - if (fine_has_next(card_index)) { - _n_yielded_fine++; - return true; - } - // Otherwise, deliberate fall-through - _is = Coarse; - case Coarse: - if (coarse_has_next(card_index)) { - _n_yielded_coarse++; - return true; - } - // Otherwise... - break; - } - return false; -} - -#ifndef PRODUCT -void HeapRegionRemSet::test() { - os::sleep(Thread::current(), (jlong)5000, false); - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - - // Run with "-XX:G1LogRSetRegionEntries=2", so that 1 and 5 end up in same - // hash bucket. 
- HeapRegion* hr0 = g1h->region_at(0); - HeapRegion* hr1 = g1h->region_at(1); - HeapRegion* hr2 = g1h->region_at(5); - HeapRegion* hr3 = g1h->region_at(6); - HeapRegion* hr4 = g1h->region_at(7); - HeapRegion* hr5 = g1h->region_at(8); - - HeapWord* hr1_start = hr1->bottom(); - HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2; - HeapWord* hr1_last = hr1->end() - 1; - - HeapWord* hr2_start = hr2->bottom(); - HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2; - HeapWord* hr2_last = hr2->end() - 1; - - HeapWord* hr3_start = hr3->bottom(); - HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2; - HeapWord* hr3_last = hr3->end() - 1; - - HeapRegionRemSet* hrrs = hr0->rem_set(); - - // Make three references from region 0x101... - hrrs->add_reference((OopOrNarrowOopStar)hr1_start); - hrrs->add_reference((OopOrNarrowOopStar)hr1_mid); - hrrs->add_reference((OopOrNarrowOopStar)hr1_last); - - hrrs->add_reference((OopOrNarrowOopStar)hr2_start); - hrrs->add_reference((OopOrNarrowOopStar)hr2_mid); - hrrs->add_reference((OopOrNarrowOopStar)hr2_last); - - hrrs->add_reference((OopOrNarrowOopStar)hr3_start); - hrrs->add_reference((OopOrNarrowOopStar)hr3_mid); - hrrs->add_reference((OopOrNarrowOopStar)hr3_last); - - // Now cause a coarsening. - hrrs->add_reference((OopOrNarrowOopStar)hr4->bottom()); - hrrs->add_reference((OopOrNarrowOopStar)hr5->bottom()); - - // Now, does iteration yield these three? - HeapRegionRemSetIterator iter(hrrs); - size_t sum = 0; - size_t card_index; - while (iter.has_next(card_index)) { - HeapWord* card_start = g1h->bot()->address_for_index(card_index); - tty->print_cr(" Card " PTR_FORMAT ".", p2i(card_start)); - sum++; - } - guarantee(sum == 11 - 3 + 2048, "Failure"); - guarantee(sum == hrrs->occupied(), "Failure"); -} -#endif diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/heapRegionRemSet.hpp --- a/src/hotspot/share/gc/g1/heapRegionRemSet.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -28,6 +28,7 @@ #include "gc/g1/g1CodeCacheRemSet.hpp" #include "gc/g1/g1FromCardCache.hpp" #include "gc/g1/sparsePRT.hpp" +#include "utilities/bitMap.hpp" // Remembered set for a heap region. Represent a set of "cards" that // contain pointers into the owner heap region. Cards are defined somewhat @@ -37,7 +38,6 @@ class G1BlockOffsetTable; class G1CardLiveData; class HeapRegion; -class HeapRegionRemSetIterator; class PerRegionTable; class SparsePRT; class nmethod; @@ -67,8 +67,6 @@ // thinking the PRT is for a different region, does no harm. class OtherRegionsTable { - friend class HeapRegionRemSetIterator; - G1CollectedHeap* _g1h; Mutex* _m; @@ -125,6 +123,9 @@ // Create a new remembered set. The given mutex is used to ensure consistency. OtherRegionsTable(Mutex* m); + template + void iterate(Closure& v); + // Returns the card index of the given within_region pointer relative to the bottom // of the given heap region. 
static CardIdx_t card_within_region(OopOrNarrowOopStar within_region, HeapRegion* hr); @@ -157,9 +158,140 @@ void clear(); }; +class PerRegionTable: public CHeapObj { + friend class OtherRegionsTable; + + HeapRegion* _hr; + CHeapBitMap _bm; + jint _occupied; + + // next pointer for free/allocated 'all' list + PerRegionTable* _next; + + // prev pointer for the allocated 'all' list + PerRegionTable* _prev; + + // next pointer in collision list + PerRegionTable * _collision_list_next; + + // Global free list of PRTs + static PerRegionTable* volatile _free_list; + +protected: + PerRegionTable(HeapRegion* hr) : + _hr(hr), + _bm(HeapRegion::CardsPerRegion, mtGC), + _occupied(0), + _next(NULL), _prev(NULL), + _collision_list_next(NULL) + {} + + inline void add_card_work(CardIdx_t from_card, bool par); + + inline void add_reference_work(OopOrNarrowOopStar from, bool par); + +public: + // We need access in order to union things into the base table. + BitMap* bm() { return &_bm; } + + HeapRegion* hr() const { return OrderAccess::load_acquire(&_hr); } + + jint occupied() const { + // Overkill, but if we ever need it... + // guarantee(_occupied == _bm.count_one_bits(), "Check"); + return _occupied; + } + + void init(HeapRegion* hr, bool clear_links_to_all_list); + + inline void add_reference(OopOrNarrowOopStar from); + + inline void seq_add_reference(OopOrNarrowOopStar from); + + inline void add_card(CardIdx_t from_card_index); + + void seq_add_card(CardIdx_t from_card_index); + + // (Destructively) union the bitmap of the current table into the given + // bitmap (which is assumed to be of the same size.) + void union_bitmap_into(BitMap* bm) { + bm->set_union(_bm); + } + + // Mem size in bytes. + size_t mem_size() const { + return sizeof(PerRegionTable) + _bm.size_in_words() * HeapWordSize; + } + + // Requires "from" to be in "hr()". + bool contains_reference(OopOrNarrowOopStar from) const { + assert(hr()->is_in_reserved(from), "Precondition."); + size_t card_ind = pointer_delta(from, hr()->bottom(), + G1CardTable::card_size); + return _bm.at(card_ind); + } + + // Bulk-free the PRTs from prt to last, assumes that they are + // linked together using their _next field. + static void bulk_free(PerRegionTable* prt, PerRegionTable* last) { + while (true) { + PerRegionTable* fl = _free_list; + last->set_next(fl); + PerRegionTable* res = Atomic::cmpxchg(prt, &_free_list, fl); + if (res == fl) { + return; + } + } + ShouldNotReachHere(); + } + + static void free(PerRegionTable* prt) { + bulk_free(prt, prt); + } + + // Returns an initialized PerRegionTable instance. + static PerRegionTable* alloc(HeapRegion* hr); + + PerRegionTable* next() const { return _next; } + void set_next(PerRegionTable* next) { _next = next; } + PerRegionTable* prev() const { return _prev; } + void set_prev(PerRegionTable* prev) { _prev = prev; } + + // Accessor and Modification routines for the pointer for the + // singly linked collision list that links the PRTs within the + // OtherRegionsTable::_fine_grain_regions hash table. + // + // It might be useful to also make the collision list doubly linked + // to avoid iteration over the collisions list during scrubbing/deletion. + // OTOH there might not be many collisions. 
+ + PerRegionTable* collision_list_next() const { + return _collision_list_next; + } + + void set_collision_list_next(PerRegionTable* next) { + _collision_list_next = next; + } + + PerRegionTable** collision_list_next_addr() { + return &_collision_list_next; + } + + static size_t fl_mem_size() { + PerRegionTable* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += cur->mem_size(); + cur = cur->next(); + } + return res; + } + + static void test_fl_mem_size(); +}; + class HeapRegionRemSet : public CHeapObj { friend class VMStructs; - friend class HeapRegionRemSetIterator; private: G1BlockOffsetTable* _bot; @@ -182,18 +314,23 @@ // Setup sparse and fine-grain tables sizes. static void setup_remset_size(); - bool cardset_is_empty() const { - return _other_regions.is_empty(); - } - bool is_empty() const { - return (strong_code_roots_list_length() == 0) && cardset_is_empty(); + return (strong_code_roots_list_length() == 0) && _other_regions.is_empty(); } bool occupancy_less_or_equal_than(size_t occ) const { return (strong_code_roots_list_length() == 0) && _other_regions.occupancy_less_or_equal_than(occ); } + // For each PRT in the card (remembered) set call one of the following methods + // of the given closure: + // + // set_full_region_dirty(uint region_idx) - pass the region index for coarse PRTs + // set_bitmap_dirty(uint region_idx, BitMap* bitmap) - pass the region index and bitmap for fine PRTs + // set_cards_dirty(uint region_idx, elem_t* cards, uint num_cards) - pass region index and cards for sparse PRTs + template + inline void iterate_prts(Closure& cl); + size_t occupied() { MutexLocker x(&_m, Mutex::_no_safepoint_check_flag); return occupied_locked(); @@ -339,70 +476,4 @@ #endif }; -class HeapRegionRemSetIterator : public StackObj { -private: - // The region RSet over which we are iterating. - HeapRegionRemSet* _hrrs; - - // Local caching of HRRS fields. - const BitMap* _coarse_map; - - G1BlockOffsetTable* _bot; - G1CollectedHeap* _g1h; - - // The number of cards yielded since initialization. - size_t _n_yielded_fine; - size_t _n_yielded_coarse; - size_t _n_yielded_sparse; - - // Indicates what granularity of table that we are currently iterating over. - // We start iterating over the sparse table, progress to the fine grain - // table, and then finish with the coarse table. - enum IterState { - Sparse, - Fine, - Coarse - }; - IterState _is; - - // For both Coarse and Fine remembered set iteration this contains the - // first card number of the heap region we currently iterate over. - size_t _cur_region_card_offset; - - // Current region index for the Coarse remembered set iteration. - int _coarse_cur_region_index; - size_t _coarse_cur_region_cur_card; - - bool coarse_has_next(size_t& card_index); - - // The PRT we are currently iterating over. - PerRegionTable* _fine_cur_prt; - // Card offset within the current PRT. - size_t _cur_card_in_prt; - - // Update internal variables when switching to the given PRT. - void switch_to_prt(PerRegionTable* prt); - bool fine_has_next(); - bool fine_has_next(size_t& card_index); - - // The Sparse remembered set iterator. - SparsePRTIter _sparse_iter; - -public: - HeapRegionRemSetIterator(HeapRegionRemSet* hrrs); - - // If there remains one or more cards to be yielded, returns true and - // sets "card_index" to one of those cards (which is then considered - // yielded.) Otherwise, returns false (and leaves "card_index" - // undefined.) 
- bool has_next(size_t& card_index); - - size_t n_yielded_fine() { return _n_yielded_fine; } - size_t n_yielded_coarse() { return _n_yielded_coarse; } - size_t n_yielded_sparse() { return _n_yielded_sparse; } - size_t n_yielded() { - return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse(); - } -}; - #endif // SHARE_GC_G1_HEAPREGIONREMSET_HPP diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP +#define SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP + +#include "gc/g1/heapRegion.inline.hpp" +#include "gc/g1/heapRegionRemSet.hpp" +#include "gc/g1/sparsePRT.hpp" +#include "utilities/bitMap.inline.hpp" + +template +inline void HeapRegionRemSet::iterate_prts(Closure& cl) { + _other_regions.iterate(cl); +} + +inline void PerRegionTable::add_card_work(CardIdx_t from_card, bool par) { + if (!_bm.at(from_card)) { + if (par) { + if (_bm.par_set_bit(from_card)) { + Atomic::inc(&_occupied); + } + } else { + _bm.set_bit(from_card); + _occupied++; + } + } +} + +inline void PerRegionTable::add_reference_work(OopOrNarrowOopStar from, bool par) { + // Must make this robust in case "from" is not in "_hr", because of + // concurrency. + + HeapRegion* loc_hr = hr(); + // If the test below fails, then this table was reused concurrently + // with this operation. This is OK, since the old table was coarsened, + // and adding a bit to the new table is never incorrect. 
+ if (loc_hr->is_in_reserved(from)) { + CardIdx_t from_card = OtherRegionsTable::card_within_region(from, loc_hr); + add_card_work(from_card, par); + } +} + +inline void PerRegionTable::add_card(CardIdx_t from_card_index) { + add_card_work(from_card_index, /*parallel*/ true); +} + +inline void PerRegionTable::seq_add_card(CardIdx_t from_card_index) { + add_card_work(from_card_index, /*parallel*/ false); +} + +inline void PerRegionTable::add_reference(OopOrNarrowOopStar from) { + add_reference_work(from, /*parallel*/ true); +} + +inline void PerRegionTable::seq_add_reference(OopOrNarrowOopStar from) { + add_reference_work(from, /*parallel*/ false); +} + +inline void PerRegionTable::init(HeapRegion* hr, bool clear_links_to_all_list) { + if (clear_links_to_all_list) { + set_next(NULL); + set_prev(NULL); + } + _collision_list_next = NULL; + _occupied = 0; + _bm.clear(); + // Make sure that the bitmap clearing above has been finished before publishing + // this PRT to concurrent threads. + OrderAccess::release_store(&_hr, hr); +} + +template +void OtherRegionsTable::iterate(Closure& cl) { + if (_n_coarse_entries > 0) { + BitMap::idx_t cur = _coarse_map.get_next_one_offset(0); + while (cur != _coarse_map.size()) { + cl.next_coarse_prt((uint)cur); + cur = _coarse_map.get_next_one_offset(cur + 1); + } + } + { + PerRegionTable* cur = _first_all_fine_prts; + while (cur != NULL) { + cl.next_fine_prt(cur->hr()->hrm_index(), cur->bm()); + cur = cur->next(); + } + } + { + SparsePRTBucketIter iter(&_sparse_table); + SparsePRTEntry* cur; + while (iter.has_next(cur)) { + cl.next_sparse_prt(cur->r_ind(), cur->cards(), cur->num_valid_cards()); + } + } +} + +#endif // SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/sparsePRT.cpp --- a/src/hotspot/share/gc/g1/sparsePRT.cpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/sparsePRT.cpp Thu Jun 27 11:48:32 2019 +0200 @@ -275,6 +275,19 @@ return false; } +bool RSHashTableBucketIter::has_next(SparsePRTEntry*& entry) { + while (_bl_ind == RSHashTable::NullEntry) { + if (_tbl_ind == (int)_rsht->capacity() - 1) { + return false; + } + _tbl_ind++; + _bl_ind = _rsht->_buckets[_tbl_ind]; + } + entry = _rsht->entry(_bl_ind); + _bl_ind = entry->next_index(); + return true; +} + bool RSHashTable::contains_card(RegionIdx_t region_index, CardIdx_t card_index) const { SparsePRTEntry* e = get_entry(region_index); return (e != NULL && e->contains_card(card_index)); diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/g1/sparsePRT.hpp --- a/src/hotspot/share/gc/g1/sparsePRT.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/g1/sparsePRT.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -38,10 +38,11 @@ // that might contain pointers into the owner region. class SparsePRTEntry: public CHeapObj { -private: +public: // The type of a card entry. typedef uint16_t card_elem_t; +private: // We need to make sizeof(SparsePRTEntry) an even multiple of maximum member size, // in order to force correct alignment that could otherwise cause SIGBUS errors // when reading the member variables. This calculates the minimum number of card @@ -96,6 +97,8 @@ // Copy the current entry's cards into the "_card" array of "e." 
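OtherRegionsTable::iterate() above drives a duck-typed closure with one callback per remembered-set granularity: next_coarse_prt() for coarsened regions, next_fine_prt() for per-card bitmaps, and next_sparse_prt() for explicit card lists. The sketch below shows a hypothetical closure satisfying that protocol, with HotSpot types replaced by standard containers; only the callback names follow the calls made in iterate(), everything else is an assumption.

// Illustrative closure for the three-callback PRT iteration protocol.
#include <cstddef>
#include <cstdint>
#include <vector>

struct CardCountingClosure {
  size_t coarse_regions = 0;
  size_t fine_cards     = 0;
  size_t sparse_cards   = 0;

  // Coarse PRT: every card of the region is remembered.
  void next_coarse_prt(uint32_t region_idx) {
    (void)region_idx;
    coarse_regions++;
  }

  // Fine PRT: a per-card bitmap for the region (BitMap* in the real code).
  void next_fine_prt(uint32_t region_idx, const std::vector<bool>& card_bitmap) {
    (void)region_idx;
    for (bool set : card_bitmap) {
      if (set) fine_cards++;
    }
  }

  // Sparse PRT: an explicit array of card indices (card_elem_t* in the real code).
  void next_sparse_prt(uint32_t region_idx, const uint16_t* cards, uint32_t num_cards) {
    (void)region_idx; (void)cards;
    sparse_cards += num_cards;
  }
};

// A template driver in the spirit of OtherRegionsTable::iterate(): it only requires
// that the closure provides the three callbacks above.
template <class Closure>
void iterate_example(Closure& cl) {
  cl.next_coarse_prt(5);
  std::vector<bool> bm(512, false);
  bm[3] = true;
  bm[42] = true;
  cl.next_fine_prt(7, bm);
  uint16_t sparse_cards[] = {1, 2, 3};
  cl.next_sparse_prt(9, sparse_cards, 3);
}

The merge phase introduced by this patch applies a closure of this shape to transfer remembered-set information onto the card table before scanning.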
inline void copy_cards(SparsePRTEntry* e) const; + card_elem_t* cards() { return _cards; } + inline CardIdx_t card(int i) const { assert(i >= 0, "must be nonnegative"); assert(i < cards_num(), "range checking"); @@ -106,7 +109,7 @@ class RSHashTable : public CHeapObj { friend class RSHashTableIter; - + friend class RSHashTableBucketIter; // Inverse maximum hash table occupancy used. static float TableOccupancyFactor; @@ -209,12 +212,29 @@ bool has_next(size_t& card_index); }; +// This is embedded in HRRS iterator. +class RSHashTableBucketIter { + int _tbl_ind; // [-1, 0.._rsht->_capacity) + int _bl_ind; // [-1, 0.._rsht->_capacity) + + RSHashTable* _rsht; + +public: + RSHashTableBucketIter(RSHashTable* rsht) : + _tbl_ind(0), + _bl_ind(rsht->_buckets[_tbl_ind]), + _rsht(rsht) { } + + bool has_next(SparsePRTEntry*& entry); +}; + // Concurrent access to a SparsePRT must be serialized by some external mutex. class SparsePRTIter; class SparsePRT { friend class SparsePRTIter; + friend class SparsePRTBucketIter; RSHashTable* _table; @@ -262,4 +282,14 @@ } }; +class SparsePRTBucketIter: public RSHashTableBucketIter { +public: + SparsePRTBucketIter(const SparsePRT* sprt) : + RSHashTableBucketIter(sprt->_table) {} + + bool has_next(SparsePRTEntry*& entry) { + return RSHashTableBucketIter::has_next(entry); + } +}; + #endif // SHARE_GC_G1_SPARSEPRT_HPP diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/shared/cardTable.hpp --- a/src/hotspot/share/gc/shared/cardTable.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/shared/cardTable.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -103,15 +103,11 @@ enum CardValues { clean_card = (CardValue)-1, - // The mask contains zeros in places for all other values. - clean_card_mask = clean_card - 31, dirty_card = 0, precleaned_card = 1, - claimed_card = 2, - deferred_card = 4, - last_card = 8, - CT_MR_BS_last_reserved = 16 + last_card = 2, + CT_MR_BS_last_reserved = 4 }; // a word's worth (row) of clean card values @@ -242,11 +238,8 @@ }; static CardValue clean_card_val() { return clean_card; } - static CardValue clean_card_mask_val() { return clean_card_mask; } static CardValue dirty_card_val() { return dirty_card; } - static CardValue claimed_card_val() { return claimed_card; } static CardValue precleaned_card_val() { return precleaned_card; } - static CardValue deferred_card_val() { return deferred_card; } static intptr_t clean_card_row_val() { return clean_card_row; } // Card marking array base (adjusted for heap low boundary) diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/shared/workerDataArray.hpp --- a/src/hotspot/share/gc/shared/workerDataArray.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/shared/workerDataArray.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -34,7 +34,7 @@ class WorkerDataArray : public CHeapObj { friend class WDAPrinter; public: - static const uint MaxThreadWorkItems = 5; + static const uint MaxThreadWorkItems = 6; private: T* _data; uint _length; diff -r d58442b8abc1 -r 3e31a8beaae4 src/hotspot/share/gc/shared/workerDataArray.inline.hpp --- a/src/hotspot/share/gc/shared/workerDataArray.inline.hpp Thu Jun 27 03:33:44 2019 +0200 +++ b/src/hotspot/share/gc/shared/workerDataArray.inline.hpp Thu Jun 27 11:48:32 2019 +0200 @@ -101,7 +101,7 @@ template void WorkerDataArray::add(uint worker_i, T value) { assert(worker_i < _length, "Worker %d is greater than max: %d", worker_i, _length); - assert(_data[worker_i] != uninitialized(), "No data to add to for worker %d", worker_i); + assert(_data[worker_i] != 
uninitialized(), "No data to add to %s for worker %d", _title, worker_i); _data[worker_i] += value; } diff -r d58442b8abc1 -r 3e31a8beaae4 test/hotspot/jtreg/gc/g1/TestGCLogMessages.java --- a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java Thu Jun 27 03:33:44 2019 +0200 +++ b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java Thu Jun 27 11:48:32 2019 +0200 @@ -95,21 +95,28 @@ new LogMessageWithLevel("Post Evacuate Collection Set", Level.INFO), new LogMessageWithLevel("Other", Level.INFO), - // Update RS - new LogMessageWithLevel("Update RS", Level.DEBUG), + // Merge Heap Roots + new LogMessageWithLevel("Merge Heap Roots", Level.INFO), + new LogMessageWithLevel("Remembered Sets", Level.DEBUG), + new LogMessageWithLevel("Merged Sparse", Level.DEBUG), + new LogMessageWithLevel("Merged Fine", Level.DEBUG), + new LogMessageWithLevel("Merged Coarse", Level.DEBUG), + new LogMessageWithLevel("Hot Card Cache", Level.DEBUG), + new LogMessageWithLevel("Log Buffers", Level.DEBUG), new LogMessageWithLevel("Processed Buffers", Level.DEBUG), - new LogMessageWithLevel("Scanned Cards", Level.DEBUG), + new LogMessageWithLevel("Dirty Cards", Level.DEBUG), new LogMessageWithLevel("Skipped Cards", Level.DEBUG), - new LogMessageWithLevel("Scan HCC", Level.DEBUG), - // Scan RS - new LogMessageWithLevel("Scan RS", Level.DEBUG), + // Scan Heap Roots + new LogMessageWithLevel("Scan Heap Roots", Level.DEBUG), new LogMessageWithLevel("Scanned Cards", Level.DEBUG), - new LogMessageWithLevel("Claimed Cards", Level.DEBUG), - new LogMessageWithLevel("Skipped Cards", Level.DEBUG), + new LogMessageWithLevel("Scanned Blocks", Level.DEBUG), + new LogMessageWithLevel("Claimed Chunks", Level.DEBUG), + // Code Roots Scan + new LogMessageWithLevel("Code Root Scan", Level.DEBUG), // Object Copy new LogMessageWithLevel("Object Copy", Level.DEBUG), - new LogMessageWithLevel("Scanned Cards", Level.DEBUG), - new LogMessageWithLevel("Claimed Cards", Level.DEBUG), + new LogMessageWithLevel("LAB Waste", Level.DEBUG), + new LogMessageWithLevel("LAB Undo Waste", Level.DEBUG), // Ext Root Scan new LogMessageWithLevel("Thread Roots", Level.TRACE), new LogMessageWithLevel("Universe Roots", Level.TRACE), @@ -133,6 +140,7 @@ new LogMessageWithLevel("Table Fixup", Level.DEBUG), new LogMessageWithLevel("Expand Heap After Collection", Level.DEBUG), new LogMessageWithLevel("Region Register", Level.DEBUG), + new LogMessageWithLevel("Prepare Heap Roots", Level.DEBUG), // Free CSet new LogMessageWithLevel("Free Collection Set", Level.DEBUG), new LogMessageWithLevel("Free Collection Set Serial", Level.TRACE), diff -r d58442b8abc1 -r 3e31a8beaae4 test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java --- a/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java Thu Jun 27 03:33:44 2019 +0200 +++ b/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java Thu Jun 27 11:48:32 2019 +0200 @@ -100,20 +100,30 @@ "CMRefRoots", "WaitForStrongCLD", "WeakCLDRoots", - "UpdateRS", - "ScanHCC", - "ScanRS", + "MergeHCC", + "MergeRS", + "MergeLB", + "ScanHR", "CodeRoots", "ObjCopy", "Termination", "StringDedupQueueFixup", "StringDedupTableFixup", "RedirtyCards", - // "PreserveCMReferents", "NonYoungFreeCSet", "YoungFreeCSet" ); + // Some GC phases may or may not occur depending on environment. Filter them out + // since we can not reliably guarantee that they occur (or not). 
+ Set<String> optPhases = of( + "OptScanHR", + "OptMergeRS", + "OptCodeRoots", + "OptObjCopy" + ); + usedPhases.removeAll(optPhases); + assertTrue(usedPhases.equals(allPhases), "Compare events expected and received" + ", Not found phases: " + allPhases.stream().filter(p -> !usedPhases.contains(p)).collect(joining(", ")) + ", Not expected phases: " + usedPhases.stream().filter(p -> !allPhases.contains(p)).collect(joining(", ")));
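Taken together, the G1RemSet entry points declared in the g1RemSet.hpp hunk above split root handling into a merge step and a location-based scan step. The sketch below outlines one plausible ordering of those calls during a pause, inferred only from the declarations and comments in this patch (prepare once, merge, per-worker scan, clean up once); the driver and the RemSetStub type are hypothetical stand-ins, not HotSpot code.

// Hypothetical outline of the merge-then-scan flow; only the call ordering matters.
#include <cstdio>

struct RemSetStub {
  void prepare_for_scan_heap_roots()                   { std::puts("prepare: concatenate log buffers, set up scan state"); }
  void merge_heap_roots(bool remembered_set_only)      { std::printf("merge heap roots (remset only: %d)\n", remembered_set_only); }
  void scan_heap_roots(unsigned worker_id)             { std::printf("worker %u: scan merged dirty cards\n", worker_id); }
  void scan_collection_set_regions(unsigned worker_id) { std::printf("worker %u: code roots and optional refs\n", worker_id); }
  void cleanup_after_scan_heap_roots()                 { std::puts("cleanup: clear duplicate-detection marks from the card table"); }
};

int main() {
  RemSetStub rs;
  rs.prepare_for_scan_heap_roots();      // once, in sequential code before parallel work
  rs.merge_heap_roots(false);            // merge remsets, hot card cache and log buffers
  for (unsigned worker = 0; worker < 4; worker++) {   // normally done by the GC worker threads
    rs.scan_heap_roots(worker);
    rs.scan_collection_set_regions(worker);
  }
  rs.cleanup_after_scan_heap_roots();    // once, in sequential code after parallel work
  return 0;
}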