# HG changeset patch # User johnc # Date 1321562415 28800 # Node ID 9bb1ddd8da51578a3865ff0b8918a10e7e2c786a # Parent 7fde26aecbe545bb0bf3aa6ed9b66619b3a31eed 7112743: G1: Reduce overhead of marking closure during evacuation pauses Summary: Parallelize the serial code that was used to mark objects reachable from survivor objects in the collection set. Some minor improvements in the timers used to track the freeing of the collection set along with some tweaks to PrintGCDetails. Reviewed-by: tonyp, brutisso diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Nov 17 12:40:15 2011 -0800 @@ -2906,8 +2906,10 @@ } } -class CSMarkOopClosure: public OopClosure { - friend class CSMarkBitMapClosure; +// Closures used by ConcurrentMark::complete_marking_in_collection_set(). + +class CSetMarkOopClosure: public OopClosure { + friend class CSetMarkBitMapClosure; G1CollectedHeap* _g1h; CMBitMap* _bm; @@ -2917,6 +2919,7 @@ int _ms_size; int _ms_ind; int _array_increment; + int _worker_i; bool push(oop obj, int arr_ind = 0) { if (_ms_ind == _ms_size) { @@ -2957,7 +2960,6 @@ for (int j = arr_ind; j < lim; j++) { do_oop(aobj->objArrayOopDesc::obj_at_addr(j)); } - } else { obj->oop_iterate(this); } @@ -2967,17 +2969,17 @@ } public: - CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : + CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) : _g1h(G1CollectedHeap::heap()), _cm(cm), _bm(cm->nextMarkBitMap()), _ms_size(ms_size), _ms_ind(0), _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), - _array_increment(MAX2(ms_size/8, 16)) - {} - - ~CSMarkOopClosure() { + _array_increment(MAX2(ms_size/8, 16)), + _worker_i(worker_i) { } + + ~CSetMarkOopClosure() { FREE_C_HEAP_ARRAY(oop, _ms); FREE_C_HEAP_ARRAY(jint, _array_ind_stack); } @@ 
-3000,10 +3002,11 @@ if (hr != NULL) { if (hr->in_collection_set()) { if (_g1h->is_obj_ill(obj)) { - _bm->mark((HeapWord*)obj); - if (!push(obj)) { - gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); - set_abort(); + if (_bm->parMark((HeapWord*)obj)) { + if (!push(obj)) { + gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed."); + set_abort(); + } } } } else { @@ -3014,19 +3017,19 @@ } }; -class CSMarkBitMapClosure: public BitMapClosure { - G1CollectedHeap* _g1h; - CMBitMap* _bitMap; - ConcurrentMark* _cm; - CSMarkOopClosure _oop_cl; +class CSetMarkBitMapClosure: public BitMapClosure { + G1CollectedHeap* _g1h; + CMBitMap* _bitMap; + ConcurrentMark* _cm; + CSetMarkOopClosure _oop_cl; + int _worker_i; + public: - CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : + CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) : _g1h(G1CollectedHeap::heap()), _bitMap(cm->nextMarkBitMap()), - _oop_cl(cm, ms_size) - {} - - ~CSMarkBitMapClosure() {} + _oop_cl(cm, ms_size, worker_i), + _worker_i(worker_i) { } bool do_bit(size_t offset) { // convert offset into a HeapWord* @@ -3048,53 +3051,69 @@ } }; - -class CompleteMarkingInCSHRClosure: public HeapRegionClosure { - CMBitMap* _bm; - CSMarkBitMapClosure _bit_cl; +class CompleteMarkingInCSetHRClosure: public HeapRegionClosure { + CMBitMap* _bm; + CSetMarkBitMapClosure _bit_cl; + int _worker_i; + enum SomePrivateConstants { MSSize = 1000 }; - bool _completed; + public: - CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : + CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) : _bm(cm->nextMarkBitMap()), - _bit_cl(cm, MSSize), - _completed(true) - {} - - ~CompleteMarkingInCSHRClosure() {} - - bool doHeapRegion(HeapRegion* r) { - if (!r->evacuation_failed()) { - MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); - if (!mr.is_empty()) { - if (!_bm->iterate(&_bit_cl, mr)) { - _completed = false; - return true; + _bit_cl(cm, 
MSSize, worker_i), + _worker_i(worker_i) { } + + bool doHeapRegion(HeapRegion* hr) { + if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) { + // The current worker has successfully claimed the region. + if (!hr->evacuation_failed()) { + MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start()); + if (!mr.is_empty()) { + bool done = false; + while (!done) { + done = _bm->iterate(&_bit_cl, mr); + } } } } return false; } - - bool completed() { return _completed; } }; -class ClearMarksInHRClosure: public HeapRegionClosure { - CMBitMap* _bm; +class SetClaimValuesInCSetHRClosure: public HeapRegionClosure { + jint _claim_value; + public: - ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } - - bool doHeapRegion(HeapRegion* r) { - if (!r->used_region().is_empty() && !r->evacuation_failed()) { - MemRegion usedMR = r->used_region(); - _bm->clearRange(r->used_region()); - } + SetClaimValuesInCSetHRClosure(jint claim_value) : + _claim_value(claim_value) { } + + bool doHeapRegion(HeapRegion* hr) { + hr->set_claim_value(_claim_value); return false; } }; +class G1ParCompleteMarkInCSetTask: public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + +public: + G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h, + ConcurrentMark* cm) : + AbstractGangTask("Complete Mark in CSet"), + _g1h(g1h), _cm(cm) { } + + void work(int worker_i) { + CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i); + HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i); + _g1h->collection_set_iterate_from(hr, &cmplt); + } +}; + void ConcurrentMark::complete_marking_in_collection_set() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); @@ -3103,17 +3122,28 @@ return; } - int i = 1; double start = os::elapsedTime(); - while (true) { - i++; - CompleteMarkingInCSHRClosure cmplt(this); - g1h->collection_set_iterate(&cmplt); - if (cmplt.completed()) break; + int n_workers = g1h->workers()->total_workers(); + + G1ParCompleteMarkInCSetTask 
complete_mark_task(g1h, this); + + assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity"); + + if (G1CollectedHeap::use_parallel_gc_threads()) { + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&complete_mark_task); + g1h->set_par_threads(0); + } else { + complete_mark_task.work(0); } - ClearMarksInHRClosure clr(nextMarkBitMap()); - g1h->collection_set_iterate(&clr); + assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity"); + + // Now reset the claim values in the regions in the collection set. + SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue); + g1h->collection_set_iterate(&set_cv_cl); + + assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity"); double end_time = os::elapsedTime(); double elapsed_time_ms = (end_time - start) * 1000.0; diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Nov 17 12:40:15 2011 -0800 @@ -360,7 +360,7 @@ friend class ConcurrentMarkThread; friend class CMTask; friend class CMBitMapClosure; - friend class CSMarkOopClosure; + friend class CSetMarkOopClosure; friend class CMGlobalObjectClosure; friend class CMRemarkTask; friend class CMConcurrentMarkingTask; diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Nov 17 12:40:15 2011 -0800 @@ -2617,10 +2617,10 @@ _claim_value(claim_value), _failures(0), _sh_region(NULL) { } bool doHeapRegion(HeapRegion* r) { if (r->claim_value() != _claim_value) { - gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " + 
gclog_or_tty->print_cr("Region " HR_FORMAT ", " "claim value = %d, should be %d", - r->bottom(), r->end(), r->claim_value(), - _claim_value); + HR_FORMAT_PARAMS(r), + r->claim_value(), _claim_value); ++_failures; } if (!r->isHumongous()) { @@ -2629,9 +2629,9 @@ _sh_region = r; } else if (r->continuesHumongous()) { if (r->humongous_start_region() != _sh_region) { - gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " + gclog_or_tty->print_cr("Region " HR_FORMAT ", " "HS = "PTR_FORMAT", should be "PTR_FORMAT, - r->bottom(), r->end(), + HR_FORMAT_PARAMS(r), r->humongous_start_region(), _sh_region); ++_failures; @@ -2649,8 +2649,63 @@ heap_region_iterate(&cl); return cl.failures() == 0; } + +class CheckClaimValuesInCSetHRClosure: public HeapRegionClosure { + jint _claim_value; + size_t _failures; + +public: + CheckClaimValuesInCSetHRClosure(jint claim_value) : + _claim_value(claim_value), + _failures(0) { } + + size_t failures() { + return _failures; + } + + bool doHeapRegion(HeapRegion* hr) { + assert(hr->in_collection_set(), "how?"); + assert(!hr->isHumongous(), "H-region in CSet"); + if (hr->claim_value() != _claim_value) { + gclog_or_tty->print_cr("CSet Region " HR_FORMAT ", " + "claim value = %d, should be %d", + HR_FORMAT_PARAMS(hr), + hr->claim_value(), _claim_value); + _failures += 1; + } + return false; + } +}; + +bool G1CollectedHeap::check_cset_heap_region_claim_values(jint claim_value) { + CheckClaimValuesInCSetHRClosure cl(claim_value); + collection_set_iterate(&cl); + return cl.failures() == 0; +} #endif // ASSERT +// We want the parallel threads to start their collection +// set iteration at different collection set regions to +// avoid contention. 
+// If we have: +// n collection set regions +// p threads +// Then thread t will start at region t * floor (n/p) + +HeapRegion* G1CollectedHeap::start_cset_region_for_worker(int worker_i) { + HeapRegion* result = g1_policy()->collection_set(); + if (G1CollectedHeap::use_parallel_gc_threads()) { + size_t cs_size = g1_policy()->cset_region_length(); + int n_workers = workers()->total_workers(); + size_t cs_spans = cs_size / n_workers; + size_t ind = cs_spans * worker_i; + for (size_t i = 0; i < ind; i++) { + result = result->next_in_collection_set(); + } + } + return result; +} + void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) { HeapRegion* r = g1_policy()->collection_set(); while (r != NULL) { @@ -5393,8 +5448,11 @@ finalize_for_evac_failure(); - // Must do this before removing self-forwarding pointers, which clears - // the per-region evac-failure flags. + // Must do this before clearing the per-region evac-failure flags + // (which is currently done when we free the collection set). + // We also only do this if marking is actually in progress and so + // have to do this before we set the mark_in_progress flag at the + // end of an initial mark pause. 
concurrent_mark()->complete_marking_in_collection_set(); if (evacuation_failed()) { @@ -5656,7 +5714,6 @@ while (cur != NULL) { assert(!is_on_master_free_list(cur), "sanity"); - if (non_young) { if (cur->is_young()) { double end_sec = os::elapsedTime(); @@ -5667,12 +5724,14 @@ non_young = false; } } else { - double end_sec = os::elapsedTime(); - double elapsed_ms = (end_sec - start_sec) * 1000.0; - young_time_ms += elapsed_ms; - - start_sec = os::elapsedTime(); - non_young = true; + if (!cur->is_young()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = true; + } } rs_lengths += cur->rem_set()->occupied(); @@ -5704,8 +5763,20 @@ "invariant" ); if (!cur->evacuation_failed()) { + MemRegion used_mr = cur->used_region(); + // And the region is empty. - assert(!cur->is_empty(), "Should not have empty regions in a CS."); + assert(!used_mr.is_empty(), "Should not have empty regions in a CS."); + + // If marking is in progress then clear any objects marked in + // the current region. Note mark_in_progress() returns false, + // even during an initial mark pause, until the set_marking_started() + // call which takes place later in the pause. 
+ if (mark_in_progress()) { + assert(!g1_policy()->during_initial_mark_pause(), "sanity"); + _cm->nextMarkBitMap()->clearRange(used_mr); + } + free_region(cur, &pre_used, &local_free_list, false /* par */); } else { cur->uninstall_surv_rate_group(); @@ -5725,10 +5796,12 @@ double end_sec = os::elapsedTime(); double elapsed_ms = (end_sec - start_sec) * 1000.0; - if (non_young) + + if (non_young) { non_young_time_ms += elapsed_ms; - else + } else { young_time_ms += elapsed_ms; + } update_sets_after_freeing_regions(pre_used, &local_free_list, NULL /* old_proxy_set */, diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Nov 17 12:40:15 2011 -0800 @@ -1294,8 +1294,17 @@ #ifdef ASSERT bool check_heap_region_claim_values(jint claim_value); + + // Same as the routine above but only checks regions in the + // current collection set. + bool check_cset_heap_region_claim_values(jint claim_value); #endif // ASSERT + // Given the id of a worker, calculate a suitable + // starting region for iterating over the current + // collection set. + HeapRegion* start_cset_region_for_worker(int worker_i); + // Iterate over the regions (if any) in the current collection set. 
void collection_set_iterate(HeapRegionClosure* blk); diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Nov 17 12:40:15 2011 -0800 @@ -141,6 +141,7 @@ _summary(new Summary()), _cur_clear_ct_time_ms(0.0), + _mark_closure_time_ms(0.0), _cur_ref_proc_time_ms(0.0), _cur_ref_enq_time_ms(0.0), @@ -959,10 +960,6 @@ assert( verify_young_ages(), "region age verification" ); } -void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) { - _mark_closure_time_ms = mark_closure_time_ms; -} - void G1CollectorPolicy::record_concurrent_mark_init_end(double mark_init_elapsed_time_ms) { _during_marking = true; @@ -1251,6 +1248,11 @@ // current value of "other time" other_time_ms -= _cur_clear_ct_time_ms; + // Subtract the time spent completing marking in the collection + // set. Note if marking is not in progress during the pause + // the value of _mark_closure_time_ms will be zero. + other_time_ms -= _mark_closure_time_ms; + // TraceGen0Time and TraceGen1Time summary info updating. 
_all_pause_times_ms->add(elapsed_ms); @@ -1407,6 +1409,9 @@ print_stats(1, "Scan RS", scan_rs_time); print_stats(1, "Object Copying", obj_copy_time); } + if (print_marking_info) { + print_stats(1, "Complete CSet Marking", _mark_closure_time_ms); + } print_stats(1, "Clear CT", _cur_clear_ct_time_ms); #ifndef PRODUCT print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms); @@ -1418,9 +1423,14 @@ } #endif print_stats(1, "Other", other_time_ms); - print_stats(2, "Choose CSet", _recorded_young_cset_choice_time_ms); + print_stats(2, "Choose CSet", + (_recorded_young_cset_choice_time_ms + + _recorded_non_young_cset_choice_time_ms)); print_stats(2, "Ref Proc", _cur_ref_proc_time_ms); print_stats(2, "Ref Enq", _cur_ref_enq_time_ms); + print_stats(2, "Free CSet", + (_recorded_young_free_cset_time_ms + + _recorded_non_young_free_cset_time_ms)); for (int i = 0; i < _aux_num; ++i) { if (_cur_aux_times_set[i]) { diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Nov 17 12:40:15 2011 -0800 @@ -737,7 +737,9 @@ void record_concurrent_mark_init_end(double mark_init_elapsed_time_ms); - void record_mark_closure_time(double mark_closure_time_ms); + void record_mark_closure_time(double mark_closure_time_ms) { + _mark_closure_time_ms = mark_closure_time_ms; + } void record_concurrent_mark_remark_start(); void record_concurrent_mark_remark_end(); diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Nov 17 12:40:15 2011 -0800 @@ -209,29 +209,9 @@ size_t cards_looked_up() { return _cards;} }; -// We want the parallel threads to start their scanning at -// different collection set 
regions to avoid contention. -// If we have: -// n collection set regions -// p threads -// Then thread t will start at region t * floor (n/p) - -HeapRegion* G1RemSet::calculateStartRegion(int worker_i) { - HeapRegion* result = _g1p->collection_set(); - if (G1CollectedHeap::use_parallel_gc_threads()) { - size_t cs_size = _g1p->cset_region_length(); - int n_workers = _g1->workers()->total_workers(); - size_t cs_spans = cs_size / n_workers; - size_t ind = cs_spans * worker_i; - for (size_t i = 0; i < ind; i++) - result = result->next_in_collection_set(); - } - return result; -} - void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { double rs_time_start = os::elapsedTime(); - HeapRegion *startRegion = calculateStartRegion(worker_i); + HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i); ScanRSClosure scanRScl(oc, worker_i); diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Nov 17 12:40:15 2011 -0800 @@ -104,8 +104,6 @@ void scanRS(OopsInHeapRegionClosure* oc, int worker_i); void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i); - HeapRegion* calculateStartRegion(int i); - CardTableModRefBS* ct_bs() { return _ct_bs; } size_t cardsScanned() { return _total_cards_scanned; } diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Nov 17 12:40:15 2011 -0800 @@ -367,12 +367,13 @@ static void setup_heap_region_size(uintx min_heap_size); enum ClaimValues { - InitialClaimValue = 0, - FinalCountClaimValue = 1, - NoteEndClaimValue = 2, - ScrubRemSetClaimValue = 3, - ParVerifyClaimValue = 4, - RebuildRSClaimValue = 5 + InitialClaimValue = 0, + 
FinalCountClaimValue = 1, + NoteEndClaimValue = 2, + ScrubRemSetClaimValue = 3, + ParVerifyClaimValue = 4, + RebuildRSClaimValue = 5, + CompleteMarkCSetClaimValue = 6 }; inline HeapWord* par_allocate_no_bot_updates(size_t word_size) { diff -r 7fde26aecbe5 -r 9bb1ddd8da51 hotspot/src/share/vm/oops/objArrayOop.hpp --- a/hotspot/src/share/vm/oops/objArrayOop.hpp Mon Nov 28 09:49:05 2011 -0800 +++ b/hotspot/src/share/vm/oops/objArrayOop.hpp Thu Nov 17 12:40:15 2011 -0800 @@ -34,7 +34,7 @@ friend class objArrayKlass; friend class Runtime1; friend class psPromotionManager; - friend class CSMarkOopClosure; + friend class CSetMarkOopClosure; friend class G1ParScanPartialArrayClosure; template T* obj_at_addr(int index) const {