6923991: G1: improve scalability of RSet scanning
Summary: Implemented block-based work stealing. Moved copying during the rset scanning phase to the main copying phase. Made the size of rset table depend on the region size.
Reviewed-by: apetrusenko, tonyp
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Feb 11 15:52:19 2010 -0800
@@ -2646,6 +2646,13 @@
// </NEW PREDICTION>
+struct PrepareForRSScanningClosure : public HeapRegionClosure {
+ bool doHeapRegion(HeapRegion *r) {
+ r->rem_set()->set_iter_claimed(0);
+ return false;
+ }
+};
+
void
G1CollectedHeap::do_collection_pause_at_safepoint() {
if (PrintHeapAtGC) {
@@ -2784,6 +2791,8 @@
gclog_or_tty->print_cr("\nAfter pause, heap:");
print();
#endif
+ PrepareForRSScanningClosure prepare_for_rs_scan;
+ collection_set_iterate(&prepare_for_rs_scan);
setup_surviving_young_words();
@@ -3781,22 +3790,16 @@
return obj;
}
-template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee, bool skip_cset_test>
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
template <class T>
-void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee, skip_cset_test>
+void G1ParCopyClosure <do_gen_barrier, barrier, do_mark_forwardee>
::do_oop_work(T* p) {
oop obj = oopDesc::load_decode_heap_oop(p);
assert(barrier != G1BarrierRS || obj != NULL,
"Precondition: G1BarrierRS implies obj is nonNull");
- // The only time we skip the cset test is when we're scanning
- // references popped from the queue. And we only push on the queue
- // references that we know point into the cset, so no point in
- // checking again. But we'll leave an assert here for peace of mind.
- assert(!skip_cset_test || _g1->obj_in_cs(obj), "invariant");
-
// here the null check is implicit in the cset_fast_test() test
- if (skip_cset_test || _g1->in_cset_fast_test(obj)) {
+ if (_g1->in_cset_fast_test(obj)) {
#if G1_REM_SET_LOGGING
gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" "
"into CS.", p, (void*) obj);
@@ -3813,7 +3816,6 @@
}
}
- // When scanning moved objs, must look at all oops.
if (barrier == G1BarrierEvac && obj != NULL) {
_par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
}
@@ -3823,8 +3825,8 @@
}
}
-template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(oop* p);
-template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(narrowOop* p);
+template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
+template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(narrowOop* p);
template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
assert(has_partial_array_mask(p), "invariant");
@@ -3896,11 +3898,11 @@
assert(UseCompressedOops, "Error");
narrowOop* p = (narrowOop*) stolen_task;
assert(has_partial_array_mask(p) ||
- _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "Error");
+ _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "Error");
pss->push_on_queue(p);
} else {
oop* p = (oop*) stolen_task;
- assert(has_partial_array_mask(p) || _g1h->obj_in_cs(*p), "Error");
+ assert(has_partial_array_mask(p) || _g1h->is_in_g1_reserved(*p), "Error");
pss->push_on_queue(p);
}
continue;
@@ -3962,6 +3964,7 @@
G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss);
G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss);
G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss);
+ G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss);
G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss);
G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss);
@@ -3985,7 +3988,7 @@
_g1h->g1_process_strong_roots(/* not collecting perm */ false,
SharedHeap::SO_AllClasses,
scan_root_cl,
- &only_scan_heap_rs_cl,
+ &push_heap_rs_cl,
scan_so_cl,
scan_perm_cl,
i);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -1623,7 +1623,7 @@
template <class T> void push_on_queue(T* ref) {
assert(ref != NULL, "invariant");
assert(has_partial_array_mask(ref) ||
- _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(ref)), "invariant");
+ _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(ref)), "invariant");
#ifdef ASSERT
if (has_partial_array_mask(ref)) {
oop p = clear_partial_array_mask(ref);
@@ -1644,9 +1644,9 @@
assert((oop*)ref != NULL, "pop_local() returned true");
assert(UseCompressedOops || !ref.is_narrow(), "Error");
assert(has_partial_array_mask((oop*)ref) ||
- _g1h->obj_in_cs(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
- : oopDesc::load_decode_heap_oop((oop*)ref)),
- "invariant");
+ _g1h->is_in_g1_reserved(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
+ : oopDesc::load_decode_heap_oop((oop*)ref)),
+ "invariant");
IF_G1_DETAILED_STATS(note_pop());
} else {
StarTask null_task;
@@ -1659,9 +1659,9 @@
assert((oop*)new_ref != NULL, "pop() from a local non-empty stack");
assert(UseCompressedOops || !new_ref.is_narrow(), "Error");
assert(has_partial_array_mask((oop*)new_ref) ||
- _g1h->obj_in_cs(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref)
- : oopDesc::load_decode_heap_oop((oop*)new_ref)),
- "invariant");
+ _g1h->is_in_g1_reserved(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref)
+ : oopDesc::load_decode_heap_oop((oop*)new_ref)),
+ "invariant");
ref = new_ref;
}
@@ -1825,12 +1825,12 @@
assert(UseCompressedOops, "Error");
narrowOop* p = (narrowOop*)ref_to_scan;
assert(!has_partial_array_mask(p) &&
- _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+ _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
} else {
oop* p = (oop*)ref_to_scan;
- assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
- _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+ assert((has_partial_array_mask(p) && _g1h->is_in_g1_reserved(clear_partial_array_mask(p))) ||
+ _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
}
}
@@ -1844,12 +1844,12 @@
assert(UseCompressedOops, "Error");
narrowOop* p = (narrowOop*)ref_to_scan;
assert(!has_partial_array_mask(p) &&
- _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+ _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
} else {
oop* p = (oop*)ref_to_scan;
assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
- _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+ _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity");
deal_with_reference(p);
}
}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Feb 11 15:52:19 2010 -0800
@@ -205,6 +205,7 @@
// policy is created before the heap, we have to set this up here,
// so it's done as soon as possible.
HeapRegion::setup_heap_region_size(Arguments::min_heap_size());
+ HeapRegionRemSet::setup_remset_size();
_recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
_prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -53,6 +53,15 @@
bool apply_to_weak_ref_discovered_field() { return true; }
};
+class G1ParPushHeapRSClosure : public G1ParClosureSuper {
+public:
+ G1ParPushHeapRSClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+ G1ParClosureSuper(g1, par_scan_state) { }
+ template <class T> void do_oop_nv(T* p);
+ virtual void do_oop(oop* p) { do_oop_nv(p); }
+ virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
class G1ParScanClosure : public G1ParClosureSuper {
public:
G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
@@ -100,7 +109,7 @@
};
template<bool do_gen_barrier, G1Barrier barrier,
- bool do_mark_forwardee, bool skip_cset_test>
+ bool do_mark_forwardee>
class G1ParCopyClosure : public G1ParCopyHelper {
G1ParScanClosure _scanner;
template <class T> void do_oop_work(T* p);
@@ -116,12 +125,13 @@
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
-typedef G1ParCopyClosure<false, G1BarrierNone, false, false> G1ParScanExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, false, false> G1ParScanPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, false, false> G1ParScanHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierNone, true, false> G1ParScanAndMarkExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, true, false> G1ParScanAndMarkPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, true, false> G1ParScanAndMarkHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
+
// This is the only case when we set skip_cset_test. Basically, this
// closure is (should?) only be called directly while we're draining
// the overflow and task queues. In that case we know that the
@@ -132,7 +142,7 @@
// We need a separate closure to handle references during evacuation
// failure processing, as we cannot asume that the reference already
// points into the collection set (like G1ParScanHeapEvacClosure does).
-typedef G1ParCopyClosure<false, G1BarrierEvac, false, false> G1ParScanHeapEvacFailureClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacFailureClosure;
class FilterIntoCSClosure: public OopClosure {
G1CollectedHeap* _g1;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -104,3 +104,16 @@
}
}
}
+
+template <class T> inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+ T heap_oop = oopDesc::load_heap_oop(p);
+
+ if (!oopDesc::is_null(heap_oop)) {
+ oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+ if (_g1->in_cset_fast_test(obj)) {
+ Prefetch::write(obj->mark_addr(), 0);
+ Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+ _par_scan_state->push_on_queue(p);
+ }
+ }
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Feb 11 15:52:19 2010 -0800
@@ -155,8 +155,8 @@
G1BlockOffsetSharedArray* _bot_shared;
CardTableModRefBS *_ct_bs;
int _worker_i;
+ int _block_size;
bool _try_claimed;
- size_t _min_skip_distance, _max_skip_distance;
public:
ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
_oc(oc),
@@ -168,8 +168,7 @@
_g1h = G1CollectedHeap::heap();
_bot_shared = _g1h->bot_shared();
_ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
- _min_skip_distance = 16;
- _max_skip_distance = 2 * _g1h->n_par_threads() * _min_skip_distance;
+ _block_size = MAX2<int>(G1RSetScanBlockSize, 1);
}
void set_try_claimed() { _try_claimed = true; }
@@ -225,12 +224,15 @@
HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
hrrs->init_iterator(iter);
size_t card_index;
- size_t skip_distance = 0, current_card = 0, jump_to_card = 0;
- while (iter->has_next(card_index)) {
- if (current_card < jump_to_card) {
- ++current_card;
- continue;
+
+ // We claim cards in block so as to recude the contention. The block size is determined by
+ // the G1RSetScanBlockSize parameter.
+ size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
+ for (size_t current_card = 0; iter->has_next(card_index); current_card++) {
+ if (current_card >= jump_to_card + _block_size) {
+ jump_to_card = hrrs->iter_claimed_next(_block_size);
}
+ if (current_card < jump_to_card) continue;
HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
#if 0
gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
@@ -247,22 +249,14 @@
// If the card is dirty, then we will scan it during updateRS.
if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
- if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
- scanCard(card_index, card_region);
- } else if (_try_claimed) {
- if (jump_to_card == 0 || jump_to_card != current_card) {
- // We did some useful work in the previous iteration.
- // Decrease the distance.
- skip_distance = MAX2(skip_distance >> 1, _min_skip_distance);
- } else {
- // Previous iteration resulted in a claim failure.
- // Increase the distance.
- skip_distance = MIN2(skip_distance << 1, _max_skip_distance);
- }
- jump_to_card = current_card + skip_distance;
- }
+ // We make the card as "claimed" lazily (so races are possible but they're benign),
+ // which reduces the number of duplicate scans (the rsets of the regions in the cset
+ // can intersect).
+ if (!_ct_bs->is_card_claimed(card_index)) {
+ _ct_bs->set_card_claimed(card_index);
+ scanCard(card_index, card_region);
+ }
}
- ++current_card;
}
if (!_try_claimed) {
hrrs->set_iter_complete();
@@ -299,30 +293,18 @@
double rs_time_start = os::elapsedTime();
HeapRegion *startRegion = calculateStartRegion(worker_i);
- BufferingOopsInHeapRegionClosure boc(oc);
- ScanRSClosure scanRScl(&boc, worker_i);
+ ScanRSClosure scanRScl(oc, worker_i);
_g1->collection_set_iterate_from(startRegion, &scanRScl);
scanRScl.set_try_claimed();
_g1->collection_set_iterate_from(startRegion, &scanRScl);
- boc.done();
- double closure_app_time_sec = boc.closure_app_seconds();
- double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
- closure_app_time_sec;
- double closure_app_time_ms = closure_app_time_sec * 1000.0;
+ double scan_rs_time_sec = os::elapsedTime() - rs_time_start;
assert( _cards_scanned != NULL, "invariant" );
_cards_scanned[worker_i] = scanRScl.cards_done();
_g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
_g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
-
- double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
- if (scan_new_refs_time_ms > 0.0) {
- closure_app_time_ms += scan_new_refs_time_ms;
- }
-
- _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
}
void HRInto_G1RemSet::updateRS(int worker_i) {
@@ -449,9 +431,8 @@
oc->do_oop(p);
}
}
- _g1p->record_scan_new_refs_time(worker_i,
- (os::elapsedTime() - scan_new_refs_start_sec)
- * 1000.0);
+ double scan_new_refs_time_ms = (os::elapsedTime() - scan_new_refs_start_sec) * 1000.0;
+ _g1p->record_scan_new_refs_time(worker_i, scan_new_refs_time_ms);
}
void HRInto_G1RemSet::cleanupHRRS() {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -207,8 +207,20 @@
develop(bool, G1PrintOopAppls, false, \
"When true, print applications of closures to external locs.") \
\
- develop(intx, G1LogRSRegionEntries, 7, \
- "Log_2 of max number of regions for which we keep bitmaps.") \
+ develop(intx, G1RSetRegionEntriesBase, 256, \
+ "Max number of regions in a fine-grain table per MB.") \
+ \
+ product(intx, G1RSetRegionEntries, 0, \
+ "Max number of regions for which we keep bitmaps." \
+ "Will be set ergonomically by default") \
+ \
+ develop(intx, G1RSetSparseRegionEntriesBase, 4, \
+ "Max number of entries per region in a sparse table " \
+ "per MB.") \
+ \
+ product(intx, G1RSetSparseRegionEntries, 0, \
+ "Max number of entries per region in a sparse table." \
+ "Will be set ergonomically by default.") \
\
develop(bool, G1RecordHRRSOops, false, \
"When true, record recent calls to rem set operations.") \
@@ -293,6 +305,10 @@
develop(bool, G1VerifyCTCleanup, false, \
"Verify card table cleanup.") \
\
+ product(uintx, G1RSetScanBlockSize, 64, \
+ "Size of a work unit of cards claimed by a worker thread" \
+ "during RSet scanning.") \
+ \
develop(bool, ReduceInitialCardMarksForG1, false, \
"When ReduceInitialCardMarks is true, this flag setting " \
" controls whether G1 allows the RICM optimization")
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -33,11 +33,12 @@
};
template<bool do_gen_barrier, G1Barrier barrier,
- bool do_mark_forwardee, bool skip_cset_test>
+ bool do_mark_forwardee>
class G1ParCopyClosure;
class G1ParScanClosure;
+class G1ParPushHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
class FilterIntoCSClosure;
class FilterOutOfRegionClosure;
@@ -51,6 +52,7 @@
#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
f(G1ParScanHeapEvacClosure,_nv) \
f(G1ParScanClosure,_nv) \
+ f(G1ParPushHeapRSClosure,_nv) \
f(FilterIntoCSClosure,_nv) \
f(FilterOutOfRegionClosure,_nv) \
f(FilterInHeapRegionAndIntoCSClosure,_nv) \
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Feb 11 15:52:19 2010 -0800
@@ -505,12 +505,13 @@
typedef PosParPRT* PosParPRTPtr;
if (_max_fine_entries == 0) {
assert(_mod_max_fine_entries_mask == 0, "Both or none.");
- _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries);
+ size_t max_entries_log = (size_t)log2_long((jlong)G1RSetRegionEntries);
+ _max_fine_entries = (size_t)(1 << max_entries_log);
_mod_max_fine_entries_mask = _max_fine_entries - 1;
#if SAMPLE_FOR_EVICTION
assert(_fine_eviction_sample_size == 0
&& _fine_eviction_stride == 0, "All init at same time.");
- _fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries);
+ _fine_eviction_sample_size = MAX2((size_t)4, max_entries_log);
_fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size;
#endif
}
@@ -655,13 +656,6 @@
#endif
}
- // Otherwise, transfer from sparse to fine-grain.
- CardIdx_t cards[SparsePRTEntry::CardsPerEntry];
- if (G1HRRSUseSparseTable) {
- bool res = _sparse_table.get_cards(from_hrs_ind, &cards[0]);
- assert(res, "There should have been an entry");
- }
-
if (_n_fine_entries == _max_fine_entries) {
prt = delete_region_table();
} else {
@@ -676,10 +670,12 @@
_fine_grain_regions[ind] = prt;
_n_fine_entries++;
- // Add in the cards from the sparse table.
if (G1HRRSUseSparseTable) {
- for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) {
- CardIdx_t c = cards[i];
+ // Transfer from sparse to fine-grain.
+ SparsePRTEntry *sprt_entry = _sparse_table.get_entry(from_hrs_ind);
+ assert(sprt_entry != NULL, "There should have been an entry");
+ for (int i = 0; i < SparsePRTEntry::cards_num(); i++) {
+ CardIdx_t c = sprt_entry->card(i);
if (c != SparsePRTEntry::NullEntry) {
prt->add_card(c);
}
@@ -1084,6 +1080,19 @@
{}
+void HeapRegionRemSet::setup_remset_size() {
+ // Setup sparse and fine-grain tables sizes.
+ // table_size = base * (log(region_size / 1M) + 1)
+ int region_size_log_mb = MAX2((int)HeapRegion::LogOfHRGrainBytes - (int)LOG_M, 0);
+ if (FLAG_IS_DEFAULT(G1RSetSparseRegionEntries)) {
+ G1RSetSparseRegionEntries = G1RSetSparseRegionEntriesBase * (region_size_log_mb + 1);
+ }
+ if (FLAG_IS_DEFAULT(G1RSetRegionEntries)) {
+ G1RSetRegionEntries = G1RSetRegionEntriesBase * (region_size_log_mb + 1);
+ }
+ guarantee(G1RSetSparseRegionEntries > 0 && G1RSetRegionEntries > 0 , "Sanity");
+}
+
void HeapRegionRemSet::init_for_par_iteration() {
_iter_state = Unclaimed;
}
@@ -1399,7 +1408,7 @@
os::sleep(Thread::current(), (jlong)5000, false);
G1CollectedHeap* g1h = G1CollectedHeap::heap();
- // Run with "-XX:G1LogRSRegionEntries=2", so that 1 and 5 end up in same
+ // Run with "-XX:G1LogRSetRegionEntries=2", so that 1 and 5 end up in same
// hash bucket.
HeapRegion* hr0 = g1h->region_at(0);
HeapRegion* hr1 = g1h->region_at(1);
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -187,7 +187,8 @@
void clear_outgoing_entries();
enum ParIterState { Unclaimed, Claimed, Complete };
- ParIterState _iter_state;
+ volatile ParIterState _iter_state;
+ volatile jlong _iter_claimed;
// Unused unless G1RecordHRRSOops is true.
@@ -209,6 +210,7 @@
HeapRegion* hr);
static int num_par_rem_sets();
+ static void setup_remset_size();
HeapRegion* hr() const {
return _other_regions.hr();
@@ -272,6 +274,19 @@
// Returns "true" iff the region's iteration is complete.
bool iter_is_complete();
+ // Support for claiming blocks of cards during iteration
+ void set_iter_claimed(size_t x) { _iter_claimed = (jlong)x; }
+ size_t iter_claimed() const { return (size_t)_iter_claimed; }
+ // Claim the next block of cards
+ size_t iter_claimed_next(size_t step) {
+ size_t current, next;
+ do {
+ current = iter_claimed();
+ next = current + step;
+ } while (Atomic::cmpxchg((jlong)next, &_iter_claimed, (jlong)current) != (jlong)current);
+ return current;
+ }
+
// Initialize the given iterator to iterate over this rem set.
void init_iterator(HeapRegionRemSetIterator* iter) const;
--- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp Thu Feb 11 15:52:19 2010 -0800
@@ -27,7 +27,7 @@
#define SPARSE_PRT_VERBOSE 0
-#define UNROLL_CARD_LOOPS 1
+#define UNROLL_CARD_LOOPS 1
void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) {
sprt_iter->init(this);
@@ -36,27 +36,32 @@
void SparsePRTEntry::init(RegionIdx_t region_ind) {
_region_ind = region_ind;
_next_index = NullEntry;
+
#if UNROLL_CARD_LOOPS
- assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
- _cards[0] = NullEntry;
- _cards[1] = NullEntry;
- _cards[2] = NullEntry;
- _cards[3] = NullEntry;
+ assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
+ for (int i = 0; i < cards_num(); i += UnrollFactor) {
+ _cards[i] = NullEntry;
+ _cards[i + 1] = NullEntry;
+ _cards[i + 2] = NullEntry;
+ _cards[i + 3] = NullEntry;
+ }
#else
- for (int i = 0; i < CardsPerEntry; i++)
+ for (int i = 0; i < cards_num(); i++)
_cards[i] = NullEntry;
#endif
}
bool SparsePRTEntry::contains_card(CardIdx_t card_index) const {
#if UNROLL_CARD_LOOPS
- assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
- if (_cards[0] == card_index) return true;
- if (_cards[1] == card_index) return true;
- if (_cards[2] == card_index) return true;
- if (_cards[3] == card_index) return true;
+ assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
+ for (int i = 0; i < cards_num(); i += UnrollFactor) {
+ if (_cards[i] == card_index ||
+ _cards[i + 1] == card_index ||
+ _cards[i + 2] == card_index ||
+ _cards[i + 3] == card_index) return true;
+ }
#else
- for (int i = 0; i < CardsPerEntry; i++) {
+ for (int i = 0; i < cards_num(); i++) {
if (_cards[i] == card_index) return true;
}
#endif
@@ -67,14 +72,16 @@
int SparsePRTEntry::num_valid_cards() const {
int sum = 0;
#if UNROLL_CARD_LOOPS
- assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
- if (_cards[0] != NullEntry) sum++;
- if (_cards[1] != NullEntry) sum++;
- if (_cards[2] != NullEntry) sum++;
- if (_cards[3] != NullEntry) sum++;
+ assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
+ for (int i = 0; i < cards_num(); i += UnrollFactor) {
+ sum += (_cards[i] != NullEntry);
+ sum += (_cards[i + 1] != NullEntry);
+ sum += (_cards[i + 2] != NullEntry);
+ sum += (_cards[i + 3] != NullEntry);
+ }
#else
- for (int i = 0; i < CardsPerEntry; i++) {
- if (_cards[i] != NulLEntry) sum++;
+ for (int i = 0; i < cards_num(); i++) {
+ sum += (_cards[i] != NullEntry);
}
#endif
// Otherwise, we're full.
@@ -83,27 +90,27 @@
SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(CardIdx_t card_index) {
#if UNROLL_CARD_LOOPS
- assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
- CardIdx_t c = _cards[0];
- if (c == card_index) return found;
- if (c == NullEntry) { _cards[0] = card_index; return added; }
- c = _cards[1];
- if (c == card_index) return found;
- if (c == NullEntry) { _cards[1] = card_index; return added; }
- c = _cards[2];
- if (c == card_index) return found;
- if (c == NullEntry) { _cards[2] = card_index; return added; }
- c = _cards[3];
- if (c == card_index) return found;
- if (c == NullEntry) { _cards[3] = card_index; return added; }
+ assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
+ CardIdx_t c;
+ for (int i = 0; i < cards_num(); i += UnrollFactor) {
+ c = _cards[i];
+ if (c == card_index) return found;
+ if (c == NullEntry) { _cards[i] = card_index; return added; }
+ c = _cards[i + 1];
+ if (c == card_index) return found;
+ if (c == NullEntry) { _cards[i + 1] = card_index; return added; }
+ c = _cards[i + 2];
+ if (c == card_index) return found;
+ if (c == NullEntry) { _cards[i + 2] = card_index; return added; }
+ c = _cards[i + 3];
+ if (c == card_index) return found;
+ if (c == NullEntry) { _cards[i + 3] = card_index; return added; }
+ }
#else
- for (int i = 0; i < CardsPerEntry; i++) {
+ for (int i = 0; i < cards_num(); i++) {
CardIdx_t c = _cards[i];
if (c == card_index) return found;
- if (c == NullEntry) {
- _cards[i] = card_index;
- return added;
- }
+ if (c == NullEntry) { _cards[i] = card_index; return added; }
}
#endif
// Otherwise, we're full.
@@ -112,13 +119,15 @@
void SparsePRTEntry::copy_cards(CardIdx_t* cards) const {
#if UNROLL_CARD_LOOPS
- assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
- cards[0] = _cards[0];
- cards[1] = _cards[1];
- cards[2] = _cards[2];
- cards[3] = _cards[3];
+ assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry");
+ for (int i = 0; i < cards_num(); i += UnrollFactor) {
+ cards[i] = _cards[i];
+ cards[i + 1] = _cards[i + 1];
+ cards[i + 2] = _cards[i + 2];
+ cards[i + 3] = _cards[i + 3];
+ }
#else
- for (int i = 0; i < CardsPerEntry; i++) {
+ for (int i = 0; i < cards_num(); i++) {
cards[i] = _cards[i];
}
#endif
@@ -133,7 +142,7 @@
RSHashTable::RSHashTable(size_t capacity) :
_capacity(capacity), _capacity_mask(capacity-1),
_occupied_entries(0), _occupied_cards(0),
- _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)),
+ _entries((SparsePRTEntry*)NEW_C_HEAP_ARRAY(char, SparsePRTEntry::size() * capacity)),
_buckets(NEW_C_HEAP_ARRAY(int, capacity)),
_free_list(NullEntry), _free_region(0)
{
@@ -161,8 +170,8 @@
"_capacity too large");
// This will put -1 == NullEntry in the key field of all entries.
- memset(_entries, -1, _capacity * sizeof(SparsePRTEntry));
- memset(_buckets, -1, _capacity * sizeof(int));
+ memset(_entries, NullEntry, _capacity * SparsePRTEntry::size());
+ memset(_buckets, NullEntry, _capacity * sizeof(int));
_free_list = NullEntry;
_free_region = 0;
}
@@ -175,8 +184,8 @@
if (res == SparsePRTEntry::added) _occupied_cards++;
#if SPARSE_PRT_VERBOSE
gclog_or_tty->print_cr(" after add_card[%d]: valid-cards = %d.",
- pointer_delta(e, _entries, sizeof(SparsePRTEntry)),
- e->num_valid_cards());
+ pointer_delta(e, _entries, SparsePRTEntry::size()),
+ e->num_valid_cards());
#endif
assert(e->num_valid_cards() > 0, "Postcondition");
return res != SparsePRTEntry::overflow;
@@ -199,6 +208,22 @@
return true;
}
+SparsePRTEntry* RSHashTable::get_entry(RegionIdx_t region_ind) {
+ int ind = (int) (region_ind & capacity_mask());
+ int cur_ind = _buckets[ind];
+ SparsePRTEntry* cur;
+ while (cur_ind != NullEntry &&
+ (cur = entry(cur_ind))->r_ind() != region_ind) {
+ cur_ind = cur->next_index();
+ }
+
+ if (cur_ind == NullEntry) return NULL;
+ // Otherwise...
+ assert(cur->r_ind() == region_ind, "Postcondition of loop + test above.");
+ assert(cur->num_valid_cards() > 0, "Inv");
+ return cur;
+}
+
bool RSHashTable::delete_entry(RegionIdx_t region_ind) {
int ind = (int) (region_ind & capacity_mask());
int* prev_loc = &_buckets[ind];
@@ -225,20 +250,8 @@
int ind = (int) (region_ind & capacity_mask());
int cur_ind = _buckets[ind];
SparsePRTEntry* cur;
- // XXX
- // int k = 0;
while (cur_ind != NullEntry &&
(cur = entry(cur_ind))->r_ind() != region_ind) {
- /*
- k++;
- if (k > 10) {
- gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): "
- "k = %d, cur_ind = %d.", region_ind, k, cur_ind);
- if (k >= 1000) {
- while (1) ;
- }
- }
- */
cur_ind = cur->next_index();
}
@@ -319,7 +332,7 @@
bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) {
_card_ind++;
CardIdx_t ci;
- if (_card_ind < SparsePRTEntry::CardsPerEntry &&
+ if (_card_ind < SparsePRTEntry::cards_num() &&
((ci = _rsht->entry(_bl_ind)->card(_card_ind)) !=
SparsePRTEntry::NullEntry)) {
card_index = compute_card_ind(ci);
@@ -359,7 +372,7 @@
size_t RSHashTable::mem_size() const {
return sizeof(this) +
- capacity() * (sizeof(SparsePRTEntry) + sizeof(int));
+ capacity() * (SparsePRTEntry::size() + sizeof(int));
}
// ----------------------------------------------------------------------
@@ -446,6 +459,10 @@
return _next->get_cards(region_id, cards);
}
+SparsePRTEntry* SparsePRT::get_entry(RegionIdx_t region_id) {
+ return _next->get_entry(region_id);
+}
+
bool SparsePRT::delete_entry(RegionIdx_t region_id) {
return _next->delete_entry(region_id);
}
--- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -32,21 +32,28 @@
// insertions only enqueue old versions for deletions, but do not delete
// old versions synchronously.
-
class SparsePRTEntry: public CHeapObj {
public:
-
enum SomePublicConstants {
- CardsPerEntry = 4,
- NullEntry = -1
+ NullEntry = -1,
+ UnrollFactor = 4
};
-
private:
RegionIdx_t _region_ind;
int _next_index;
- CardIdx_t _cards[CardsPerEntry];
-
+ CardIdx_t _cards[1];
+ // WARNING: Don't put any data members beyond this line. Card array has, in fact, variable length.
+ // It should always be the last data member.
public:
+ // Returns the size of the entry, used for entry allocation.
+ static size_t size() { return sizeof(SparsePRTEntry) + sizeof(CardIdx_t) * (cards_num() - 1); }
+ // Returns the size of the card array.
+ static int cards_num() {
+ // The number of cards should be a multiple of 4, because that's our current
+ // unrolling factor.
+ static const int s = MAX2<int>(G1RSetSparseRegionEntries & ~(UnrollFactor - 1), UnrollFactor);
+ return s;
+ }
// Set the region_ind to the given value, and delete all cards.
inline void init(RegionIdx_t region_ind);
@@ -134,12 +141,15 @@
bool add_card(RegionIdx_t region_id, CardIdx_t card_index);
bool get_cards(RegionIdx_t region_id, CardIdx_t* cards);
+
bool delete_entry(RegionIdx_t region_id);
bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const;
void add_entry(SparsePRTEntry* e);
+ SparsePRTEntry* get_entry(RegionIdx_t region_id);
+
void clear();
size_t capacity() const { return _capacity; }
@@ -148,7 +158,7 @@
size_t occupied_cards() const { return _occupied_cards; }
size_t mem_size() const;
- SparsePRTEntry* entry(int i) const { return &_entries[i]; }
+ SparsePRTEntry* entry(int i) const { return (SparsePRTEntry*)((char*)_entries + SparsePRTEntry::size() * i); }
void print();
};
@@ -157,7 +167,7 @@
class RSHashTableIter VALUE_OBJ_CLASS_SPEC {
int _tbl_ind; // [-1, 0.._rsht->_capacity)
int _bl_ind; // [-1, 0.._rsht->_capacity)
- short _card_ind; // [0..CardsPerEntry)
+ short _card_ind; // [0..SparsePRTEntry::cards_num())
RSHashTable* _rsht;
size_t _heap_bot_card_ind;
@@ -176,7 +186,7 @@
RSHashTableIter(size_t heap_bot_card_ind) :
_tbl_ind(RSHashTable::NullEntry),
_bl_ind(RSHashTable::NullEntry),
- _card_ind((SparsePRTEntry::CardsPerEntry-1)),
+ _card_ind((SparsePRTEntry::cards_num() - 1)),
_rsht(NULL),
_heap_bot_card_ind(heap_bot_card_ind)
{}
@@ -185,7 +195,7 @@
_rsht = rsht;
_tbl_ind = -1; // So that first increment gets to 0.
_bl_ind = RSHashTable::NullEntry;
- _card_ind = (SparsePRTEntry::CardsPerEntry-1);
+ _card_ind = (SparsePRTEntry::cards_num() - 1);
}
bool has_next(size_t& card_index);
@@ -241,9 +251,13 @@
// If the table hold an entry for "region_ind", Copies its
// cards into "cards", which must be an array of length at least
- // "CardsPerEntry", and returns "true"; otherwise, returns "false".
+ // "SparePRTEntry::cards_num()", and returns "true"; otherwise,
+ // returns "false".
bool get_cards(RegionIdx_t region_ind, CardIdx_t* cards);
+ // Return the pointer to the entry associated with the given region.
+ SparsePRTEntry* get_entry(RegionIdx_t region_ind);
+
// If there is an entry for "region_ind", removes it and return "true";
// otherwise returns "false."
bool delete_entry(RegionIdx_t region_ind);
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -339,6 +339,16 @@
return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val();
}
+ void set_card_claimed(size_t card_index) {
+ jbyte val = _byte_map[card_index];
+ if (val == clean_card_val()) {
+ val = (jbyte)claimed_card_val();
+ } else {
+ val |= (jbyte)claimed_card_val();
+ }
+ _byte_map[card_index] = val;
+ }
+
bool claim_card(size_t card_index);
bool is_card_clean(size_t card_index) {
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp Tue Feb 09 13:56:09 2010 -0800
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp Thu Feb 11 15:52:19 2010 -0800
@@ -139,6 +139,10 @@
const size_t G = M*K;
const size_t HWperKB = K / sizeof(HeapWord);
+const size_t LOG_K = 10;
+const size_t LOG_M = 2 * LOG_K;
+const size_t LOG_G = 2 * LOG_M;
+
const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint
const jint max_jint = (juint)min_jint - 1; // 0x7FFFFFFF == largest jint