--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Thu Mar 26 08:51:32 2009 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Sat Mar 28 15:47:29 2009 -0700
@@ -36,7 +36,7 @@
ObjToScanQueueSet* work_queue_set_,
size_t desired_plab_sz_,
ParallelTaskTerminator& term_) :
- _to_space(to_space_), _old_gen(old_gen_), _thread_num(thread_num_),
+ _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
_work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
_ageTable(false), // false ==> not the global age table, no perf data.
_to_space_alloc_buffer(desired_plab_sz_),
@@ -57,6 +57,11 @@
_start = os::elapsedTime();
_old_gen_closure.set_generation(old_gen_);
_old_gen_root_closure.set_generation(old_gen_);
+ if (UseCompressedOops) {
+ _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true);
+ } else {
+ _overflow_stack = NULL;
+ }
}
#ifdef _MSC_VER
#pragma warning( pop )
@@ -81,7 +86,7 @@
assert(old->is_objArray(), "must be obj array");
assert(old->is_forwarded(), "must be forwarded");
assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
- assert(!_old_gen->is_in(old), "must be in young generation.");
+ assert(!old_gen()->is_in(old), "must be in young generation.");
objArrayOop obj = objArrayOop(old->forwardee());
// Process ParGCArrayScanChunk elements now
@@ -119,26 +124,68 @@
void ParScanThreadState::trim_queues(int max_size) {
ObjToScanQueue* queue = work_queue();
- while (queue->size() > (juint)max_size) {
- oop obj_to_scan;
- if (queue->pop_local(obj_to_scan)) {
- note_pop();
-
- if ((HeapWord *)obj_to_scan < young_old_boundary()) {
- if (obj_to_scan->is_objArray() &&
- obj_to_scan->is_forwarded() &&
- obj_to_scan->forwardee() != obj_to_scan) {
- scan_partial_array_and_push_remainder(obj_to_scan);
+ do {
+ while (queue->size() > (juint)max_size) {
+ oop obj_to_scan;
+ if (queue->pop_local(obj_to_scan)) {
+ note_pop();
+ if ((HeapWord *)obj_to_scan < young_old_boundary()) {
+ if (obj_to_scan->is_objArray() &&
+ obj_to_scan->is_forwarded() &&
+ obj_to_scan->forwardee() != obj_to_scan) {
+ scan_partial_array_and_push_remainder(obj_to_scan);
+ } else {
+ // object is in to_space
+ obj_to_scan->oop_iterate(&_to_space_closure);
+ }
} else {
- // object is in to_space
- obj_to_scan->oop_iterate(&_to_space_closure);
+ // object is in old generation
+ obj_to_scan->oop_iterate(&_old_gen_closure);
}
- } else {
- // object is in old generation
- obj_to_scan->oop_iterate(&_old_gen_closure);
}
}
+ // When ParGCTrimOverflow is set, eagerly refill the work queue from
+ // overflow and go around again to trim it. For the case of compressed
+ // oops, we have a private, non-shared overflow stack, so draining it
+ // onto our work queue is what allows those objects to be stolen and
+ // distributes load more evenly early on. Note: this may be good to
+ // do in general rather than delay for the final stealing phase.
+ // The loop ends once take_from_overflow_list() returns false.
+ } while (ParGCTrimOverflow && young_gen()->take_from_overflow_list(this));
+}
+
+bool ParScanThreadState::take_from_overflow_stack() { // refill our work queue from our own overflow stack
+ assert(UseCompressedOops, "Else should not call");
+ assert(young_gen()->overflow_list() == NULL, "Error"); // the generation's overflow list must be empty here
+ ObjToScanQueue* queue = work_queue();
+ GrowableArray<oop>* of_stack = overflow_stack();
+ uint num_overflow_elems = of_stack->length();
+ uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4, // at most a quarter of the queue's free slots,
+ (juint)ParGCDesiredObjsFromOverflowList), // capped by ParGCDesiredObjsFromOverflowList
+ num_overflow_elems); // and by what the overflow stack currently holds
+ // Transfer the most recent num_take_elems from the overflow
+ // stack to our work queue.
+ for (size_t i = 0; i != num_take_elems; i++) {
+ oop cur = of_stack->pop();
+ oop obj_to_push = cur->forwardee(); // by default the forwardee of the popped object is queued
+ assert(Universe::heap()->is_in_reserved(cur), "Should be in heap");
+ assert(!old_gen()->is_in_reserved(cur), "Should be in young gen");
+ assert(Universe::heap()->is_in_reserved(obj_to_push), "Should be in heap");
+ if (should_be_partially_scanned(obj_to_push, cur)) {
+ assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
+ obj_to_push = cur; // for a partially scanned array, queue the popped object itself instead
+ }
+ bool ok = queue->push(obj_to_push);
+ assert(ok, "Should have succeeded"); // num_take_elems was bounded by the queue's free space above
}
+ assert(young_gen()->overflow_list() == NULL, "Error");
+ return num_take_elems > 0; // was something transferred?
+}
+
+void ParScanThreadState::push_on_overflow_stack(oop p) { // append p to this thread state's overflow stack
+ assert(UseCompressedOops, "Else should not call");
+ overflow_stack()->push(p);
+ assert(young_gen()->overflow_list() == NULL, "Error"); // the generation's overflow list must stay empty in this mode
}
HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
@@ -425,8 +472,7 @@
ResourceMark rm;
HandleMark hm;
// We would need multiple old-gen queues otherwise.
- guarantee(gch->n_gens() == 2,
- "Par young collection currently only works with one older gen.");
+ assert(gch->n_gens() == 2, "Par young collection currently only works with one older gen.");
Generation* old_gen = gch->next_gen(_gen);
@@ -1169,36 +1215,75 @@
}
#endif
+// In case we are using compressed oops, we need to be careful.
+// If the object being pushed is an object array, then its length
+// field keeps track of the "grey boundary" at which the next
+// incremental scan will be done (see ParGCArrayScanChunk).
+// When using compressed oops, this length field is kept in the
+// lower 32 bits of the erstwhile klass word and cannot be used
+// for the overflow chaining pointer (OCP below). As such the OCP
+// would itself need to be compressed into the top 32 bits in this
+// case. Unfortunately (see below), in the event that we have a
+// promotion failure, the node to be pushed on the list can be
+// outside of the Java heap, so the heap-based pointer compression
+// would not work (we would have potential aliasing between C-heap
+// and Java-heap pointers). For this reason, when using compressed
+// oops, we simply use a worker-thread-local, non-shared overflow
+// stack in the form of a growable array, with a slightly different
+// overflow stack draining strategy. If/when we start using fat
+// stacks here, we can go back to using (fat) pointer chains
+// (although some performance comparisons would be useful since
+// single global lists have their own performance disadvantages,
+// as we were made painfully aware not long ago; see 6786503).
#define BUSY (oop(0x1aff1aff))
void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
- // if the object has been forwarded to itself, then we cannot
- // use the klass pointer for the linked list. Instead we have
- // to allocate an oopDesc in the C-Heap and use that for the linked list.
- // XXX This is horribly inefficient when a promotion failure occurs
- // and should be fixed. XXX FIX ME !!!
+ assert(is_in_reserved(from_space_obj), "Should be from this generation");
+ if (UseCompressedOops) {
+ // In the case of compressed oops, we use a private, non-shared
+ // overflow stack.
+ par_scan_state->push_on_overflow_stack(from_space_obj);
+ } else {
+ // if the object has been forwarded to itself, then we cannot
+ // use the klass pointer for the linked list. Instead we have
+ // to allocate an oopDesc in the C-Heap and use that for the linked list.
+ // XXX This is horribly inefficient when a promotion failure occurs
+ // and should be fixed. XXX FIX ME !!!
#ifndef PRODUCT
- Atomic::inc_ptr(&_num_par_pushes);
- assert(_num_par_pushes > 0, "Tautology");
+ Atomic::inc_ptr(&_num_par_pushes);
+ assert(_num_par_pushes > 0, "Tautology");
#endif
- if (from_space_obj->forwardee() == from_space_obj) {
- oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
- listhead->forward_to(from_space_obj);
- from_space_obj = listhead;
+ if (from_space_obj->forwardee() == from_space_obj) {
+ oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
+ listhead->forward_to(from_space_obj);
+ from_space_obj = listhead;
+ }
+ oop observed_overflow_list = _overflow_list;
+ oop cur_overflow_list;
+ do {
+ cur_overflow_list = observed_overflow_list;
+ if (cur_overflow_list != BUSY) {
+ from_space_obj->set_klass_to_list_ptr(cur_overflow_list); // chain the new node in front of the current head
+ } else {
+ from_space_obj->set_klass_to_list_ptr(NULL); // do not chain to the BUSY marker value; start a new list
+ }
+ observed_overflow_list =
+ (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
+ } while (cur_overflow_list != observed_overflow_list); // retry if _overflow_list no longer held the value we read
}
- oop observed_overflow_list = _overflow_list;
- oop cur_overflow_list;
- do {
- cur_overflow_list = observed_overflow_list;
- if (cur_overflow_list != BUSY) {
- from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
- } else {
- from_space_obj->set_klass_to_list_ptr(NULL);
- }
- observed_overflow_list =
- (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
- } while (cur_overflow_list != observed_overflow_list);
}
+bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
+ bool res;
+ // Dispatch on UseCompressedOops and return whatever the chosen helper reports.
+ if (UseCompressedOops) {
+ res = par_scan_state->take_from_overflow_stack(); // the thread state's own overflow stack
+ } else {
+ res = take_from_overflow_list_work(par_scan_state); // the generation's _overflow_list
+ }
+ return res;
+}
+
+
// *NOTE*: The overflow list manipulation code here and
// in CMSCollector:: are very similar in shape,
// except that in the CMS case we thread the objects
@@ -1213,14 +1298,13 @@
// similar changes might be needed.
// See CMSCollector::par_take_from_overflow_list() for
// more extensive documentation comments.
-bool
-ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
+bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan_state) {
ObjToScanQueue* work_q = par_scan_state->work_queue();
- assert(work_q->size() == 0, "Should first empty local work queue");
// How many to take?
- size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
+ size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
(size_t)ParGCDesiredObjsFromOverflowList);
+ assert(par_scan_state->overflow_stack() == NULL, "Error");
if (_overflow_list == NULL) return false;
// Otherwise, there was something there; try claiming the list.