--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Jan 21 11:30:22 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Jan 25 10:56:22 2011 -0800
@@ -1055,7 +1055,12 @@
do {
double start_vtime_sec = os::elapsedVTime();
double start_time_sec = os::elapsedTime();
- the_task->do_marking_step(10.0);
+ double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
+
+ the_task->do_marking_step(mark_step_duration_ms,
+ true /* do_stealing */,
+ true /* do_termination */);
+
double end_time_sec = os::elapsedTime();
double end_vtime_sec = os::elapsedVTime();
double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
@@ -1111,7 +1116,8 @@
_restart_for_overflow = false;
- set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
+ size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+ set_phase(active_workers, true /* concurrent */);
CMConcurrentMarkingTask markingTask(this, cmThread());
if (parallel_marking_threads() > 0)
@@ -1176,6 +1182,12 @@
/* silent */ false,
/* use_prev_marking */ false);
}
+ assert(!restart_for_overflow(), "sanity");
+ }
+
+ // Reset the marking state if marking completed
+ if (!restart_for_overflow()) {
+ set_non_marking_state();
}
#if VERIFY_OBJS_PROCESSED
@@ -1853,6 +1865,8 @@
assert(local_free_list.is_empty(), "post-condition");
}
+// Support closures for reference procssing in G1
+
bool G1CMIsAliveClosure::do_object_b(oop obj) {
HeapWord* addr = (HeapWord*)obj;
return addr != NULL &&
@@ -1873,11 +1887,17 @@
virtual void do_oop( oop* p) { do_oop_work(p); }
template <class T> void do_oop_work(T* p) {
- oop thisOop = oopDesc::load_decode_heap_oop(p);
- HeapWord* addr = (HeapWord*)thisOop;
- if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
+ oop obj = oopDesc::load_decode_heap_oop(p);
+ HeapWord* addr = (HeapWord*)obj;
+
+ if (_cm->verbose_high())
+ gclog_or_tty->print_cr("\t[0] we're looking at location "
+ "*"PTR_FORMAT" = "PTR_FORMAT,
+ p, (void*) obj);
+
+ if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
_bitMap->mark(addr);
- _cm->mark_stack_push(thisOop);
+ _cm->mark_stack_push(obj);
}
}
};
@@ -1899,6 +1919,199 @@
}
};
+// 'Keep Alive' closure used by parallel reference processing.
+// An instance of this closure is used in the parallel reference processing
+// code rather than an instance of G1CMKeepAliveClosure. We could have used
+// the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
+// placed on to discovered ref lists once so we can mark and push with no
+// need to check whether the object has already been marked. Using the
+// G1CMKeepAliveClosure would mean, however, having all the worker threads
+// operating on the global mark stack. This means that an individual
+// worker would be doing lock-free pushes while it processes its own
+// discovered ref list followed by drain call. If the discovered ref lists
+// are unbalanced then this could cause interference with the other
+// workers. Using a CMTask (and its embedded local data structures)
+// avoids that potential interference.
+class G1CMParKeepAliveAndDrainClosure: public OopClosure {
+ ConcurrentMark* _cm;
+ CMTask* _task;
+ CMBitMap* _bitMap;
+ int _ref_counter_limit;
+ int _ref_counter;
+ public:
+ G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
+ CMTask* task,
+ CMBitMap* bitMap) :
+ _cm(cm), _task(task), _bitMap(bitMap),
+ _ref_counter_limit(G1RefProcDrainInterval)
+ {
+ assert(_ref_counter_limit > 0, "sanity");
+ _ref_counter = _ref_counter_limit;
+ }
+
+ virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+ virtual void do_oop( oop* p) { do_oop_work(p); }
+
+ template <class T> void do_oop_work(T* p) {
+ if (!_cm->has_overflown()) {
+ oop obj = oopDesc::load_decode_heap_oop(p);
+ if (_cm->verbose_high())
+ gclog_or_tty->print_cr("\t[%d] we're looking at location "
+ "*"PTR_FORMAT" = "PTR_FORMAT,
+ _task->task_id(), p, (void*) obj);
+
+ _task->deal_with_reference(obj);
+ _ref_counter--;
+
+ if (_ref_counter == 0) {
+ // We have dealt with _ref_counter_limit references, pushing them and objects
+ // reachable from them on to the local stack (and possibly the global stack).
+ // Call do_marking_step() to process these entries. We call the routine in a
+ // loop, which we'll exit if there's nothing more to do (i.e. we're done
+ // with the entries that we've pushed as a result of the deal_with_reference
+ // calls above) or we overflow.
+ // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
+ // while there may still be some work to do. (See the comment at the
+ // beginning of CMTask::do_marking_step() for those conditions - one of which
+ // is reaching the specified time target.) It is only when
+ // CMTask::do_marking_step() returns without setting the has_aborted() flag
+ // that the marking has completed.
+ do {
+ double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
+ _task->do_marking_step(mark_step_duration_ms,
+ false /* do_stealing */,
+ false /* do_termination */);
+ } while (_task->has_aborted() && !_cm->has_overflown());
+ _ref_counter = _ref_counter_limit;
+ }
+ } else {
+ if (_cm->verbose_high())
+ gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
+ }
+ }
+};
+
+class G1CMParDrainMarkingStackClosure: public VoidClosure {
+ ConcurrentMark* _cm;
+ CMTask* _task;
+ public:
+ G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
+ _cm(cm), _task(task)
+ {}
+
+ void do_void() {
+ do {
+ if (_cm->verbose_high())
+ gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id());
+
+ // We call CMTask::do_marking_step() to completely drain the local and
+ // global marking stacks. The routine is called in a loop, which we'll
+ // exit if there's nothing more to do (i.e. we'completely drained the
+ // entries that were pushed as a result of applying the
+ // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
+ // lists above) or we overflow the global marking stack.
+ // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
+ // while there may still be some work to do. (See the comment at the
+ // beginning of CMTask::do_marking_step() for those conditions - one of which
+ // is reaching the specified time target.) It is only when
+ // CMTask::do_marking_step() returns without setting the has_aborted() flag
+ // that the marking has completed.
+
+ _task->do_marking_step(1000000000.0 /* something very large */,
+ true /* do_stealing */,
+ true /* do_termination */);
+ } while (_task->has_aborted() && !_cm->has_overflown());
+ }
+};
+
+// Implementation of AbstractRefProcTaskExecutor for G1
+class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
+private:
+ G1CollectedHeap* _g1h;
+ ConcurrentMark* _cm;
+ CMBitMap* _bitmap;
+ WorkGang* _workers;
+ int _active_workers;
+
+public:
+ G1RefProcTaskExecutor(G1CollectedHeap* g1h,
+ ConcurrentMark* cm,
+ CMBitMap* bitmap,
+ WorkGang* workers,
+ int n_workers) :
+ _g1h(g1h), _cm(cm), _bitmap(bitmap),
+ _workers(workers), _active_workers(n_workers)
+ { }
+
+ // Executes the given task using concurrent marking worker threads.
+ virtual void execute(ProcessTask& task);
+ virtual void execute(EnqueueTask& task);
+};
+
+class G1RefProcTaskProxy: public AbstractGangTask {
+ typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
+ ProcessTask& _proc_task;
+ G1CollectedHeap* _g1h;
+ ConcurrentMark* _cm;
+ CMBitMap* _bitmap;
+
+public:
+ G1RefProcTaskProxy(ProcessTask& proc_task,
+ G1CollectedHeap* g1h,
+ ConcurrentMark* cm,
+ CMBitMap* bitmap) :
+ AbstractGangTask("Process reference objects in parallel"),
+ _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
+ {}
+
+ virtual void work(int i) {
+ CMTask* marking_task = _cm->task(i);
+ G1CMIsAliveClosure g1_is_alive(_g1h);
+ G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
+ G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
+
+ _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
+ }
+};
+
+void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
+ assert(_workers != NULL, "Need parallel worker threads.");
+
+ G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
+
+ // We need to reset the phase for each task execution so that
+ // the termination protocol of CMTask::do_marking_step works.
+ _cm->set_phase(_active_workers, false /* concurrent */);
+ _g1h->set_par_threads(_active_workers);
+ _workers->run_task(&proc_task_proxy);
+ _g1h->set_par_threads(0);
+}
+
+class G1RefEnqueueTaskProxy: public AbstractGangTask {
+ typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
+ EnqueueTask& _enq_task;
+
+public:
+ G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
+ AbstractGangTask("Enqueue reference objects in parallel"),
+ _enq_task(enq_task)
+ { }
+
+ virtual void work(int i) {
+ _enq_task.work(i);
+ }
+};
+
+void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
+ assert(_workers != NULL, "Need parallel worker threads.");
+
+ G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
+
+ _g1h->set_par_threads(_active_workers);
+ _workers->run_task(&enq_task_proxy);
+ _g1h->set_par_threads(0);
+}
+
void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
ResourceMark rm;
HandleMark hm;
@@ -1917,18 +2130,52 @@
G1CMDrainMarkingStackClosure
g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
- // XXXYYY Also: copy the parallel ref processing code from CMS.
- rp->process_discovered_references(&g1_is_alive,
- &g1_keep_alive,
- &g1_drain_mark_stack,
- NULL);
+ // We use the work gang from the G1CollectedHeap and we utilize all
+ // the worker threads.
+ int active_workers = MAX2(MIN2(g1h->workers()->total_workers(), (int)_max_task_num), 1);
+
+ G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
+ g1h->workers(), active_workers);
+
+ if (rp->processing_is_mt()) {
+ // Set the degree of MT here. If the discovery is done MT, there
+ // may have been a different number of threads doing the discovery
+ // and a different number of discovered lists may have Ref objects.
+ // That is OK as long as the Reference lists are balanced (see
+ // balance_all_queues() and balance_queues()).
+ rp->set_mt_degree(active_workers);
+
+ rp->process_discovered_references(&g1_is_alive,
+ &g1_keep_alive,
+ &g1_drain_mark_stack,
+ &par_task_executor);
+
+ // The work routines of the parallel keep_alive and drain_marking_stack
+ // will set the has_overflown flag if we overflow the global marking
+ // stack.
+ } else {
+ rp->process_discovered_references(&g1_is_alive,
+ &g1_keep_alive,
+ &g1_drain_mark_stack,
+ NULL);
+
+ }
+
assert(_markStack.overflow() || _markStack.isEmpty(),
- "mark stack should be empty (unless it overflowed)");
+ "mark stack should be empty (unless it overflowed)");
if (_markStack.overflow()) {
+ // Should have been done already when we tried to push an
+ // entry on to the global mark stack. But let's do it again.
set_has_overflown();
}
- rp->enqueue_discovered_references();
+ if (rp->processing_is_mt()) {
+ assert(rp->num_q() == active_workers, "why not");
+ rp->enqueue_discovered_references(&par_task_executor);
+ } else {
+ rp->enqueue_discovered_references();
+ }
+
rp->verify_no_references_recorded();
assert(!rp->discovery_enabled(), "should have been disabled");
@@ -1955,7 +2202,9 @@
CMTask* task = _cm->task(worker_i);
task->record_start_time();
do {
- task->do_marking_step(1000000000.0 /* something very large */);
+ task->do_marking_step(1000000000.0 /* something very large */,
+ true /* do_stealing */,
+ true /* do_termination */);
} while (task->has_aborted() && !_cm->has_overflown());
// If we overflow, then we do not want to restart. We instead
// want to abort remark and do concurrent marking again.
@@ -1978,7 +2227,7 @@
G1CollectedHeap::StrongRootsScope srs(g1h);
// this is remark, so we'll use up all available threads
int active_workers = ParallelGCThreads;
- set_phase(active_workers, false);
+ set_phase(active_workers, false /* concurrent */);
CMRemarkTask remarkTask(this);
// We will start all available threads, even if we decide that the
@@ -1992,7 +2241,7 @@
G1CollectedHeap::StrongRootsScope srs(g1h);
// this is remark, so we'll use up all available threads
int active_workers = 1;
- set_phase(active_workers, false);
+ set_phase(active_workers, false /* concurrent */);
CMRemarkTask remarkTask(this);
// We will start all available threads, even if we decide that the
@@ -2005,9 +2254,6 @@
print_stats();
- if (!restart_for_overflow())
- set_non_marking_state();
-
#if VERIFY_OBJS_PROCESSED
if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
@@ -3124,7 +3370,7 @@
// do nothing
}
#else // _CHECK_BOTH_FINGERS_
- // we will only check the global finger
+ // we will only check the global finger
if (objAddr < global_finger) {
// see long comment above
@@ -3249,7 +3495,7 @@
double elapsed_time_ms = curr_time_ms - _start_time_ms;
if (elapsed_time_ms > _time_target_ms) {
set_has_aborted();
- _has_aborted_timed_out = true;
+ _has_timed_out = true;
statsOnly( ++_aborted_timed_out );
return;
}
@@ -3754,7 +4000,9 @@
*****************************************************************************/
-void CMTask::do_marking_step(double time_target_ms) {
+void CMTask::do_marking_step(double time_target_ms,
+ bool do_stealing,
+ bool do_termination) {
assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
assert(concurrent() == _cm->concurrent(), "they should be the same");
@@ -3794,7 +4042,7 @@
// clear all flags
clear_has_aborted();
- _has_aborted_timed_out = false;
+ _has_timed_out = false;
_draining_satb_buffers = false;
++_calls;
@@ -3970,7 +4218,7 @@
drain_global_stack(false);
// Attempt at work stealing from other task's queues.
- if (!has_aborted()) {
+ if (do_stealing && !has_aborted()) {
// We have not aborted. This means that we have finished all that
// we could. Let's try to do some stealing...
@@ -4011,7 +4259,7 @@
// We still haven't aborted. Now, let's try to get into the
// termination protocol.
- if (!has_aborted()) {
+ if (do_termination && !has_aborted()) {
// We cannot check whether the global stack is empty, since other
// tasks might be concurrently pushing objects on it. We also cannot
// check if the region stack is empty because if a thread is aborting
@@ -4087,7 +4335,7 @@
statsOnly( ++_aborted );
- if (_has_aborted_timed_out) {
+ if (_has_timed_out) {
double diff_ms = elapsed_time_ms - _time_target_ms;
// Keep statistics of how well we did with respect to hitting
// our target only if we actually timed out (if we aborted for