7098085: G1: partially-young GCs not initiated under certain circumstances
Reviewed-by: ysr, brutisso
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Wed Oct 12 10:25:51 2011 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Oct 13 13:54:29 2011 -0400
@@ -215,20 +215,20 @@
gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
}
- // Now do the remainder of the cleanup operation.
+ // Now do the concurrent cleanup operation.
_cm->completeCleanup();
+
// Notify anyone who's waiting that there are no more free
- // regions coming. We have to do this before we join the STS,
- // otherwise we might deadlock: a GC worker could be blocked
- // waiting for the notification whereas this thread will be
- // blocked for the pause to finish while it's trying to join
- // the STS, which is conditional on the GC workers finishing.
+ // regions coming. We have to do this before we join the STS
+ // (in fact, we should not attempt to join the STS in the
+ // interval between finishing the cleanup pause and clearing
+ // the free_regions_coming flag) otherwise we might deadlock:
+ // a GC worker could be blocked waiting for the notification
+ // whereas this thread will be blocked for the pause to finish
+ // while it's trying to join the STS, which is conditional on
+ // the GC workers finishing.
g1h->reset_free_regions_coming();
- _sts.join();
- g1_policy->record_concurrent_mark_cleanup_completed();
- _sts.leave();
-
double cleanup_end_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
@@ -240,6 +240,36 @@
guarantee(cm()->cleanup_list_is_empty(),
"at this point there should be no regions on the cleanup list");
+ // There is a tricky race before recording that the concurrent
+ // cleanup has completed and a potential Full GC starting around
+ // the same time. We want to make sure that the Full GC calls
+ // abort() on concurrent mark after
+ // record_concurrent_mark_cleanup_completed(), since abort() is
+ // the method that will reset the concurrent mark state. If we
+ // end up calling record_concurrent_mark_cleanup_completed()
+ // after abort() then we might incorrectly undo some of the work
+ // abort() did. Checking the has_aborted() flag after joining
+ // the STS allows the correct ordering of the two methods. There
+ // are two scenarios:
+ //
+ // a) If we reach here before the Full GC, the fact that we have
+ // joined the STS means that the Full GC cannot start until we
+ // leave the STS, so record_concurrent_mark_cleanup_completed()
+ // will complete before abort() is called.
+ //
+ // b) If we reach here during the Full GC, we'll be held up from
+ // joining the STS until the Full GC is done, which means that
+ // abort() will have completed and has_aborted() will return
+ // true to prevent us from calling
+ // record_concurrent_mark_cleanup_completed() (and, in fact, it's
+ // not needed any more as the concurrent mark state has been
+ // already reset).
+ _sts.join();
+ if (!cm()->has_aborted()) {
+ g1_policy->record_concurrent_mark_cleanup_completed();
+ }
+ _sts.leave();
+
if (cm()->has_aborted()) {
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
@@ -248,7 +278,7 @@
}
}
- // we now want to allow clearing of the marking bitmap to be
+ // We now want to allow clearing of the marking bitmap to be
// suspended by a collection pause.
_sts.join();
_cm->clearNextBitmap();