7132029: G1: mixed GC phase lasts for longer than it should
authortonyp
Wed, 15 Feb 2012 13:06:53 -0500
changeset 11756 28b6fe22e43d
parent 11755 e68b5a95367b
child 11757 cdf6204b114a
7132029: G1: mixed GC phase lasts for longer than it should Summary: Revamp of the mechanism that chooses old regions for inclusion in the CSet. It simplifies the code and introduces min and max bounds on the number of old regions added to the CSet at each mixed GC to avoid pathological cases. It also ensures that when we do a mixed GC we'll always find old regions to add to the CSet (i.e., it eliminates the case where a mixed GC will collect no old regions which can happen today). Reviewed-by: johnc, brutisso
hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp
hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp
hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
--- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Feb 15 13:06:53 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,8 @@
 
 #ifndef PRODUCT
 bool CSetChooserCache::verify() {
+  guarantee(false, "CSetChooserCache::verify(): don't call this any more");
+
   int index = _first;
   HeapRegion *prev = NULL;
   for (int i = 0; i < _occupancy; ++i) {
@@ -75,6 +77,8 @@
 #endif // PRODUCT
 
 void CSetChooserCache::insert(HeapRegion *hr) {
+  guarantee(false, "CSetChooserCache::insert(): don't call this any more");
+
   assert(!is_full(), "cache should not be empty");
   hr->calc_gc_efficiency();
 
@@ -104,6 +108,9 @@
 }
 
 HeapRegion *CSetChooserCache::remove_first() {
+  guarantee(false, "CSetChooserCache::remove_first(): "
+                   "don't call this any more");
+
   if (_occupancy > 0) {
     assert(_cache[_first] != NULL, "cache should have at least one region");
     HeapRegion *ret = _cache[_first];
@@ -118,16 +125,35 @@
   }
 }
 
-static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
+// Even though we don't use the GC efficiency in our heuristics as
+// much as we used to, we still order according to GC efficiency. This
+// will cause regions with a lot of live objects and large RSets to
+// end up at the end of the array. Given that we might skip collecting
+// the last few old regions, if after a few mixed GCs the remaining
+// have reclaimable bytes under a certain threshold, the hope is that
+// the ones we'll skip are ones with both large RSets and a lot of
+// live objects, not the ones with just a lot of live objects if we
+// ordered according to the amount of reclaimable bytes per region.
+static int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
   if (hr1 == NULL) {
-    if (hr2 == NULL) return 0;
-    else return 1;
+    if (hr2 == NULL) {
+      return 0;
+    } else {
+      return 1;
+    }
   } else if (hr2 == NULL) {
     return -1;
   }
-  if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1;
-  else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1;
-  else return 0;
+
+  double gc_eff1 = hr1->gc_efficiency();
+  double gc_eff2 = hr2->gc_efficiency();
+  if (gc_eff1 > gc_eff2) {
+    return -1;
+  } if (gc_eff1 < gc_eff2) {
+    return 1;
+  } else {
+    return 0;
+  }
 }
 
 static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
@@ -151,51 +177,61 @@
   //
   _markedRegions((ResourceObj::set_allocation_type((address)&_markedRegions,
                                              ResourceObj::C_HEAP),
-                  100),
-                 true),
-  _curMarkedIndex(0),
-  _numMarkedRegions(0),
-  _unmarked_age_1_returned_as_new(false),
-  _first_par_unreserved_idx(0)
-{}
-
-
+                  100), true /* C_Heap */),
+    _curr_index(0), _length(0),
+    _regionLiveThresholdBytes(0), _remainingReclaimableBytes(0),
+    _first_par_unreserved_idx(0) {
+  _regionLiveThresholdBytes =
+    HeapRegion::GrainBytes * (size_t) G1OldCSetRegionLiveThresholdPercent / 100;
+}
 
 #ifndef PRODUCT
 bool CollectionSetChooser::verify() {
+  guarantee(_length >= 0, err_msg("_length: %d", _length));
+  guarantee(0 <= _curr_index && _curr_index <= _length,
+            err_msg("_curr_index: %d _length: %d", _curr_index, _length));
   int index = 0;
-  guarantee(_curMarkedIndex <= _numMarkedRegions,
-            "_curMarkedIndex should be within bounds");
-  while (index < _curMarkedIndex) {
-    guarantee(_markedRegions.at(index++) == NULL,
-              "all entries before _curMarkedIndex should be NULL");
+  size_t sum_of_reclaimable_bytes = 0;
+  while (index < _curr_index) {
+    guarantee(_markedRegions.at(index) == NULL,
+              "all entries before _curr_index should be NULL");
+    index += 1;
   }
   HeapRegion *prev = NULL;
-  while (index < _numMarkedRegions) {
+  while (index < _length) {
     HeapRegion *curr = _markedRegions.at(index++);
     guarantee(curr != NULL, "Regions in _markedRegions array cannot be NULL");
     int si = curr->sort_index();
     guarantee(!curr->is_young(), "should not be young!");
+    guarantee(!curr->isHumongous(), "should not be humongous!");
     guarantee(si > -1 && si == (index-1), "sort index invariant");
     if (prev != NULL) {
-      guarantee(orderRegions(prev, curr) != 1, "regions should be sorted");
+      guarantee(orderRegions(prev, curr) != 1,
+                err_msg("GC eff prev: %1.4f GC eff curr: %1.4f",
+                        prev->gc_efficiency(), curr->gc_efficiency()));
     }
+    sum_of_reclaimable_bytes += curr->reclaimable_bytes();
     prev = curr;
   }
-  return _cache.verify();
+  guarantee(sum_of_reclaimable_bytes == _remainingReclaimableBytes,
+            err_msg("reclaimable bytes inconsistent, "
+                    "remaining: "SIZE_FORMAT" sum: "SIZE_FORMAT,
+                    _remainingReclaimableBytes, sum_of_reclaimable_bytes));
+  return true;
 }
 #endif
 
-void
-CollectionSetChooser::fillCache() {
-  while (!_cache.is_full() && (_curMarkedIndex < _numMarkedRegions)) {
-    HeapRegion* hr = _markedRegions.at(_curMarkedIndex);
+void CollectionSetChooser::fillCache() {
+  guarantee(false, "fillCache: don't call this any more");
+
+  while (!_cache.is_full() && (_curr_index < _length)) {
+    HeapRegion* hr = _markedRegions.at(_curr_index);
     assert(hr != NULL,
            err_msg("Unexpected NULL hr in _markedRegions at index %d",
-                   _curMarkedIndex));
-    _curMarkedIndex += 1;
+                   _curr_index));
+    _curr_index += 1;
     assert(!hr->is_young(), "should not be young!");
-    assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant");
+    assert(hr->sort_index() == _curr_index-1, "sort_index invariant");
     _markedRegions.at_put(hr->sort_index(), NULL);
     _cache.insert(hr);
     assert(!_cache.is_empty(), "cache should not be empty");
@@ -203,9 +239,7 @@
   assert(verify(), "cache should be consistent");
 }
 
-void
-CollectionSetChooser::sortMarkedHeapRegions() {
-  guarantee(_cache.is_empty(), "cache should be empty");
+void CollectionSetChooser::sortMarkedHeapRegions() {
   // First trim any unused portion of the top in the parallel case.
   if (_first_par_unreserved_idx > 0) {
     if (G1PrintParCleanupStats) {
@@ -217,43 +251,78 @@
     _markedRegions.trunc_to(_first_par_unreserved_idx);
   }
   _markedRegions.sort(orderRegions);
-  assert(_numMarkedRegions <= _markedRegions.length(), "Requirement");
-  assert(_numMarkedRegions == 0
-         || _markedRegions.at(_numMarkedRegions-1) != NULL,
-         "Testing _numMarkedRegions");
-  assert(_numMarkedRegions == _markedRegions.length()
-         || _markedRegions.at(_numMarkedRegions) == NULL,
-         "Testing _numMarkedRegions");
+  assert(_length <= _markedRegions.length(), "Requirement");
+  assert(_length == 0 || _markedRegions.at(_length - 1) != NULL,
+         "Testing _length");
+  assert(_length == _markedRegions.length() ||
+                        _markedRegions.at(_length) == NULL, "Testing _length");
   if (G1PrintParCleanupStats) {
-    gclog_or_tty->print_cr("     Sorted %d marked regions.", _numMarkedRegions);
+    gclog_or_tty->print_cr("     Sorted %d marked regions.", _length);
   }
-  for (int i = 0; i < _numMarkedRegions; i++) {
+  for (int i = 0; i < _length; i++) {
     assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
     _markedRegions.at(i)->set_sort_index(i);
   }
   if (G1PrintRegionLivenessInfo) {
     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Sorting");
-    for (int i = 0; i < _numMarkedRegions; ++i) {
+    for (int i = 0; i < _length; ++i) {
       HeapRegion* r = _markedRegions.at(i);
       cl.doHeapRegion(r);
     }
   }
-  assert(verify(), "should now be sorted");
+  assert(verify(), "CSet chooser verification");
 }
 
-void
-CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
+size_t CollectionSetChooser::calcMinOldCSetLength() {
+  // The min old CSet region bound is based on the maximum desired
+  // number of mixed GCs after a cycle. I.e., even if some old regions
+  // look expensive, we should add them to the CSet anyway to make
+  // sure we go through the available old regions in no more than the
+  // maximum desired number of mixed GCs.
+  //
+  // The calculation is based on the number of marked regions we added
+  // to the CSet chooser in the first place, not how many remain, so
+  // that the result is the same during all mixed GCs that follow a cycle.
+
+  const size_t region_num = (size_t) _length;
+  const size_t gc_num = (size_t) G1MaxMixedGCNum;
+  size_t result = region_num / gc_num;
+  // emulate ceiling
+  if (result * gc_num < region_num) {
+    result += 1;
+  }
+  return result;
+}
+
+size_t CollectionSetChooser::calcMaxOldCSetLength() {
+  // The max old CSet region bound is based on the threshold expressed
+  // as a percentage of the heap size. I.e., it should bound the
+  // number of old regions added to the CSet irrespective of how many
+  // of them are available.
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  const size_t region_num = g1h->n_regions();
+  const size_t perc = (size_t) G1OldCSetRegionThresholdPercent;
+  size_t result = region_num * perc / 100;
+  // emulate ceiling
+  if (100 * result < region_num * perc) {
+    result += 1;
+  }
+  return result;
+}
+
+void CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
   assert(!hr->isHumongous(),
          "Humongous regions shouldn't be added to the collection set");
   assert(!hr->is_young(), "should not be young!");
   _markedRegions.append(hr);
-  _numMarkedRegions++;
+  _length++;
+  _remainingReclaimableBytes += hr->reclaimable_bytes();
   hr->calc_gc_efficiency();
 }
 
-void
-CollectionSetChooser::
-prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
+void CollectionSetChooser::prepareForAddMarkedHeapRegionsPar(size_t n_regions,
+                                                             size_t chunkSize) {
   _first_par_unreserved_idx = 0;
   int n_threads = ParallelGCThreads;
   if (UseDynamicNumberOfGCThreads) {
@@ -274,8 +343,7 @@
   _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL);
 }
 
-jint
-CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
+jint CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
   // Don't do this assert because this can be called at a point
   // where the loop up stream will not execute again but might
   // try to claim more chunks (loop test has not been done yet).
@@ -287,83 +355,37 @@
   return res - n_regions;
 }
 
-void
-CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
+void CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
   assert(_markedRegions.at(index) == NULL, "precondition");
   assert(!hr->is_young(), "should not be young!");
   _markedRegions.at_put(index, hr);
   hr->calc_gc_efficiency();
 }
 
-void
-CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) {
-  (void)Atomic::add(inc_by, &_numMarkedRegions);
-}
-
-void
-CollectionSetChooser::clearMarkedHeapRegions(){
-  for (int i = 0; i < _markedRegions.length(); i++) {
-    HeapRegion* r =   _markedRegions.at(i);
-    if (r != NULL) r->set_sort_index(-1);
+void CollectionSetChooser::updateTotals(jint region_num,
+                                        size_t reclaimable_bytes) {
+  // Only take the lock if we actually need to update the totals.
+  if (region_num > 0) {
+    assert(reclaimable_bytes > 0, "invariant");
+    // We could have just used atomics instead of taking the
+    // lock. However, we currently don't have an atomic add for size_t.
+    MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+    _length += (int) region_num;
+    _remainingReclaimableBytes += reclaimable_bytes;
+  } else {
+    assert(reclaimable_bytes == 0, "invariant");
   }
-  _markedRegions.clear();
-  _curMarkedIndex = 0;
-  _numMarkedRegions = 0;
-  _cache.clear();
-};
-
-void
-CollectionSetChooser::updateAfterFullCollection() {
-  clearMarkedHeapRegions();
 }
 
-// if time_remaining < 0.0, then this method should try to return
-// a region, whether it fits within the remaining time or not
-HeapRegion*
-CollectionSetChooser::getNextMarkedRegion(double time_remaining,
-                                          double avg_prediction) {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  G1CollectorPolicy* g1p = g1h->g1_policy();
-  fillCache();
-  if (_cache.is_empty()) {
-    assert(_curMarkedIndex == _numMarkedRegions,
-           "if cache is empty, list should also be empty");
-    ergo_verbose0(ErgoCSetConstruction,
-                  "stop adding old regions to CSet",
-                  ergo_format_reason("cache is empty"));
-    return NULL;
-  }
-
-  HeapRegion *hr = _cache.get_first();
-  assert(hr != NULL, "if cache not empty, first entry should be non-null");
-  double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false);
-
-  if (g1p->adaptive_young_list_length()) {
-    if (time_remaining - predicted_time < 0.0) {
-      g1h->check_if_region_is_too_expensive(predicted_time);
-      ergo_verbose2(ErgoCSetConstruction,
-                    "stop adding old regions to CSet",
-                    ergo_format_reason("predicted old region time higher than remaining time")
-                    ergo_format_ms("predicted old region time")
-                    ergo_format_ms("remaining time"),
-                    predicted_time, time_remaining);
-      return NULL;
-    }
-  } else {
-    double threshold = 2.0 * avg_prediction;
-    if (predicted_time > threshold) {
-      ergo_verbose2(ErgoCSetConstruction,
-                    "stop adding old regions to CSet",
-                    ergo_format_reason("predicted old region time higher than threshold")
-                    ergo_format_ms("predicted old region time")
-                    ergo_format_ms("threshold"),
-                    predicted_time, threshold);
-      return NULL;
+void CollectionSetChooser::clearMarkedHeapRegions() {
+  for (int i = 0; i < _markedRegions.length(); i++) {
+    HeapRegion* r = _markedRegions.at(i);
+    if (r != NULL) {
+      r->set_sort_index(-1);
     }
   }
-
-  HeapRegion *hr2 = _cache.remove_first();
-  assert(hr == hr2, "cache contents should not have changed");
-
-  return hr;
-}
+  _markedRegions.clear();
+  _curr_index = 0;
+  _length = 0;
+  _remainingReclaimableBytes = 0;
+};
--- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Wed Feb 15 13:06:53 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,28 +28,6 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "utilities/growableArray.hpp"
 
-// We need to sort heap regions by collection desirability.
-// This sorting is currently done in two "stages". An initial sort is
-// done following a cleanup pause as soon as all of the marked but
-// non-empty regions have been identified and the completely empty
-// ones reclaimed.
-// This gives us a global sort on a GC efficiency metric
-// based on predictive data available at that time. However,
-// any of these regions that are collected will only be collected
-// during a future GC pause, by which time it is possible that newer
-// data might allow us to revise and/or refine the earlier
-// pause predictions, leading to changes in expected gc efficiency
-// order. To somewhat mitigate this obsolescence, more so in the
-// case of regions towards the end of the list, which will be
-// picked later, these pre-sorted regions from the _markedRegions
-// array are not used as is, but a small prefix thereof is
-// insertion-sorted again into a small cache, based on more
-// recent remembered set information. Regions are then drawn
-// from this cache to construct the collection set at each
-// incremental GC.
-// This scheme and/or its implementation may be subject to
-// revision in the future.
-
 class CSetChooserCache VALUE_OBJ_CLASS_SPEC {
 private:
   enum {
@@ -103,24 +81,82 @@
 class CollectionSetChooser: public CHeapObj {
 
   GrowableArray<HeapRegion*> _markedRegions;
-  int _curMarkedIndex;
-  int _numMarkedRegions;
-  CSetChooserCache _cache;
+
+  // The index of the next candidate old region to be considered for
+  // addition to the CSet.
+  int _curr_index;
+
+  // The number of candidate old regions added to the CSet chooser.
+  int _length;
 
-  // True iff last collection pause ran of out new "age 0" regions, and
-  // returned an "age 1" region.
-  bool _unmarked_age_1_returned_as_new;
+  CSetChooserCache _cache;
+  jint _first_par_unreserved_idx;
 
-  jint _first_par_unreserved_idx;
+  // If a region has more live bytes than this threshold, it will not
+  // be added to the CSet chooser and will not be a candidate for
+  // collection.
+  size_t _regionLiveThresholdBytes;
+
+  // The sum of reclaimable bytes over all the regions in the CSet chooser.
+  size_t _remainingReclaimableBytes;
 
 public:
 
-  HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction);
+  // Return the current candidate region to be considered for
+  // collection without removing it from the CSet chooser.
+  HeapRegion* peek() {
+    HeapRegion* res = NULL;
+    if (_curr_index < _length) {
+      res = _markedRegions.at(_curr_index);
+      assert(res != NULL,
+             err_msg("Unexpected NULL hr in _markedRegions at index %d",
+                     _curr_index));
+    }
+    return res;
+  }
+
+  // Remove the given region from the CSet chooser and move to the
+  // next one. The given region should be the current candidate region
+  // in the CSet chooser.
+  void remove_and_move_to_next(HeapRegion* hr) {
+    assert(hr != NULL, "pre-condition");
+    assert(_curr_index < _length, "pre-condition");
+    assert(_markedRegions.at(_curr_index) == hr, "pre-condition");
+    hr->set_sort_index(-1);
+    _markedRegions.at_put(_curr_index, NULL);
+    assert(hr->reclaimable_bytes() <= _remainingReclaimableBytes,
+           err_msg("remaining reclaimable bytes inconsistent "
+                   "from region: "SIZE_FORMAT" remaining: "SIZE_FORMAT,
+                   hr->reclaimable_bytes(), _remainingReclaimableBytes));
+    _remainingReclaimableBytes -= hr->reclaimable_bytes();
+    _curr_index += 1;
+  }
 
   CollectionSetChooser();
 
   void sortMarkedHeapRegions();
   void fillCache();
+
+  // Determine whether to add the given region to the CSet chooser or
+  // not. Currently, we skip humongous regions (we never add them to
+  // the CSet, we only reclaim them during cleanup) and regions whose
+  // live bytes are over the threshold.
+  bool shouldAdd(HeapRegion* hr) {
+    assert(hr->is_marked(), "pre-condition");
+    assert(!hr->is_young(), "should never consider young regions");
+    return !hr->isHumongous() &&
+            hr->live_bytes() < _regionLiveThresholdBytes;
+  }
+
+  // Calculate the minimum number of old regions we'll add to the CSet
+  // during a mixed GC.
+  size_t calcMinOldCSetLength();
+
+  // Calculate the maximum number of old regions we'll add to the CSet
+  // during a mixed GC.
+  size_t calcMaxOldCSetLength();
+
+  // Serial version.
   void addMarkedHeapRegion(HeapRegion *hr);
 
   // Must be called before calls to getParMarkedHeapRegionChunk.
@@ -133,14 +169,21 @@
   // Set the marked array entry at index to hr.  Careful to claim the index
   // first if in parallel.
   void setMarkedHeapRegion(jint index, HeapRegion* hr);
-  // Atomically increment the number of claimed regions by "inc_by".
-  void incNumMarkedHeapRegions(jint inc_by);
+  // Atomically increment the number of added regions by region_num
+  // and the amount of reclaimable bytes by reclaimable_bytes.
+  void updateTotals(jint region_num, size_t reclaimable_bytes);
 
   void clearMarkedHeapRegions();
 
-  void updateAfterFullCollection();
+  // Return the number of candidate regions that remain to be collected.
+  size_t remainingRegions() { return _length - _curr_index; }
 
-  bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; }
+  // Determine whether the CSet chooser has more candidate regions or not.
+  bool isEmpty() { return remainingRegions() == 0; }
+
+  // Return the reclaimable bytes that remain to be collected on
+  // all the candidate regions in the CSet chooser.
+  size_t remainingReclaimableBytes () { return _remainingReclaimableBytes; }
 
   // Returns true if the used portion of "_markedRegions" is properly
   // sorted, otherwise asserts false.
@@ -148,9 +191,17 @@
   bool verify(void);
   bool regionProperlyOrdered(HeapRegion* r) {
     int si = r->sort_index();
-    return (si == -1) ||
-      (si > -1 && _markedRegions.at(si) == r) ||
-      (si < -1 && _cache.region_in_cache(r));
+    if (si > -1) {
+      guarantee(_curr_index <= si && si < _length,
+                err_msg("curr: %d sort index: %d: length: %d",
+                        _curr_index, si, _length));
+      guarantee(_markedRegions.at(si) == r,
+                err_msg("sort index: %d at: "PTR_FORMAT" r: "PTR_FORMAT,
+                        si, _markedRegions.at(si), r));
+    } else {
+      guarantee(si == -1, err_msg("sort index: %d", si));
+    }
+    return true;
   }
 #endif
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Feb 15 13:06:53 2012 -0500
@@ -3447,16 +3447,6 @@
   }
 }
 
-double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr,
-                                                       bool young) {
-  return _g1_policy->predict_region_elapsed_time_ms(hr, young);
-}
-
-void G1CollectedHeap::check_if_region_is_too_expensive(double
-                                                           predicted_time_ms) {
-  _g1_policy->check_if_region_is_too_expensive(predicted_time_ms);
-}
-
 size_t G1CollectedHeap::pending_card_num() {
   size_t extra_cards = 0;
   JavaThread *curr = Threads::first();
@@ -3728,12 +3718,12 @@
         g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
 #endif // YOUNG_LIST_VERBOSE
 
-        g1_policy()->choose_collection_set(target_pause_time_ms);
+        g1_policy()->finalize_cset(target_pause_time_ms);
 
         _cm->note_start_of_gc();
         // We should not verify the per-thread SATB buffers given that
         // we have not filtered them yet (we'll do so during the
-        // GC). We also call this after choose_collection_set() to
+        // GC). We also call this after finalize_cset() to
         // ensure that the CSet has been finalized.
         _cm->verify_no_cset_oops(true  /* verify_stacks */,
                                  true  /* verify_enqueued_buffers */,
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Feb 15 13:06:53 2012 -0500
@@ -1182,6 +1182,12 @@
   bool free_regions_coming() { return _free_regions_coming; }
   void wait_while_free_regions_coming();
 
+  // Determine whether the given region is one that we are using as an
+  // old GC alloc region.
+  bool is_old_gc_alloc_region(HeapRegion* hr) {
+    return hr == _retained_old_gc_alloc_region;
+  }
+
   // Perform a collection of the heap; intended for use in implementing
   // "System.gc".  This probably implies as full a collection as the
   // "CollectedHeap" supports.
@@ -1662,8 +1668,6 @@
 public:
   void stop_conc_gc_threads();
 
-  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
-  void check_if_region_is_too_expensive(double predicted_time_ms);
   size_t pending_card_num();
   size_t max_pending_card_num();
   size_t cards_scanned();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Feb 15 13:06:53 2012 -0500
@@ -206,7 +206,6 @@
 
   _initiate_conc_mark_if_possible(false),
   _during_initial_mark_pause(false),
-  _should_revert_to_young_gcs(false),
   _last_young_gc(false),
   _last_gc_was_young(false),
 
@@ -295,9 +294,6 @@
   _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
   _par_last_gc_worker_other_times_ms = new double[_parallel_gc_threads];
 
-  // start conservatively
-  _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
-
   int index;
   if (ParallelGCThreads == 0)
     index = 0;
@@ -629,16 +625,9 @@
       // possible to maximize how many old regions we can add to it.
     }
   } else {
-    if (gcs_are_young()) {
-      young_list_target_length = _young_list_fixed_length;
-    } else {
-      // A bit arbitrary: during mixed GCs we allocate half
-      // the young regions to try to add old regions to the CSet.
-      young_list_target_length = _young_list_fixed_length / 2;
-      // We choose to accept that we might go under the desired min
-      // length given that we intentionally ask for a smaller young gen.
-      desired_min_length = absolute_min_length;
-    }
+    // The user asked for a fixed young gen so we'll fix the young gen
+    // whether the next GC is young or mixed.
+    young_list_target_length = _young_list_fixed_length;
   }
 
   // Make sure we don't go over the desired max length, nor under the
@@ -872,7 +861,6 @@
   // transitions and make sure we start with young GCs after the Full GC.
   set_gcs_are_young(true);
   _last_young_gc = false;
-  _should_revert_to_young_gcs = false;
   clear_initiate_conc_mark_if_possible();
   clear_during_initial_mark_pause();
   _known_garbage_bytes = 0;
@@ -889,7 +877,7 @@
   // Reset survivors SurvRateGroup.
   _survivor_surv_rate_group->reset();
   update_young_list_target_length();
-  _collectionSetChooser->updateAfterFullCollection();
+  _collectionSetChooser->clearMarkedHeapRegions();
 }
 
 void G1CollectorPolicy::record_stop_world_start() {
@@ -1000,7 +988,6 @@
 }
 
 void G1CollectorPolicy::record_concurrent_mark_cleanup_completed() {
-  _should_revert_to_young_gcs = false;
   _last_young_gc = true;
   _in_marking_window = false;
 }
@@ -1205,9 +1192,7 @@
   last_pause_included_initial_mark = during_initial_mark_pause();
   if (last_pause_included_initial_mark) {
     record_concurrent_mark_init_end(0.0);
-  }
-
-  if (!_last_young_gc && need_to_start_conc_mark("end of GC")) {
+  } else if (!_last_young_gc && need_to_start_conc_mark("end of GC")) {
     // Note: this might have already been set, if during the last
     // pause we decided to start a cycle but at the beginning of
     // this pause we decided to postpone it. That's OK.
@@ -1492,12 +1477,14 @@
   }
 
   if (_last_young_gc) {
+    // This is supposed to to be the "last young GC" before we start
+    // doing mixed GCs. Here we decide whether to start mixed GCs or not.
+
     if (!last_pause_included_initial_mark) {
-      ergo_verbose2(ErgoMixedGCs,
-                    "start mixed GCs",
-                    ergo_format_byte_perc("known garbage"),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
-      set_gcs_are_young(false);
+      if (next_gc_should_be_mixed("start mixed GCs",
+                                  "do not start mixed GCs")) {
+        set_gcs_are_young(false);
+      }
     } else {
       ergo_verbose0(ErgoMixedGCs,
                     "do not start mixed GCs",
@@ -1507,39 +1494,14 @@
   }
 
   if (!_last_gc_was_young) {
-    if (_should_revert_to_young_gcs) {
-      ergo_verbose2(ErgoMixedGCs,
-                    "end mixed GCs",
-                    ergo_format_reason("mixed GCs end requested")
-                    ergo_format_byte_perc("known garbage"),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
-      set_gcs_are_young(true);
-    } else if (_known_garbage_ratio < 0.05) {
-      ergo_verbose3(ErgoMixedGCs,
-               "end mixed GCs",
-               ergo_format_reason("known garbage percent lower than threshold")
-               ergo_format_byte_perc("known garbage")
-               ergo_format_perc("threshold"),
-               _known_garbage_bytes, _known_garbage_ratio * 100.0,
-               0.05 * 100.0);
-      set_gcs_are_young(true);
-    } else if (adaptive_young_list_length() &&
-              (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) {
-      ergo_verbose5(ErgoMixedGCs,
-                    "end mixed GCs",
-                    ergo_format_reason("current GC efficiency lower than "
-                                       "predicted young GC efficiency")
-                    ergo_format_double("GC efficiency factor")
-                    ergo_format_double("current GC efficiency")
-                    ergo_format_double("predicted young GC efficiency")
-                    ergo_format_byte_perc("known garbage"),
-                    get_gc_eff_factor(), cur_efficiency,
-                    predict_young_gc_eff(),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
+    // This is a mixed GC. Here we decide whether to continue doing
+    // mixed GCs or not.
+
+    if (!next_gc_should_be_mixed("continue mixed GCs",
+                                 "do not continue mixed GCs")) {
       set_gcs_are_young(true);
     }
   }
-  _should_revert_to_young_gcs = false;
 
   if (_last_gc_was_young && !_during_marking) {
     _young_gc_eff_seq->add(cur_efficiency);
@@ -1648,15 +1610,6 @@
 
     _pending_cards_seq->add((double) _pending_cards);
     _rs_lengths_seq->add((double) _max_rs_lengths);
-
-    double expensive_region_limit_ms =
-      (double) MaxGCPauseMillis - predict_constant_other_time_ms();
-    if (expensive_region_limit_ms < 0.0) {
-      // this means that the other time was predicted to be longer than
-      // than the max pause time
-      expensive_region_limit_ms = (double) MaxGCPauseMillis;
-    }
-    _expensive_region_limit_ms = expensive_region_limit_ms;
   }
 
   _in_marking_window = new_in_marking_window;
@@ -1838,13 +1791,11 @@
   if (hr->is_marked())
     bytes_to_copy = hr->max_live_bytes();
   else {
-    guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
-               "invariant" );
+    assert(hr->is_young() && hr->age_in_surv_rate_group() != -1, "invariant");
     int age = hr->age_in_surv_rate_group();
     double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group());
     bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
   }
-
   return bytes_to_copy;
 }
 
@@ -1860,22 +1811,6 @@
   _recorded_rs_lengths = rs_lengths;
 }
 
-void G1CollectorPolicy::check_if_region_is_too_expensive(double
-                                                           predicted_time_ms) {
-  // I don't think we need to do this when in young GC mode since
-  // marking will be initiated next time we hit the soft limit anyway...
-  if (predicted_time_ms > _expensive_region_limit_ms) {
-    ergo_verbose2(ErgoMixedGCs,
-              "request mixed GCs end",
-              ergo_format_reason("predicted region time higher than threshold")
-              ergo_format_ms("predicted region time")
-              ergo_format_ms("threshold"),
-              predicted_time_ms, _expensive_region_limit_ms);
-    // no point in doing another mixed GC
-    _should_revert_to_young_gcs = true;
-  }
-}
-
 void G1CollectorPolicy::update_recent_gc_times(double end_time_sec,
                                                double elapsed_ms) {
   _recent_gc_times_ms->add(elapsed_ms);
@@ -2274,12 +2209,12 @@
 }
 
 class KnownGarbageClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CollectionSetChooser* _hrSorted;
 
 public:
   KnownGarbageClosure(CollectionSetChooser* hrSorted) :
-    _hrSorted(hrSorted)
-  {}
+    _g1h(G1CollectedHeap::heap()), _hrSorted(hrSorted) { }
 
   bool doHeapRegion(HeapRegion* r) {
     // We only include humongous regions in collection
@@ -2288,11 +2223,10 @@
 
     // Do we have any marking information for this region?
     if (r->is_marked()) {
-      // We don't include humongous regions in collection
-      // sets because we collect them immediately at the end of a marking
-      // cycle.  We also don't include young regions because we *must*
-      // include them in the next collection pause.
-      if (!r->isHumongous() && !r->is_young()) {
+      // We will skip any region that's currently used as an old GC
+      // alloc region (we should not consider those for collection
+      // before we fill them up).
+      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
         _hrSorted->addMarkedHeapRegion(r);
       }
     }
@@ -2301,8 +2235,10 @@
 };
 
 class ParKnownGarbageHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CollectionSetChooser* _hrSorted;
   jint _marked_regions_added;
+  size_t _reclaimable_bytes_added;
   jint _chunk_size;
   jint _cur_chunk_idx;
   jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end)
@@ -2320,6 +2256,7 @@
     assert(_cur_chunk_idx < _cur_chunk_end, "postcondition");
     _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r);
     _marked_regions_added++;
+    _reclaimable_bytes_added += r->reclaimable_bytes();
     _cur_chunk_idx++;
   }
 
@@ -2327,10 +2264,10 @@
   ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted,
                            jint chunk_size,
                            int worker) :
-    _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
-    _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0),
-    _invokes(0)
-  {}
+      _g1h(G1CollectedHeap::heap()),
+      _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
+      _marked_regions_added(0), _reclaimable_bytes_added(0),
+      _cur_chunk_idx(0), _cur_chunk_end(0), _invokes(0) { }
 
   bool doHeapRegion(HeapRegion* r) {
     // We only include humongous regions in collection
@@ -2340,17 +2277,17 @@
 
     // Do we have any marking information for this region?
     if (r->is_marked()) {
-      // We don't include humongous regions in collection
-      // sets because we collect them immediately at the end of a marking
-      // cycle.
-      // We also do not include young regions in collection sets
-      if (!r->isHumongous() && !r->is_young()) {
+      // We will skip any region that's currently used as an old GC
+      // alloc region (we should not consider those for collection
+      // before we fill them up).
+      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
         add_region(r);
       }
     }
     return false;
   }
   jint marked_regions_added() { return _marked_regions_added; }
+  size_t reclaimable_bytes_added() { return _reclaimable_bytes_added; }
   int invokes() { return _invokes; }
 };
 
@@ -2362,8 +2299,7 @@
   ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) :
     AbstractGangTask("ParKnownGarbageTask"),
     _hrSorted(hrSorted), _chunk_size(chunk_size),
-    _g1(G1CollectedHeap::heap())
-  {}
+    _g1(G1CollectedHeap::heap()) { }
 
   void work(uint worker_id) {
     ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted,
@@ -2374,7 +2310,9 @@
                                          _g1->workers()->active_workers(),
                                          HeapRegion::InitialClaimValue);
     jint regions_added = parKnownGarbageCl.marked_regions_added();
-    _hrSorted->incNumMarkedHeapRegions(regions_added);
+    size_t reclaimable_bytes_added =
+                                   parKnownGarbageCl.reclaimable_bytes_added();
+    _hrSorted->updateTotals(regions_added, reclaimable_bytes_added);
     if (G1PrintParCleanupStats) {
       gclog_or_tty->print_cr("     Thread %d called %d times, added %d regions to list.",
                  worker_id, parKnownGarbageCl.invokes(), regions_added);
@@ -2658,7 +2596,43 @@
 }
 #endif // !PRODUCT
 
-void G1CollectorPolicy::choose_collection_set(double target_pause_time_ms) {
+bool G1CollectorPolicy::next_gc_should_be_mixed(const char* true_action_str,
+                                                const char* false_action_str) {
+  CollectionSetChooser* cset_chooser = _collectionSetChooser;
+  if (cset_chooser->isEmpty()) {
+    ergo_verbose0(ErgoMixedGCs,
+                  false_action_str,
+                  ergo_format_reason("candidate old regions not available"));
+    return false;
+  }
+  size_t reclaimable_bytes = cset_chooser->remainingReclaimableBytes();
+  size_t capacity_bytes = _g1->capacity();
+  double perc = (double) reclaimable_bytes * 100.0 / (double) capacity_bytes;
+  double threshold = (double) G1OldReclaimableThresholdPercent;
+  if (perc < threshold) {
+    ergo_verbose4(ErgoMixedGCs,
+              false_action_str,
+              ergo_format_reason("reclaimable percentage lower than threshold")
+              ergo_format_region("candidate old regions")
+              ergo_format_byte_perc("reclaimable")
+              ergo_format_perc("threshold"),
+              cset_chooser->remainingRegions(),
+              reclaimable_bytes, perc, threshold);
+    return false;
+  }
+
+  ergo_verbose4(ErgoMixedGCs,
+                true_action_str,
+                ergo_format_reason("candidate old regions available")
+                ergo_format_region("candidate old regions")
+                ergo_format_byte_perc("reclaimable")
+                ergo_format_perc("threshold"),
+                cset_chooser->remainingRegions(),
+                reclaimable_bytes, perc, threshold);
+  return true;
+}
+
+void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
   // Set this here - in case we're not doing young collections.
   double non_young_start_time_sec = os::elapsedTime();
 
@@ -2672,7 +2646,6 @@
 
   double base_time_ms = predict_base_elapsed_time_ms(_pending_cards);
   double predicted_pause_time_ms = base_time_ms;
-
   double time_remaining_ms = target_pause_time_ms - base_time_ms;
 
   ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
@@ -2682,22 +2655,6 @@
                 ergo_format_ms("target pause time"),
                 base_time_ms, time_remaining_ms, target_pause_time_ms);
 
-  // the 10% and 50% values are arbitrary...
-  double threshold = 0.10 * target_pause_time_ms;
-  if (time_remaining_ms < threshold) {
-    double prev_time_remaining_ms = time_remaining_ms;
-    time_remaining_ms = 0.50 * target_pause_time_ms;
-    ergo_verbose3(ErgoCSetConstruction,
-                  "adjust remaining time",
-                  ergo_format_reason("remaining time lower than threshold")
-                  ergo_format_ms("remaining time")
-                  ergo_format_ms("threshold")
-                  ergo_format_ms("adjusted remaining time"),
-                  prev_time_remaining_ms, threshold, time_remaining_ms);
-  }
-
-  size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes;
-
   HeapRegion* hr;
   double young_start_time_sec = os::elapsedTime();
 
@@ -2752,78 +2709,97 @@
   non_young_start_time_sec = young_end_time_sec;
 
   if (!gcs_are_young()) {
-    bool should_continue = true;
-    NumberSeq seq;
-    double avg_prediction = 100000000000000000.0; // something very large
-
-    double prev_predicted_pause_time_ms = predicted_pause_time_ms;
-    do {
-      // Note that add_old_region_to_cset() increments the
-      // _old_cset_region_length field and cset_region_length() returns the
-      // sum of _eden_cset_region_length, _survivor_cset_region_length, and
-      // _old_cset_region_length. So, as old regions are added to the
-      // CSet, _old_cset_region_length will be incremented and
-      // cset_region_length(), which is used below, will always reflect
-      // the the total number of regions added up to this point to the CSet.
-
-      hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
-                                                      avg_prediction);
-      if (hr != NULL) {
-        _g1->old_set_remove(hr);
-        double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
-        time_remaining_ms -= predicted_time_ms;
-        predicted_pause_time_ms += predicted_time_ms;
-        add_old_region_to_cset(hr);
-        seq.add(predicted_time_ms);
-        avg_prediction = seq.avg() + seq.sd();
+    CollectionSetChooser* cset_chooser = _collectionSetChooser;
+    assert(cset_chooser->verify(), "CSet Chooser verification - pre");
+    const size_t min_old_cset_length = cset_chooser->calcMinOldCSetLength();
+    const size_t max_old_cset_length = cset_chooser->calcMaxOldCSetLength();
+
+    size_t expensive_region_num = 0;
+    bool check_time_remaining = adaptive_young_list_length();
+    HeapRegion* hr = cset_chooser->peek();
+    while (hr != NULL) {
+      if (old_cset_region_length() >= max_old_cset_length) {
+        // Added maximum number of old regions to the CSet.
+        ergo_verbose2(ErgoCSetConstruction,
+                      "finish adding old regions to CSet",
+                      ergo_format_reason("old CSet region num reached max")
+                      ergo_format_region("old")
+                      ergo_format_region("max"),
+                      old_cset_region_length(), max_old_cset_length);
+        break;
       }
 
-      should_continue = true;
-      if (hr == NULL) {
-        // No need for an ergo verbose message here,
-        // getNextMarkRegion() does this when it returns NULL.
-        should_continue = false;
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+      if (check_time_remaining) {
+        if (predicted_time_ms > time_remaining_ms) {
+          // Too expensive for the current CSet.
+
+          if (old_cset_region_length() >= min_old_cset_length) {
+            // We have added the minimum number of old regions to the CSet,
+            // we are done with this CSet.
+            ergo_verbose4(ErgoCSetConstruction,
+                          "finish adding old regions to CSet",
+                          ergo_format_reason("predicted time is too high")
+                          ergo_format_ms("predicted time")
+                          ergo_format_ms("remaining time")
+                          ergo_format_region("old")
+                          ergo_format_region("min"),
+                          predicted_time_ms, time_remaining_ms,
+                          old_cset_region_length(), min_old_cset_length);
+            break;
+          }
+
+          // We'll add it anyway given that we haven't reached the
+          // minimum number of old regions.
+          expensive_region_num += 1;
+        }
       } else {
-        if (adaptive_young_list_length()) {
-          if (time_remaining_ms < 0.0) {
-            ergo_verbose1(ErgoCSetConstruction,
-                          "stop adding old regions to CSet",
-                          ergo_format_reason("remaining time is lower than 0")
-                          ergo_format_ms("remaining time"),
-                          time_remaining_ms);
-            should_continue = false;
-          }
-        } else {
-          if (cset_region_length() >= _young_list_fixed_length) {
-            ergo_verbose2(ErgoCSetConstruction,
-                          "stop adding old regions to CSet",
-                          ergo_format_reason("CSet length reached target")
-                          ergo_format_region("CSet")
-                          ergo_format_region("young target"),
-                          cset_region_length(), _young_list_fixed_length);
-            should_continue = false;
-          }
+        if (old_cset_region_length() >= min_old_cset_length) {
+          // In the non-auto-tuning case, we'll finish adding regions
+          // to the CSet if we reach the minimum.
+          ergo_verbose2(ErgoCSetConstruction,
+                        "finish adding old regions to CSet",
+                        ergo_format_reason("old CSet region num reached min")
+                        ergo_format_region("old")
+                        ergo_format_region("min"),
+                        old_cset_region_length(), min_old_cset_length);
+          break;
         }
       }
-    } while (should_continue);
-
-    if (!adaptive_young_list_length() &&
-        cset_region_length() < _young_list_fixed_length) {
-      ergo_verbose2(ErgoCSetConstruction,
-                    "request mixed GCs end",
-                    ergo_format_reason("CSet length lower than target")
-                    ergo_format_region("CSet")
-                    ergo_format_region("young target"),
-                    cset_region_length(), _young_list_fixed_length);
-      _should_revert_to_young_gcs  = true;
+
+      // We will add this region to the CSet.
+      time_remaining_ms -= predicted_time_ms;
+      predicted_pause_time_ms += predicted_time_ms;
+      cset_chooser->remove_and_move_to_next(hr);
+      _g1->old_set_remove(hr);
+      add_old_region_to_cset(hr);
+
+      hr = cset_chooser->peek();
+    }
+    if (hr == NULL) {
+      ergo_verbose0(ErgoCSetConstruction,
+                    "finish adding old regions to CSet",
+                    ergo_format_reason("candidate old regions not available"));
     }
 
-    ergo_verbose2(ErgoCSetConstruction | ErgoHigh,
-                  "add old regions to CSet",
-                  ergo_format_region("old")
-                  ergo_format_ms("predicted old region time"),
-                  old_cset_region_length(),
-                  predicted_pause_time_ms - prev_predicted_pause_time_ms);
+    if (expensive_region_num > 0) {
+      // We print the information once here at the end, predicated on
+      // whether we added any apparently expensive regions or not, to
+      // avoid generating output per region.
+      ergo_verbose4(ErgoCSetConstruction,
+                    "added expensive regions to CSet",
+                    ergo_format_reason("old CSet region num not reached min")
+                    ergo_format_region("old")
+                    ergo_format_region("expensive")
+                    ergo_format_region("min")
+                    ergo_format_ms("remaining time"),
+                    old_cset_region_length(),
+                    expensive_region_num,
+                    min_old_cset_length,
+                    time_remaining_ms);
+    }
+
+    assert(cset_chooser->verify(), "CSet Chooser verification - post");
   }
 
   stop_incremental_cset_building();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Feb 15 13:06:53 2012 -0500
@@ -312,16 +312,13 @@
   double _recorded_non_young_free_cset_time_ms;
 
   double _sigma;
-  double _expensive_region_limit_ms;
 
   size_t _rs_lengths_prediction;
 
   size_t _known_garbage_bytes;
   double _known_garbage_ratio;
 
-  double sigma() {
-    return _sigma;
-  }
+  double sigma() { return _sigma; }
 
   // A function that prevents us putting too much stock in small sample
   // sets.  Returns a number between 2.0 and 1.0, depending on the number
@@ -491,8 +488,6 @@
            get_new_prediction(_non_young_other_cost_per_region_ms_seq);
   }
 
-  void check_if_region_is_too_expensive(double predicted_time_ms);
-
   double predict_young_collection_elapsed_time_ms(size_t adjustment);
   double predict_base_elapsed_time_ms(size_t pending_cards);
   double predict_base_elapsed_time_ms(size_t pending_cards,
@@ -707,7 +702,6 @@
   // initial-mark work.
   volatile bool _during_initial_mark_pause;
 
-  bool _should_revert_to_young_gcs;
   bool _last_young_gc;
 
   // This set of variables tracks the collector efficiency, in order to
@@ -946,10 +940,17 @@
     return _bytes_copied_during_gc;
   }
 
+  // Determine whether the next GC should be mixed. Called to determine
+  // whether to start mixed GCs or whether to carry on doing mixed
+  // GCs. The two action strings are used in the ergo output when the
+  // method returns true or false.
+  bool next_gc_should_be_mixed(const char* true_action_str,
+                               const char* false_action_str);
+
   // Choose a new collection set.  Marks the chosen regions as being
   // "in_collection_set", and links them together.  The head and number of
   // the collection set are available via access methods.
-  void choose_collection_set(double target_pause_time_ms);
+  void finalize_cset(double target_pause_time_ms);
 
   // The head of the list (via "next_in_collection_set()") representing the
   // current collection set.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Wed Feb 15 13:06:53 2012 -0500
@@ -131,8 +131,8 @@
                              ", " _name_ ": "SIZE_FORMAT" bytes (%1.2f %%)"
 
 // Generates the format string
-#define ergo_format(_action_, _extra_format_)                   \
-  " %1.3f: [G1Ergonomics (%s) " _action_ _extra_format_ "]"
+#define ergo_format(_extra_format_)                           \
+  " %1.3f: [G1Ergonomics (%s) %s" _extra_format_ "]"
 
 // Conditionally, prints an ergonomic decision record. _extra_format_
 // is the format string for the optional items we'd like to print
@@ -145,20 +145,21 @@
 // them to the print method. For convenience, we have wrapper macros
 // below which take a specific number of arguments and set the rest to
 // a default value.
-#define ergo_verbose_common(_tag_, _action_, _extra_format_,            \
+#define ergo_verbose_common(_tag_, _action_, _extra_format_,                \
                             _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_) \
-  do {                                                                  \
-    if (G1ErgoVerbose::enabled((_tag_))) {                              \
-      gclog_or_tty->print_cr(ergo_format(_action_, _extra_format_),     \
-                             os::elapsedTime(),                         \
-                             G1ErgoVerbose::to_string((_tag_)),         \
-                             (_arg0_), (_arg1_), (_arg2_),              \
-                             (_arg3_), (_arg4_), (_arg5_));             \
-    }                                                                   \
+  do {                                                                      \
+    if (G1ErgoVerbose::enabled((_tag_))) {                                  \
+      gclog_or_tty->print_cr(ergo_format(_extra_format_),                   \
+                             os::elapsedTime(),                             \
+                             G1ErgoVerbose::to_string((_tag_)),             \
+                             (_action_),                                    \
+                             (_arg0_), (_arg1_), (_arg2_),                  \
+                             (_arg3_), (_arg4_), (_arg5_));                 \
+    }                                                                       \
   } while (0)
 
 
-#define ergo_verbose(_tag_, _action_)                           \
+#define ergo_verbose(_tag_, _action_)                                   \
   ergo_verbose_common(_tag_, _action_, "", 0, 0, 0, 0, 0, 0)
 
 #define ergo_verbose0(_tag_, _action_, _extra_format_)                  \
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Feb 15 13:06:53 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -297,7 +297,23 @@
                                                                             \
   develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
           "Percentage (0-100) of the heap size to use as maximum "          \
-          "young gen size.")
+          "young gen size.")                                                \
+                                                                            \
+  develop(uintx, G1OldCSetRegionLiveThresholdPercent, 95,                   \
+          "Threshold for regions to be added to the collection set. "       \
+          "Regions with more live bytes that this will not be collected.")  \
+                                                                            \
+  develop(uintx, G1OldReclaimableThresholdPercent, 1,                       \
+          "Threshold for the remaining old reclaimable bytes, expressed "   \
+          "as a percentage of the heap size. If the old reclaimable bytes " \
+          "are under this we will not collect them with more mixed GCs.")   \
+                                                                            \
+  develop(uintx, G1MaxMixedGCNum, 4,                                        \
+          "The maximum desired number of mixed GCs after a marking cycle.") \
+                                                                            \
+  develop(uintx, G1OldCSetRegionThresholdPercent, 10,                       \
+          "An upper bound for the number of old CSet regions expressed "    \
+          "as a percentage of the heap size.")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Feb 15 13:06:53 2012 -0500
@@ -387,13 +387,12 @@
   ct_bs->clear(MemRegion(bottom(), end()));
 }
 
-// <PREDICTION>
 void HeapRegion::calc_gc_efficiency() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  _gc_efficiency = (double) garbage_bytes() /
-                            g1h->predict_region_elapsed_time_ms(this, false);
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  _gc_efficiency = (double) reclaimable_bytes() /
+                            g1p->predict_region_elapsed_time_ms(this, false);
 }
-// </PREDICTION>
 
 void HeapRegion::set_startsHumongous(HeapWord* new_top, HeapWord* new_end) {
   assert(!isHumongous(), "sanity / pre-condition");
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Feb 15 13:06:53 2012 -0500
@@ -415,6 +415,16 @@
     return used_at_mark_start_bytes - marked_bytes();
   }
 
+  // Return the amount of bytes we'll reclaim if we collect this
+  // region. This includes not only the known garbage bytes in the
+  // region but also any unallocated space in it, i.e., [top, end),
+  // since it will also be reclaimed if we collect the region.
+  size_t reclaimable_bytes() {
+    size_t known_live_bytes = live_bytes();
+    assert(known_live_bytes <= capacity(), "sanity");
+    return capacity() - known_live_bytes;
+  }
+
   // An upper bound on the number of live bytes in the region.
   size_t max_live_bytes() { return used() - garbage_bytes(); }
 
@@ -648,10 +658,8 @@
     init_top_at_mark_start();
   }
 
-  // <PREDICTION>
   void calc_gc_efficiency(void);
   double gc_efficiency() { return _gc_efficiency;}
-  // </PREDICTION>
 
   bool is_young() const     { return _young_type != NotYoung; }
   bool is_survivor() const  { return _young_type == Survivor; }