8133051: Concurrent refinement threads may be activated and deactivated at random
author kbarrett
Mon, 18 Apr 2016 14:52:31 -0400
changeset 37510 cf066fe4531b
parent 37508 3bf78337709d
child 37511 2cd9b35e0eda
8133051: Concurrent refinement threads may be activated and deactivated at random
Summary: Establish min threshold step; earlier primary refinement thread activation.
Reviewed-by: tschatzl, drwhite, mgerdin, jmasa
hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp
hotspot/src/share/vm/gc/g1/concurrentG1Refine.hpp
hotspot/src/share/vm/gc/g1/concurrentG1RefineThread.cpp
hotspot/src/share/vm/gc/g1/concurrentG1RefineThread.hpp
hotspot/src/share/vm/gc/g1/g1_globals.hpp
hotspot/src/share/vm/logging/logPrefix.hpp
hotspot/src/share/vm/runtime/arguments.cpp
hotspot/src/share/vm/utilities/globalDefinitions.hpp
hotspot/test/gc/arguments/TestG1ConcRefinementThreads.java
--- a/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp	Mon Apr 18 14:52:31 2016 -0400
@@ -29,42 +29,174 @@
 #include "gc/g1/g1HotCardCache.hpp"
 #include "gc/g1/g1Predictions.hpp"
 #include "runtime/java.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/pair.hpp"
+#include <math.h>
 
-ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h, const G1Predictions* predictor) :
+// Arbitrary but large limits, to simplify some of the zone calculations.
+// The general idea is to allow expressions like
+//   MIN2(x OP y, max_XXX_zone)
+// without needing to check for overflow in "x OP y", because the
+// ranges for x and y have been restricted.
+STATIC_ASSERT(sizeof(LP64_ONLY(jint) NOT_LP64(jshort)) <= (sizeof(size_t)/2));
+const size_t max_yellow_zone = LP64_ONLY(max_jint) NOT_LP64(max_jshort);
+const size_t max_green_zone = max_yellow_zone / 2;
+const size_t max_red_zone = INT_MAX; // For dcqs.set_max_completed_queue.
+STATIC_ASSERT(max_yellow_zone <= max_red_zone);
+
+// Range check assertions for green zone values.
+#define assert_zone_constraints_g(green)                        \
+  do {                                                          \
+    size_t azc_g_green = (green);                               \
+    assert(azc_g_green <= max_green_zone,                       \
+           "green exceeds max: " SIZE_FORMAT, azc_g_green);     \
+  } while (0)
+
+// Range check assertions for green and yellow zone values.
+#define assert_zone_constraints_gy(green, yellow)                       \
+  do {                                                                  \
+    size_t azc_gy_green = (green);                                      \
+    size_t azc_gy_yellow = (yellow);                                    \
+    assert_zone_constraints_g(azc_gy_green);                            \
+    assert(azc_gy_yellow <= max_yellow_zone,                            \
+           "yellow exceeds max: " SIZE_FORMAT, azc_gy_yellow);          \
+    assert(azc_gy_green <= azc_gy_yellow,                               \
+           "green (" SIZE_FORMAT ") exceeds yellow (" SIZE_FORMAT ")",  \
+           azc_gy_green, azc_gy_yellow);                                \
+  } while (0)
+
+// Range check assertions for green, yellow, and red zone values.
+#define assert_zone_constraints_gyr(green, yellow, red)                 \
+  do {                                                                  \
+    size_t azc_gyr_green = (green);                                     \
+    size_t azc_gyr_yellow = (yellow);                                   \
+    size_t azc_gyr_red = (red);                                         \
+    assert_zone_constraints_gy(azc_gyr_green, azc_gyr_yellow);          \
+    assert(azc_gyr_red <= max_red_zone,                                 \
+           "red exceeds max: " SIZE_FORMAT, azc_gyr_red);               \
+    assert(azc_gyr_yellow <= azc_gyr_red,                               \
+           "yellow (" SIZE_FORMAT ") exceeds red (" SIZE_FORMAT ")",    \
+           azc_gyr_yellow, azc_gyr_red);                                \
+  } while (0)
+
+// Logging tag sequence for refinement control updates.
+#define CTRL_TAGS gc, ergo, refine
+
+// For logging zone values, ensuring consistency of level and tags.
+#define LOG_ZONES(...) log_debug( CTRL_TAGS )(__VA_ARGS__)
+
+// Package for pair of refinement thread activation and deactivation
+// thresholds.  The activation and deactivation levels are resp. the first
+// and second values of the pair.
+typedef Pair<size_t, size_t> Thresholds;
+inline size_t activation_level(const Thresholds& t) { return t.first; }
+inline size_t deactivation_level(const Thresholds& t) { return t.second; }
+
+static Thresholds calc_thresholds(size_t green_zone,  // Computes the (activation, deactivation) thresholds for refinement worker worker_i.
+                                  size_t yellow_zone,
+                                  uint worker_i) {
+  double yellow_size = yellow_zone - green_zone;  // Width of the yellow band; size_t subtraction wraps if green_zone > yellow_zone.
+  double step = yellow_size / ConcurrentG1Refine::thread_num();  // Even share of the yellow band per thread_num() thread.
+  if (worker_i == 0) {
+    // Potentially activate worker 0 more aggressively, to keep
+    // available buffers near green_zone value.  When yellow_size is
+    // large we don't want to allow a full step to accumulate before
+    // doing any processing, as that might lead to significantly more
+    // than green_zone buffers to be processed by update_rs.
+    step = MIN2(step, ParallelGCThreads / 2.0);
+  }
+  size_t activate_offset = static_cast<size_t>(ceil(step * (worker_i + 1)));  // Rounded up: (worker_i + 1) steps above green_zone.
+  size_t deactivate_offset = static_cast<size_t>(floor(step * worker_i));  // Rounded down: worker_i steps above green_zone (0 for worker 0).
+  return Thresholds(green_zone + activate_offset,  // Pair order is (activation, deactivation), as read by activation_level()/deactivation_level().
+                    green_zone + deactivate_offset);
+}
+
+ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h,
+                                       size_t green_zone,
+                                       size_t yellow_zone,
+                                       size_t red_zone,
+                                       size_t min_yellow_zone_size) :
   _threads(NULL),
   _sample_thread(NULL),
-  _predictor_sigma(predictor->sigma()),
+  _n_worker_threads(thread_num()),
+  _green_zone(green_zone),
+  _yellow_zone(yellow_zone),
+  _red_zone(red_zone),
+  _min_yellow_zone_size(min_yellow_zone_size),
   _hot_card_cache(g1h)
 {
-  // Ergonomically select initial concurrent refinement parameters
-  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
-    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, ParallelGCThreads);
-  }
-  set_green_zone(G1ConcRefinementGreenZone);
+  assert_zone_constraints_gyr(green_zone, yellow_zone, red_zone);
+}
 
-  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
-    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
+static size_t calc_min_yellow_zone_size() {
+  size_t step = G1ConcRefinementThresholdStep;
+  uint n_workers = ConcurrentG1Refine::thread_num();
+  if ((max_yellow_zone / step) < n_workers) {
+    return max_yellow_zone;
+  } else {
+    return step * n_workers;
   }
-  set_yellow_zone(MAX2(G1ConcRefinementYellowZone, green_zone()));
+}
 
-  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
-    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
+static size_t calc_init_green_zone() {
+  size_t green = G1ConcRefinementGreenZone;
+  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
+    green = ParallelGCThreads;
   }
-  set_red_zone(MAX2(G1ConcRefinementRedZone, yellow_zone()));
-
+  return MIN2(green, max_green_zone);
 }
 
-ConcurrentG1Refine* ConcurrentG1Refine::create(G1CollectedHeap* g1h, CardTableEntryClosure* refine_closure, jint* ecode) {
-  G1CollectorPolicy* policy = g1h->g1_policy();
-  ConcurrentG1Refine* cg1r = new ConcurrentG1Refine(g1h, &policy->predictor());
+static size_t calc_init_yellow_zone(size_t green, size_t min_size) {  // Initial yellow zone: green plus a band of at least min_size, capped at max_yellow_zone.
+  size_t config = G1ConcRefinementYellowZone;
+  size_t size = 0;  // Yellow band width (yellow - green); stays 0 if the flag was set to a value <= green.
+  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
+    size = green * 2;  // Flag left at its default: band is twice green.
+  } else if (green < config) {
+    size = config - green;  // Flag set above green: band is the configured distance above green.
+  }
+  size = MAX2(size, min_size);  // Enforce the minimum band width.
+  size = MIN2(size, max_yellow_zone);  // Bound the band before the addition below.
+  return MIN2(green + size, max_yellow_zone);
+}
+
+static size_t calc_init_red_zone(size_t green, size_t yellow) {  // Initial red zone: yellow plus a red band, capped at max_red_zone.
+  size_t size = yellow - green;  // Default red band is as wide as the yellow band; wraps if green > yellow.
+  if (!FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
+    size_t config = G1ConcRefinementRedZone;
+    if (yellow < config) {
+      size = MAX2(size, config - yellow);  // An explicit flag value above yellow can only widen the band, never narrow it.
+    }
+  }
+  return MIN2(yellow + size, max_red_zone);
+}
+
+ConcurrentG1Refine* ConcurrentG1Refine::create(G1CollectedHeap* g1h,
+                                               CardTableEntryClosure* refine_closure,
+                                               jint* ecode) {
+  size_t min_yellow_zone_size = calc_min_yellow_zone_size();
+  size_t green_zone = calc_init_green_zone();
+  size_t yellow_zone = calc_init_yellow_zone(green_zone, min_yellow_zone_size);
+  size_t red_zone = calc_init_red_zone(green_zone, yellow_zone);
+
+  LOG_ZONES("Initial Refinement Zones: "
+            "green: " SIZE_FORMAT ", "
+            "yellow: " SIZE_FORMAT ", "
+            "red: " SIZE_FORMAT ", "
+            "min yellow size: " SIZE_FORMAT,
+            green_zone, yellow_zone, red_zone, min_yellow_zone_size);
+
+  ConcurrentG1Refine* cg1r = new ConcurrentG1Refine(g1h,
+                                                    green_zone,
+                                                    yellow_zone,
+                                                    red_zone,
+                                                    min_yellow_zone_size);
+
   if (cg1r == NULL) {
     *ecode = JNI_ENOMEM;
     vm_shutdown_during_initialization("Could not create ConcurrentG1Refine");
     return NULL;
   }
-  cg1r->_n_worker_threads = thread_num();
-
-  cg1r->reset_threshold_step();
 
   cg1r->_threads = NEW_C_HEAP_ARRAY_RETURN_NULL(ConcurrentG1RefineThread*, cg1r->_n_worker_threads, mtGC);
   if (cg1r->_threads == NULL) {
@@ -77,7 +209,15 @@
 
   ConcurrentG1RefineThread *next = NULL;
   for (uint i = cg1r->_n_worker_threads - 1; i != UINT_MAX; i--) {
-    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(cg1r, next, refine_closure, worker_id_offset, i);
+    Thresholds thresholds = calc_thresholds(green_zone, yellow_zone, i);
+    ConcurrentG1RefineThread* t =
+      new ConcurrentG1RefineThread(cg1r,
+                                   next,
+                                   refine_closure,
+                                   worker_id_offset,
+                                   i,
+                                   activation_level(thresholds),
+                                   deactivation_level(thresholds));
     assert(t != NULL, "Conc refine should have been created");
     if (t->osthread() == NULL) {
       *ecode = JNI_ENOMEM;
@@ -101,14 +241,6 @@
   return cg1r;
 }
 
-void ConcurrentG1Refine::reset_threshold_step() {
-  if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
-    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
-  } else {
-    _thread_threshold_step = G1ConcRefinementThresholdStep;
-  }
-}
-
 void ConcurrentG1Refine::init(G1RegionToSpaceMapper* card_counts_storage) {
   _hot_card_cache.initialize(card_counts_storage);
 }
@@ -120,10 +252,11 @@
   _sample_thread->stop();
 }
 
-void ConcurrentG1Refine::reinitialize_threads() {
-  reset_threshold_step();
+void ConcurrentG1Refine::update_thread_thresholds() {
   for (uint i = 0; i < _n_worker_threads; i++) {
-    _threads[i]->initialize();
+    Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, i);
+    _threads[i]->update_thresholds(activation_level(thresholds),
+                                   deactivation_level(thresholds));
   }
 }
 
@@ -142,7 +275,7 @@
 }
 
 void ConcurrentG1Refine::worker_threads_do(ThreadClosure * tc) {
-  for (uint i = 0; i < worker_thread_num(); i++) {
+  for (uint i = 0; i < _n_worker_threads; i++) {
     tc->do_thread(_threads[i]);
   }
 }
@@ -160,34 +293,80 @@
   st->cr();
 }
 
+static size_t calc_new_green_zone(size_t green,
+                                  double update_rs_time,
+                                  size_t update_rs_processed_buffers,
+                                  double goal_ms) {
+  // Adjust green zone based on whether we're meeting the time goal.
+  // Limit to max_green_zone.
+  const double inc_k = 1.1, dec_k = 0.9;  // Growth (+10%) and shrink (-10%) factors.
+  if (update_rs_time > goal_ms) {  // Over the goal: shrink green.
+    if (green > 0) {
+      green = static_cast<size_t>(green * dec_k);  // Truncating cast: any green > 0 strictly decreases and can reach 0.
+    }
+  } else if (update_rs_time < goal_ms &&  // Under the goal: grow, but only if more than green buffers were processed.
+             update_rs_processed_buffers > green) {
+    green = static_cast<size_t>(MAX2(green * inc_k, green + 1.0));  // Grow by 10%, but by at least 1 so small values still increase.
+    green = MIN2(green, max_green_zone);
+  }
+  return green;  // Unchanged when neither branch above modified it.
+}
+
+static size_t calc_new_yellow_zone(size_t green, size_t min_yellow_size) {  // Recomputes yellow from green.
+  size_t size = green * 2;  // Preferred yellow band width is twice green.
+  size = MAX2(size, min_yellow_size);  // Never narrower than min_yellow_size.
+  return MIN2(green + size, max_yellow_zone);  // Capped at max_yellow_zone.
+}
+
+static size_t calc_new_red_zone(size_t green, size_t yellow) {  // Recomputes red from green and yellow.
+  return MIN2(yellow + (yellow - green), max_red_zone);  // Red band is as wide as the yellow band; result capped at max_red_zone.
+}
+
+void ConcurrentG1Refine::update_zones(double update_rs_time,  // Recomputes the green, yellow and red zones from the latest update_rs measurements.
+                                      size_t update_rs_processed_buffers,
+                                      double goal_ms) {
+  log_trace( CTRL_TAGS )("Updating Refinement Zones: "  // Trace the inputs before adjusting.
+                         "update_rs time: %.3fms, "
+                         "update_rs buffers: " SIZE_FORMAT ", "
+                         "update_rs goal time: %.3fms",
+                         update_rs_time,
+                         update_rs_processed_buffers,
+                         goal_ms);
+
+  _green_zone = calc_new_green_zone(_green_zone,  // Green is adapted first; yellow and red are then derived from it.
+                                    update_rs_time,
+                                    update_rs_processed_buffers,
+                                    goal_ms);
+  _yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size);
+  _red_zone = calc_new_red_zone(_green_zone, _yellow_zone);
+
+  assert_zone_constraints_gyr(_green_zone, _yellow_zone, _red_zone);  // Checks green <= yellow <= red and the max_*_zone limits.
+  LOG_ZONES("Updated Refinement Zones: "  // Log the resulting zones.
+            "green: " SIZE_FORMAT ", "
+            "yellow: " SIZE_FORMAT ", "
+            "red: " SIZE_FORMAT,
+            _green_zone, _yellow_zone, _red_zone);
+}
+
 void ConcurrentG1Refine::adjust(double update_rs_time,
-                                double update_rs_processed_buffers,
+                                size_t update_rs_processed_buffers,
                                 double goal_ms) {
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
 
   if (G1UseAdaptiveConcRefinement) {
-    const int k_gy = 3, k_gr = 6;
-    const double inc_k = 1.1, dec_k = 0.9;
+    update_zones(update_rs_time, update_rs_processed_buffers, goal_ms);
+    update_thread_thresholds();
 
-    size_t g = green_zone();
-    if (update_rs_time > goal_ms) {
-      g = (size_t)(g * dec_k);  // Can become 0, that's OK. That would mean a mutator-only processing.
+    // Change the barrier params
+    if (_n_worker_threads == 0) {
+      // Disable dcqs notification when there are no threads to notify.
+      dcqs.set_process_completed_threshold(INT_MAX);
     } else {
-      if (update_rs_time < goal_ms && update_rs_processed_buffers > g) {
-        g = (size_t)MAX2(g * inc_k, g + 1.0);
-      }
+      // Worker 0 is the primary; wakeup is via dcqs notification.
+      STATIC_ASSERT(max_yellow_zone <= INT_MAX);
+      size_t activate = _threads[0]->activation_threshold();
+      dcqs.set_process_completed_threshold((int)activate);
     }
-    // Change the refinement threads params
-    set_green_zone(g);
-    set_yellow_zone(g * k_gy);
-    set_red_zone(g * k_gr);
-    reinitialize_threads();
-
-    size_t processing_threshold_delta = MAX2<size_t>(green_zone() * _predictor_sigma, 1);
-    size_t processing_threshold = MIN2(green_zone() + processing_threshold_delta,
-                                    yellow_zone());
-    // Change the barrier params
-    dcqs.set_process_completed_threshold((int)processing_threshold);
     dcqs.set_max_completed_queue((int)red_zone());
   }
 
--- a/hotspot/src/share/vm/gc/g1/concurrentG1Refine.hpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/concurrentG1Refine.hpp	Mon Apr 18 14:52:31 2016 -0400
@@ -65,18 +65,24 @@
   size_t _green_zone;
   size_t _yellow_zone;
   size_t _red_zone;
-
-  size_t _thread_threshold_step;
-
-  double _predictor_sigma;
+  size_t _min_yellow_zone_size;
 
   // We delay the refinement of 'hot' cards using the hot card cache.
   G1HotCardCache _hot_card_cache;
 
-  // Reset the threshold step value based of the current zone boundaries.
-  void reset_threshold_step();
+  ConcurrentG1Refine(G1CollectedHeap* g1h,
+                     size_t green_zone,
+                     size_t yellow_zone,
+                     size_t red_zone,
+                     size_t min_yellow_zone_size);
 
-  ConcurrentG1Refine(G1CollectedHeap* g1h, const G1Predictions* predictions);
+  // Update green/yellow/red zone values based on how well goals are being met.
+  void update_zones(double update_rs_time,
+                    size_t update_rs_processed_buffers,
+                    double goal_ms);
+
+  // Update thread thresholds to account for updated zone values.
+  void update_thread_thresholds();
 
  public:
   ~ConcurrentG1Refine();
@@ -88,9 +94,7 @@
   void init(G1RegionToSpaceMapper* card_counts_storage);
   void stop();
 
-  void adjust(double update_rs_time, double update_rs_processed_buffers, double goal_ms);
-
-  void reinitialize_threads();
+  void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms);
 
   // Iterate over all concurrent refinement threads
   void threads_do(ThreadClosure *tc);
@@ -105,18 +109,10 @@
 
   void print_worker_threads_on(outputStream* st) const;
 
-  void set_green_zone(size_t x)  { _green_zone = x;  }
-  void set_yellow_zone(size_t x) { _yellow_zone = x; }
-  void set_red_zone(size_t x)    { _red_zone = x;    }
-
   size_t green_zone() const      { return _green_zone;  }
   size_t yellow_zone() const     { return _yellow_zone; }
   size_t red_zone() const        { return _red_zone;    }
 
-  uint worker_thread_num() const { return _n_worker_threads; }
-
-  size_t thread_threshold_step() const { return _thread_threshold_step; }
-
   G1HotCardCache* hot_card_cache() { return &_hot_card_cache; }
 
   static bool hot_card_cache_enabled() { return G1HotCardCache::default_use_cache(); }
--- a/hotspot/src/share/vm/gc/g1/concurrentG1RefineThread.cpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/concurrentG1RefineThread.cpp	Mon Apr 18 14:52:31 2016 -0400
@@ -36,7 +36,8 @@
 ConcurrentG1RefineThread::
 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
                          CardTableEntryClosure* refine_closure,
-                         uint worker_id_offset, uint worker_id) :
+                         uint worker_id_offset, uint worker_id,
+                         size_t activate, size_t deactivate) :
   ConcurrentGCThread(),
   _refine_closure(refine_closure),
   _worker_id_offset(worker_id_offset),
@@ -45,7 +46,9 @@
   _next(next),
   _monitor(NULL),
   _cg1r(cg1r),
-  _vtime_accum(0.0)
+  _vtime_accum(0.0),
+  _activation_threshold(activate),
+  _deactivation_threshold(deactivate)
 {
 
   // Each thread has its own monitor. The i-th thread is responsible for signaling
@@ -58,21 +61,17 @@
   } else {
     _monitor = DirtyCardQ_CBL_mon;
   }
-  initialize();
 
   // set name
   set_name("G1 Refine#%d", worker_id);
   create_and_start();
 }
 
-void ConcurrentG1RefineThread::initialize() {
-  // Current thread activation threshold
-  _threshold = MIN2(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(),
-                    cg1r()->yellow_zone());
-  // A thread deactivates once the number of buffer reached a deactivation threshold
-   _deactivation_threshold =
-     MAX2(_threshold - MIN2(_threshold, cg1r()->thread_threshold_step()),
-          cg1r()->green_zone());
+void ConcurrentG1RefineThread::update_thresholds(size_t activate,  // Installs new activation/deactivation thresholds for this thread.
+                                                 size_t deactivate) {
+  assert(deactivate < activate, "precondition");  // Callers must pass a deactivation level strictly below the activation level.
+  _activation_threshold = activate;
+  _deactivation_threshold = deactivate;
 }
 
 void ConcurrentG1RefineThread::wait_for_completed_buffers() {
@@ -118,9 +117,10 @@
       break;
     }
 
+    size_t buffers_processed = 0;
     DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
     log_debug(gc, refine)("Activated %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
-                          _worker_id, _threshold, dcqs.completed_buffers_num());
+                          _worker_id, _activation_threshold, dcqs.completed_buffers_num());
 
     {
       SuspendibleThreadSetJoiner sts_join;
@@ -139,7 +139,9 @@
         }
 
         // Check if we need to activate the next thread.
-        if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
+        if ((_next != NULL) &&
+            !_next->is_active() &&
+            (curr_buffer_num > _next->_activation_threshold)) {
           _next->activate();
         }
 
@@ -150,14 +152,16 @@
                                                     false /* during_pause */)) {
           break; // Deactivate, number of buffers fell below threshold.
         }
+        ++buffers_processed;
       }
     }
 
     deactivate();
     log_debug(gc, refine)("Deactivated %d, off threshold: " SIZE_FORMAT
-                          ", current: " SIZE_FORMAT,
+                          ", current: " SIZE_FORMAT ", processed: " SIZE_FORMAT,
                           _worker_id, _deactivation_threshold,
-                          dcqs.completed_buffers_num());
+                          dcqs.completed_buffers_num(),
+                          buffers_processed);
 
     if (os::supports_vtime()) {
       _vtime_accum = (os::elapsedVTime() - _vtime_start);
--- a/hotspot/src/share/vm/gc/g1/concurrentG1RefineThread.hpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/concurrentG1RefineThread.hpp	Mon Apr 18 14:52:31 2016 -0400
@@ -53,10 +53,8 @@
   // The closure applied to completed log buffers.
   CardTableEntryClosure* _refine_closure;
 
-  size_t _thread_threshold_step;
-  // This thread activation threshold
-  size_t _threshold;
-  // This thread deactivation threshold
+  // This thread's activation/deactivation thresholds
+  size_t _activation_threshold;
   size_t _deactivation_threshold;
 
   void wait_for_completed_buffers();
@@ -75,9 +73,11 @@
   // Constructor
   ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
                            CardTableEntryClosure* refine_closure,
-                           uint worker_id_offset, uint worker_id);
+                           uint worker_id_offset, uint worker_id,
+                           size_t activate, size_t deactivate);
 
-  void initialize();
+  void update_thresholds(size_t activate, size_t deactivate);
+  size_t activation_threshold() const { return _activation_threshold; }
 
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
--- a/hotspot/src/share/vm/gc/g1/g1_globals.hpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1_globals.hpp	Mon Apr 18 14:52:31 2016 -0400
@@ -112,8 +112,7 @@
   product(size_t, G1ConcRefinementRedZone, 0,                               \
           "Maximum number of enqueued update buffers before mutator "       \
           "threads start processing new ones instead of enqueueing them. "  \
-          "Will be selected ergonomically by default. Zero will disable "   \
-          "concurrent processing.")                                         \
+          "Will be selected ergonomically by default.")                     \
           range(0, max_intx)                                                \
                                                                             \
   product(size_t, G1ConcRefinementGreenZone, 0,                             \
@@ -127,11 +126,12 @@
           "specified number of milliseconds to do miscellaneous work.")     \
           range(0, max_jint)                                                \
                                                                             \
-  product(size_t, G1ConcRefinementThresholdStep, 0,                         \
+  product(size_t, G1ConcRefinementThresholdStep, 2,                         \
           "Each time the rset update queue increases by this amount "       \
           "activate the next refinement thread if available. "              \
-          "Will be selected ergonomically by default.")                     \
-          range(0, SIZE_MAX)                                                \
+          "The actual step size will be selected ergonomically by "         \
+          "default, with this value used to determine a lower bound.")      \
+          range(1, SIZE_MAX)                                                \
                                                                             \
   product(intx, G1RSetUpdatingPauseTimePercent, 10,                         \
           "A target percentage of time that is allowed to be spend on "     \
@@ -201,9 +201,9 @@
           range(0, 32*M)                                                    \
           constraint(G1HeapRegionSizeConstraintFunc,AfterMemoryInit)        \
                                                                             \
-  product(uintx, G1ConcRefinementThreads, 0,                                \
-          "If non-0 is the number of parallel rem set update threads, "     \
-          "otherwise the value is determined ergonomically.")               \
+  product(uint, G1ConcRefinementThreads, 0,                                 \
+          "The number of parallel rem set update threads. "                 \
+          "Will be set ergonomically by default.")                          \
           range(0, (max_jint-1)/wordSize)                                   \
                                                                             \
   develop(bool, G1VerifyCTCleanup, false,                                   \
--- a/hotspot/src/share/vm/logging/logPrefix.hpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/logging/logPrefix.hpp	Mon Apr 18 14:52:31 2016 -0400
@@ -55,6 +55,7 @@
   LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, ergo, cset)) \
   LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, ergo, heap)) \
   LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, ergo, ihop)) \
+  LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, ergo, refine)) \
   LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, heap)) \
   LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, heap, region)) \
   LOG_PREFIX(GCId::print_prefix, LOG_TAGS(gc, freelist)) \
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Mon Apr 18 14:52:31 2016 -0400
@@ -2095,8 +2095,8 @@
   }
 
 #if INCLUDE_ALL_GCS
-  if (G1ConcRefinementThreads == 0) {
-    FLAG_SET_DEFAULT(G1ConcRefinementThreads, ParallelGCThreads);
+  if (FLAG_IS_DEFAULT(G1ConcRefinementThreads)) {
+    FLAG_SET_ERGO(uint, G1ConcRefinementThreads, ParallelGCThreads);
   }
 #endif
 
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Mon Apr 18 14:52:31 2016 -0400
@@ -199,9 +199,6 @@
 const size_t G                  = M*K;
 const size_t HWperKB            = K / sizeof(HeapWord);
 
-const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint
-const jint max_jint = (juint)min_jint - 1;                     // 0x7FFFFFFF == largest jint
-
 // Constants for converting from a base unit to milli-base units.  For
 // example from seconds to milliseconds and microseconds
 
@@ -381,6 +378,14 @@
 typedef jint   s4;
 typedef jlong  s8;
 
+const jbyte min_jbyte = -(1 << 7);       // smallest jbyte
+const jbyte max_jbyte = (1 << 7) - 1;    // largest jbyte
+const jshort min_jshort = -(1 << 15);    // smallest jshort
+const jshort max_jshort = (1 << 15) - 1; // largest jshort
+
+const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint
+const jint max_jint = (juint)min_jint - 1;                     // 0x7FFFFFFF == largest jint
+
 //----------------------------------------------------------------------------------------------------
 // JVM spec restrictions
 
--- a/hotspot/test/gc/arguments/TestG1ConcRefinementThreads.java	Fri Apr 15 17:17:58 2016 +0200
+++ b/hotspot/test/gc/arguments/TestG1ConcRefinementThreads.java	Mon Apr 18 14:52:31 2016 -0400
@@ -38,7 +38,7 @@
 
 public class TestG1ConcRefinementThreads {
 
-  static final int AUTO_SELECT_THREADS_COUNT = 0;
+  static final int AUTO_SELECT_THREADS_COUNT = -1;
   static final int PASSED_THREADS_COUNT = 11;
 
   public static void main(String args[]) throws Exception {
@@ -49,8 +49,8 @@
 
     // zero setting case
     runG1ConcRefinementThreadsTest(
-        new String[]{"-XX:G1ConcRefinementThreads=0"}, // automatically selected
-        AUTO_SELECT_THREADS_COUNT /* set to zero */);
+        new String[]{"-XX:G1ConcRefinementThreads=0"},
+        0);
 
     // non-zero sestting case
     runG1ConcRefinementThreadsTest(
@@ -77,7 +77,7 @@
   private static void checkG1ConcRefinementThreadsConsistency(String output, int expectedValue) {
     int actualValue = getIntValue("G1ConcRefinementThreads", output);
 
-    if (expectedValue == 0) {
+    if (expectedValue == AUTO_SELECT_THREADS_COUNT) {
       // If expectedValue is automatically selected, set it same as ParallelGCThreads.
       expectedValue = getIntValue("ParallelGCThreads", output);
     }