Merge
author amurillo
Thu, 28 May 2015 11:37:13 -0700
changeset 30886 d2a0ec86d6ef
parent 30779 92bb39a2a876 (current diff)
parent 30885 c8ac8bbecc64 (diff)
child 30887 7bd2db9e41b8
child 31018 794e08f6dad4
child 31019 d05fcdd70109
child 31021 1dbe2a0324d6
child 31028 8c5b4d489c41
child 31045 31769e6e8293
Merge
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Thu May 28 11:37:13 2015 -0700
@@ -3768,7 +3768,7 @@
   return NULL;
 }
 
-#define EXIT_TIMEOUT     PRODUCT_ONLY(1000) NOT_PRODUCT(4000) /* 1 sec in product, 4 sec in debug */
+#define EXIT_TIMEOUT 300000 /* 5 minutes */
 
 static BOOL CALLBACK init_crit_sect_call(PINIT_ONCE, PVOID pcrit_sect, PVOID*) {
   InitializeCriticalSection((CRITICAL_SECTION*)pcrit_sect);
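EXIT_TIMEOUT bounds, in milliseconds, how long the exiting thread waits for other threads that are exiting at the same time; the change replaces the 1-second (product) / 4-second (debug) bound with a flat 5 minutes. A minimal sketch of that kind of bounded wait, assuming a WaitForMultipleObjects-style wait over registered exiting-thread handles (the handle bookkeeping below is hypothetical, not the actual os_windows.cpp logic):

    #include <windows.h>

    #define EXIT_TIMEOUT 300000 /* 5 minutes, in milliseconds */

    // Hypothetical helper: wait, bounded by EXIT_TIMEOUT, for a set of
    // previously registered exiting-thread handles before continuing exit.
    static void wait_for_exiting_threads(HANDLE* handles, DWORD handle_count) {
      if (handle_count == 0) return;
      DWORD res = WaitForMultipleObjects(handle_count, handles,
                                         TRUE /* wait for all */, EXIT_TIMEOUT);
      // WAIT_TIMEOUT only means the bound was hit; exit proceeds regardless.
      (void)res;
    }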
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Thu May 28 11:37:13 2015 -0700
@@ -1469,7 +1469,9 @@
   } else {
     guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
               "Assumption");
-    flag_type = T_BYTE;
+    // Use unsigned type T_BOOLEAN here rather than signed T_BYTE since some platforms, eg. ARM,
+    // need to use unsigned instructions to use the large offset to load the satb_mark_queue.
+    flag_type = T_BOOLEAN;
   }
   LIR_Opr thrd = getThreadPointer();
   LIR_Address* mark_active_flag_addr =
--- a/hotspot/src/share/vm/gc/cms/cmsOopClosures.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/cmsOopClosures.hpp	Thu May 28 11:37:13 2015 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_CMS_CMSOOPCLOSURES_HPP
 
 #include "gc/shared/genOopClosures.hpp"
+#include "gc/shared/taskqueue.hpp"
 #include "memory/iterator.hpp"
 
 /////////////////////////////////////////////////////////////////
--- a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp	Thu May 28 11:37:13 2015 -0700
@@ -641,6 +641,7 @@
 class FreeListSpace_DCTOC : public Filtering_DCTOC {
   CompactibleFreeListSpace* _cfls;
   CMSCollector* _collector;
+  bool _parallel;
 protected:
   // Override.
 #define walk_mem_region_with_cl_DECL(ClosureType)                       \
@@ -661,9 +662,10 @@
                       CMSCollector* collector,
                       ExtendedOopClosure* cl,
                       CardTableModRefBS::PrecisionStyle precision,
-                      HeapWord* boundary) :
+                      HeapWord* boundary,
+                      bool parallel) :
     Filtering_DCTOC(sp, cl, precision, boundary),
-    _cfls(sp), _collector(collector) {}
+    _cfls(sp), _collector(collector), _parallel(parallel) {}
 };
 
 // We de-virtualize the block-related calls below, since we know that our
@@ -674,10 +676,7 @@
                                                  HeapWord* bottom,              \
                                                  HeapWord* top,                 \
                                                  ClosureType* cl) {             \
-   bool is_par = GenCollectedHeap::heap()->n_par_threads() > 0;                 \
-   if (is_par) {                                                                \
-     assert(GenCollectedHeap::heap()->n_par_threads() ==                        \
-            GenCollectedHeap::heap()->workers()->active_workers(), "Mismatch"); \
+   if (_parallel) {                                                             \
      walk_mem_region_with_cl_par(mr, bottom, top, cl);                          \
    } else {                                                                     \
      walk_mem_region_with_cl_nopar(mr, bottom, top, cl);                        \
@@ -747,8 +746,9 @@
 DirtyCardToOopClosure*
 CompactibleFreeListSpace::new_dcto_cl(ExtendedOopClosure* cl,
                                       CardTableModRefBS::PrecisionStyle precision,
-                                      HeapWord* boundary) {
-  return new FreeListSpace_DCTOC(this, _collector, cl, precision, boundary);
+                                      HeapWord* boundary,
+                                      bool parallel) {
+  return new FreeListSpace_DCTOC(this, _collector, cl, precision, boundary, parallel);
 }
 
 
@@ -1897,11 +1897,9 @@
   assert(chunk->is_free() && ffc->is_free(), "Error");
   _bt.split_block((HeapWord*)chunk, chunk->size(), new_size);
   if (rem_sz < SmallForDictionary) {
-    bool is_par = (GenCollectedHeap::heap()->n_par_threads() > 0);
+    // The freeList lock is held, but multiple GC task threads might be executing in parallel.
+    bool is_par = Thread::current()->is_GC_task_thread();
     if (is_par) _indexedFreeListParLocks[rem_sz]->lock();
-    assert(!is_par ||
-           (GenCollectedHeap::heap()->n_par_threads() ==
-            GenCollectedHeap::heap()->workers()->active_workers()), "Mismatch");
     returnChunkToFreeList(ffc);
     split(size, rem_sz);
     if (is_par) _indexedFreeListParLocks[rem_sz]->unlock();
@@ -1972,8 +1970,6 @@
 
 bool CompactibleFreeListSpace::no_allocs_since_save_marks() {
   assert(_promoInfo.tracking(), "No preceding save_marks?");
-  assert(GenCollectedHeap::heap()->n_par_threads() == 0,
-         "Shouldn't be called if using parallel gc.");
   return _promoInfo.noPromotions();
 }
 
@@ -1981,8 +1977,6 @@
                                                                             \
 void CompactibleFreeListSpace::                                             \
 oop_since_save_marks_iterate##nv_suffix(OopClosureType* blk) {              \
-  assert(GenCollectedHeap::heap()->n_par_threads() == 0,                    \
-         "Shouldn't be called (yet) during parallel part of gc.");          \
   _promoInfo.promoted_oops_iterate##nv_suffix(blk);                         \
   /*                                                                        \
    * This also restores any displaced headers and removes the elements from \
--- a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.hpp	Thu May 28 11:37:13 2015 -0700
@@ -438,7 +438,8 @@
   // Override: provides a DCTO_CL specific to this kind of space.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
                                      CardTableModRefBS::PrecisionStyle precision,
-                                     HeapWord* boundary);
+                                     HeapWord* boundary,
+                                     bool parallel);
 
   void blk_iterate(BlkClosure* cl);
   void blk_iterate_careful(BlkClosureCareful* cl);
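In compactibleFreeListSpace.{cpp,hpp} the caller now decides whether dirty-card iteration runs in parallel and threads that decision through new_dcto_cl() into the closure, instead of the closure inferring it from GenCollectedHeap::heap()->n_par_threads(). A stripped-down sketch of that shape, with hypothetical names (DirtyCardClosure, walk_parallel, walk_serial) standing in for the real DCTOC machinery:

    // Hypothetical, simplified: parallelism is an explicit constructor argument
    // consulted at use time, rather than a query against global heap state.
    class DirtyCardClosure {
      bool _parallel;
     public:
      explicit DirtyCardClosure(bool parallel) : _parallel(parallel) {}
      void walk_mem_region() {
        if (_parallel) {
          walk_parallel();
        } else {
          walk_serial();
        }
      }
     private:
      void walk_parallel() { /* lock-aware variant in the real code */ }
      void walk_serial()   { /* no synchronization needed */ }
    };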
--- a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp	Thu May 28 11:37:13 2015 -0700
@@ -2428,14 +2428,18 @@
   MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
-  gch->gen_process_roots(_cmsGen->level(),
-                         true,   // younger gens are roots
-                         true,   // activate StrongRootsScope
-                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
-                         should_unload_classes(),
-                         &notOlder,
-                         NULL,
-                         NULL);  // SSS: Provide correct closure
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           _cmsGen->level(),
+                           true,   // younger gens are roots
+                           GenCollectedHeap::ScanningOption(roots_scanning_options()),
+                           should_unload_classes(),
+                           &notOlder,
+                           NULL,
+                           NULL);
+  }
 
   // Now mark from the roots
   MarkFromRootsClosure markFromRootsClosure(this, _span,
@@ -2496,14 +2500,18 @@
 
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
-  gch->gen_process_roots(_cmsGen->level(),
-                         true,   // younger gens are roots
-                         true,   // activate StrongRootsScope
-                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
-                         should_unload_classes(),
-                         &notOlder,
-                         NULL,
-                         &cld_closure);
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           _cmsGen->level(),
+                           true,   // younger gens are roots
+                           GenCollectedHeap::ScanningOption(roots_scanning_options()),
+                           should_unload_classes(),
+                           &notOlder,
+                           NULL,
+                           &cld_closure);
+  }
 
   // Now mark from the roots
   MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
@@ -2913,10 +2921,11 @@
 
 // Parallel initial mark task
 class CMSParInitialMarkTask: public CMSParMarkTask {
+  StrongRootsScope* _strong_roots_scope;
  public:
-  CMSParInitialMarkTask(CMSCollector* collector, uint n_workers) :
-      CMSParMarkTask("Scan roots and young gen for initial mark in parallel",
-                     collector, n_workers) {}
+  CMSParInitialMarkTask(CMSCollector* collector, StrongRootsScope* strong_roots_scope, uint n_workers) :
+      CMSParMarkTask("Scan roots and young gen for initial mark in parallel", collector, n_workers),
+      _strong_roots_scope(strong_roots_scope) {}
   void work(uint worker_id);
 };
 
@@ -3004,24 +3013,26 @@
       FlexibleWorkGang* workers = gch->workers();
       assert(workers != NULL, "Need parallel worker threads.");
       uint n_workers = workers->active_workers();
-      CMSParInitialMarkTask tsk(this, n_workers);
-      gch->set_par_threads(n_workers);
+
+      StrongRootsScope srs(n_workers);
+
+      CMSParInitialMarkTask tsk(this, &srs, n_workers);
       initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
       if (n_workers > 1) {
-        StrongRootsScope srs;
         workers->run_task(&tsk);
       } else {
-        StrongRootsScope srs;
         tsk.work(0);
       }
-      gch->set_par_threads(0);
     } else {
       // The serial version.
       CLDToOopClosure cld_closure(&notOlder, true);
       gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-      gch->gen_process_roots(_cmsGen->level(),
+
+      StrongRootsScope srs(1);
+
+      gch->gen_process_roots(&srs,
+                             _cmsGen->level(),
                              true,   // younger gens are roots
-                             true,   // activate StrongRootsScope
                              GenCollectedHeap::ScanningOption(roots_scanning_options()),
                              should_unload_classes(),
                              &notOlder,
@@ -4452,9 +4463,9 @@
 
   CLDToOopClosure cld_closure(&par_mri_cl, true);
 
-  gch->gen_process_roots(_collector->_cmsGen->level(),
+  gch->gen_process_roots(_strong_roots_scope,
+                         _collector->_cmsGen->level(),
                          false,     // yg was scanned above
-                         false,     // this is parallel code
                          GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mri_cl,
@@ -4478,6 +4489,7 @@
   // The per-thread work queues, available here for stealing.
   OopTaskQueueSet*       _task_queues;
   ParallelTaskTerminator _term;
+  StrongRootsScope*      _strong_roots_scope;
 
  public:
   // A value of 0 passed to n_workers will cause the number of
@@ -4485,12 +4497,14 @@
   CMSParRemarkTask(CMSCollector* collector,
                    CompactibleFreeListSpace* cms_space,
                    uint n_workers, FlexibleWorkGang* workers,
-                   OopTaskQueueSet* task_queues):
+                   OopTaskQueueSet* task_queues,
+                   StrongRootsScope* strong_roots_scope):
     CMSParMarkTask("Rescan roots and grey objects in parallel",
                    collector, n_workers),
     _cms_space(cms_space),
     _task_queues(task_queues),
-    _term(n_workers, task_queues) { }
+    _term(n_workers, task_queues),
+    _strong_roots_scope(strong_roots_scope) { }
 
   OopTaskQueueSet* task_queues() { return _task_queues; }
 
@@ -4588,9 +4602,9 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_roots(_collector->_cmsGen->level(),
+  gch->gen_process_roots(_strong_roots_scope,
+                         _collector->_cmsGen->level(),
                          false,     // yg was scanned above
-                         false,     // this is parallel code
                          GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mrias_cl,
@@ -5058,22 +5072,15 @@
   FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   // Choose to use the number of GC workers most recently set
-  // into "active_workers".  If active_workers is not set, set it
-  // to ParallelGCThreads.
+  // into "active_workers".
   uint n_workers = workers->active_workers();
-  if (n_workers == 0) {
-    assert(n_workers > 0, "Should have been set during scavenge");
-    n_workers = ParallelGCThreads;
-    workers->set_active_workers(n_workers);
-  }
+
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
 
-  CMSParRemarkTask tsk(this,
-    cms_space,
-    n_workers, workers, task_queues());
-
-  // Set up for parallel process_roots work.
-  gch->set_par_threads(n_workers);
+  StrongRootsScope srs(n_workers);
+
+  CMSParRemarkTask tsk(this, cms_space, n_workers, workers, task_queues(), &srs);
+
   // We won't be iterating over the cards in the card table updating
   // the younger_gen cards, so we shouldn't call the following else
   // the verification code as well as subsequent younger_refs_iterate
@@ -5105,15 +5112,12 @@
     // necessarily be so, since it's possible that we are doing
     // ST marking.
     ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
-    StrongRootsScope srs;
     workers->run_task(&tsk);
   } else {
     ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
-    StrongRootsScope srs;
     tsk.work(0);
   }
 
-  gch->set_par_threads(0);  // 0 ==> non-parallel.
   // restore, single-threaded for now, any preserved marks
   // as a result of work_q overflow
   restore_preserved_marks_if_any();
@@ -5177,11 +5181,11 @@
     verify_work_stacks_empty();
 
     gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-    StrongRootsScope srs;
-
-    gch->gen_process_roots(_cmsGen->level(),
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           _cmsGen->level(),
                            true,  // younger gens as roots
-                           false, // use the local StrongRootsScope
                            GenCollectedHeap::ScanningOption(roots_scanning_options()),
                            should_unload_classes(),
                            &mrias_cl,
@@ -5254,18 +5258,14 @@
                       CMSBitMap*       mark_bit_map,
                       AbstractWorkGang* workers,
                       OopTaskQueueSet* task_queues):
-    // XXX Should superclass AGTWOQ also know about AWG since it knows
-    // about the task_queues used by the AWG? Then it could initialize
-    // the terminator() object. See 6984287. The set_for_termination()
-    // below is a temporary band-aid for the regression in 6984287.
     AbstractGangTaskWOopQueues("Process referents by policy in parallel",
-      task_queues),
+      task_queues,
+      workers->active_workers()),
     _task(task),
     _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   {
     assert(_collector->_span.equals(_span) && !_span.is_empty(),
            "Inconsistency in _span");
-    set_for_termination(workers->active_workers());
   }
 
   OopTaskQueueSet* task_queues() { return queues(); }
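The recurring change in concurrentMarkSweepGeneration.cpp: a StrongRootsScope is constructed with the number of threads that will claim strong roots, kept alive for the whole root-processing phase, and passed to gen_process_roots(); the old "activate StrongRootsScope" boolean and the set_par_threads()/set_for_termination() bookkeeping are gone. A self-contained, hypothetical miniature of that RAII pattern (SimpleRootsScope and process_roots are illustrative names only):

    // Hypothetical miniature of the scope-object pattern used above.
    class SimpleRootsScope {
      unsigned _n_threads;            // how many workers will claim root groups
     public:
      explicit SimpleRootsScope(unsigned n_threads) : _n_threads(n_threads) {
        // the real scope resets per-phase claim state on construction
      }
      unsigned n_threads() const { return _n_threads; }
      ~SimpleRootsScope() {
        // ... and tears it down when root processing is done
      }
    };

    void process_roots(SimpleRootsScope* scope) {
      // root groups are partitioned among scope->n_threads() claimants
      (void)scope;
    }

    void serial_caller() {
      SimpleRootsScope srs(1);        // exactly one claimant
      process_roots(&srs);
    }                                 // scope ends together with the phase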
--- a/hotspot/src/share/vm/gc/cms/parCardTableModRefBS.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/parCardTableModRefBS.cpp	Thu May 28 11:37:13 2015 -0700
@@ -39,16 +39,11 @@
 void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
                                                              OopsInGenClosure* cl,
                                                              CardTableRS* ct,
-                                                             int n_threads) {
-  assert(n_threads > 0, "Error: expected n_threads > 0");
-  assert((n_threads == 1 && ParallelGCThreads == 0) ||
-         n_threads <= (int)ParallelGCThreads,
-         "# worker threads != # requested!");
-  assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread");
-  assert(UseDynamicNumberOfGCThreads ||
-         !FLAG_IS_DEFAULT(ParallelGCThreads) ||
-         n_threads == (int)ParallelGCThreads,
-         "# worker threads != # requested!");
+                                                             uint n_threads) {
+  assert(n_threads > 0, "expected n_threads > 0");
+  assert(n_threads <= ParallelGCThreads,
+         err_msg("n_threads: %u > ParallelGCThreads: " UINTX_FORMAT, n_threads, ParallelGCThreads));
+
   // Make sure the LNC array is valid for the space.
   jbyte**   lowest_non_clean;
   uintptr_t lowest_non_clean_base_chunk_index;
@@ -66,7 +61,8 @@
 
   uint stride = 0;
   while (!pst->is_task_claimed(/* reference */ stride)) {
-    process_stride(sp, mr, stride, n_strides, cl, ct,
+    process_stride(sp, mr, stride, n_strides,
+                   cl, ct,
                    lowest_non_clean,
                    lowest_non_clean_base_chunk_index,
                    lowest_non_clean_chunk_size);
@@ -132,9 +128,13 @@
     assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)");
     assert(used.contains(chunk_mr), "chunk_mr should be subset of used");
 
+    // This function is used by the parallel card table iteration.
+    const bool parallel = true;
+
     DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(),
-                                                     cl->gen_boundary());
-    ClearNoncleanCardWrapper clear_cl(dcto_cl, ct);
+                                                     cl->gen_boundary(),
+                                                     parallel);
+    ClearNoncleanCardWrapper clear_cl(dcto_cl, ct, parallel);
 
 
     // Process the chunk.
--- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp	Thu May 28 11:37:13 2015 -0700
@@ -567,23 +567,15 @@
 }
 
 ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* old_gen,
-                             HeapWord* young_old_boundary, ParScanThreadStateSet* state_set) :
+                             HeapWord* young_old_boundary, ParScanThreadStateSet* state_set,
+                             StrongRootsScope* strong_roots_scope) :
     AbstractGangTask("ParNewGeneration collection"),
     _gen(gen), _old_gen(old_gen),
     _young_old_boundary(young_old_boundary),
-    _state_set(state_set)
+    _state_set(state_set),
+    _strong_roots_scope(strong_roots_scope)
   {}
 
-// Reset the terminator for the given number of
-// active threads.
-void ParNewGenTask::set_for_termination(uint active_workers) {
-  _state_set->reset(active_workers, _gen->promotion_failed());
-  // Should the heap be passed in?  There's only 1 for now so
-  // grab it instead.
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->set_n_termination(active_workers);
-}
-
 void ParNewGenTask::work(uint worker_id) {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   // Since this is being done in a separate thread, need new resource
@@ -603,10 +595,10 @@
                                            false);
 
   par_scan_state.start_strong_roots();
-  gch->gen_process_roots(_gen->level(),
+  gch->gen_process_roots(_strong_roots_scope,
+                         _gen->level(),
                          true,  // Process younger gens, if any,
                                 // as strong roots.
-                         false, // no scope; this is parallel code
                          GenCollectedHeap::SO_ScavengeCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &par_scan_state.to_space_root_closure(),
@@ -759,9 +751,6 @@
 
 private:
   virtual void work(uint worker_id);
-  virtual void set_for_termination(uint active_workers) {
-    _state_set.terminator()->reset_for_reuse(active_workers);
-  }
 private:
   ParNewGeneration&      _gen;
   ProcessTask&           _task;
@@ -838,7 +827,6 @@
 {
   _state_set.flush();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->set_par_threads(0);  // 0 ==> non-parallel.
   gch->save_marks();
 }
 
@@ -939,33 +927,35 @@
   to()->clear(SpaceDecorator::Mangle);
 
   gch->save_marks();
-  assert(workers != NULL, "Need parallel worker threads.");
-  uint n_workers = active_workers;
 
   // Set the correct parallelism (number of queues) in the reference processor
-  ref_processor()->set_active_mt_degree(n_workers);
+  ref_processor()->set_active_mt_degree(active_workers);
 
   // Always set the terminator for the active number of workers
   // because only those workers go through the termination protocol.
-  ParallelTaskTerminator _term(n_workers, task_queues());
-  ParScanThreadStateSet thread_state_set(workers->active_workers(),
+  ParallelTaskTerminator _term(active_workers, task_queues());
+  ParScanThreadStateSet thread_state_set(active_workers,
                                          *to(), *this, *_old_gen, *task_queues(),
                                          _overflow_stacks, desired_plab_sz(), _term);
 
-  ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set);
-  gch->set_par_threads(n_workers);
-  gch->rem_set()->prepare_for_younger_refs_iterate(true);
-  // It turns out that even when we're using 1 thread, doing the work in a
-  // separate thread causes wide variance in run times.  We can't help this
-  // in the multi-threaded case, but we special-case n=1 here to get
-  // repeatable measurements of the 1-thread overhead of the parallel code.
-  if (n_workers > 1) {
-    StrongRootsScope srs;
-    workers->run_task(&tsk);
-  } else {
-    StrongRootsScope srs;
-    tsk.work(0);
+  thread_state_set.reset(active_workers, promotion_failed());
+
+  {
+    StrongRootsScope srs(active_workers);
+
+    ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set, &srs);
+    gch->rem_set()->prepare_for_younger_refs_iterate(true);
+    // It turns out that even when we're using 1 thread, doing the work in a
+    // separate thread causes wide variance in run times.  We can't help this
+    // in the multi-threaded case, but we special-case n=1 here to get
+    // repeatable measurements of the 1-thread overhead of the parallel code.
+    if (active_workers > 1) {
+      workers->run_task(&tsk);
+    } else {
+      tsk.work(0);
+    }
   }
+
   thread_state_set.reset(0 /* Bad value in debug if not reset */,
                          promotion_failed());
 
@@ -995,7 +985,6 @@
                                               _gc_timer, _gc_tracer.gc_id());
   } else {
     thread_state_set.flush();
-    gch->set_par_threads(0);  // 0 ==> non-parallel.
     gch->save_marks();
     stats = rp->process_discovered_references(&is_alive, &keep_alive,
                                               &evacuate_followers, NULL,
@@ -1033,7 +1022,7 @@
   to()->set_concurrent_iteration_safe_limit(to()->top());
 
   if (ResizePLAB) {
-    plab_stats()->adjust_desired_plab_sz(n_workers);
+    plab_stats()->adjust_desired_plab_sz(active_workers);
   }
 
   if (PrintGC && !PrintGCDetails) {
@@ -1477,9 +1466,9 @@
     _ref_processor =
       new ReferenceProcessor(_reserved,                  // span
                              ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
-                             (int) ParallelGCThreads,    // mt processing degree
+                             (uint) ParallelGCThreads,   // mt processing degree
                              refs_discovery_is_mt(),     // mt discovery
-                             (int) ParallelGCThreads,    // mt discovery degree
+                             (uint) ParallelGCThreads,   // mt discovery degree
                              refs_discovery_is_atomic(), // atomic_discovery
                              NULL);                      // is_alive_non_header
   }
--- a/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp	Thu May 28 11:37:13 2015 -0700
@@ -39,6 +39,7 @@
 class ParRootScanWithoutBarrierClosure;
 class ParRootScanWithBarrierTwoGensClosure;
 class ParEvacuateFollowersClosure;
+class StrongRootsScope;
 
 // It would be better if these types could be kept local to the .cpp file,
 // but they must be here to allow ParScanClosure::do_oop_work to be defined
@@ -237,20 +238,18 @@
   Generation*                  _old_gen;
   HeapWord*                    _young_old_boundary;
   class ParScanThreadStateSet* _state_set;
+  StrongRootsScope*            _strong_roots_scope;
 
 public:
   ParNewGenTask(ParNewGeneration*      gen,
                 Generation*            old_gen,
                 HeapWord*              young_old_boundary,
-                ParScanThreadStateSet* state_set);
+                ParScanThreadStateSet* state_set,
+                StrongRootsScope*      strong_roots_scope);
 
   HeapWord* young_old_boundary() { return _young_old_boundary; }
 
   void work(uint worker_id);
-
-  // Reset the terminator in ParScanThreadStateSet for
-  // "active_workers" threads.
-  virtual void set_for_termination(uint active_workers);
 };
 
 class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
--- a/hotspot/src/share/vm/gc/cms/parOopClosures.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/parOopClosures.hpp	Thu May 28 11:37:13 2015 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_CMS_PAROOPCLOSURES_HPP
 
 #include "gc/shared/genOopClosures.hpp"
+#include "gc/shared/taskqueue.hpp"
 #include "memory/padded.hpp"
 
 // Closures for ParNewGeneration
--- a/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp	Thu May 28 11:37:13 2015 -0700
@@ -147,6 +147,13 @@
   bool completed() const { return _status == COMPLETED; }
   bool aborted()   const { return _status == ABORTED; }
   bool active()    const { return _status == ACTIVE; }
+
+  // This method configures the task for proper termination.
+  // Some tasks do not have any requirements on termination
+  // and may inherit this method that does nothing.  Some
+  // tasks do some coordination on termination and override
+  // this method to implement that coordination.
+  virtual void set_for_termination(uint active_workers) {}
 };
 // Class YieldingWorkGang: A subclass of WorkGang.
 // In particular, a YieldingWorkGang is made up of
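The virtual set_for_termination() added above gives a task an optional hook, called with the active worker count before the task runs, for sizing its termination protocol; tasks without such needs inherit the empty default. A hypothetical miniature of the hook (TaskBase and CoordinatedTask are illustrative only; in this changeset the concrete analogue is a terminator resized via reset_for_reuse()):

    // Hypothetical miniature: a do-nothing default plus one overriding task.
    class TaskBase {
     public:
      virtual ~TaskBase() {}
      virtual void work(unsigned worker_id) = 0;
      // Default: no termination coordination required.
      virtual void set_for_termination(unsigned active_workers) {}
    };

    class CoordinatedTask : public TaskBase {
      unsigned _expected_workers;     // stands in for a ParallelTaskTerminator
     public:
      CoordinatedTask() : _expected_workers(0) {}
      virtual void work(unsigned worker_id) { (void)worker_id; }
      virtual void set_for_termination(unsigned active_workers) {
        // Size the termination protocol to the workers actually running.
        _expected_workers = active_workers;
      }
    };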
--- a/hotspot/src/share/vm/gc/g1/collectionSetChooser.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/collectionSetChooser.cpp	Thu May 28 11:37:13 2015 -0700
@@ -158,20 +158,10 @@
   hr->calc_gc_efficiency();
 }
 
-void CollectionSetChooser::prepare_for_par_region_addition(uint n_regions,
+void CollectionSetChooser::prepare_for_par_region_addition(uint n_threads,
+                                                           uint n_regions,
                                                            uint chunk_size) {
   _first_par_unreserved_idx = 0;
-  uint n_threads = (uint) ParallelGCThreads;
-  if (UseDynamicNumberOfGCThreads) {
-    assert(G1CollectedHeap::heap()->workers()->active_workers() > 0,
-      "Should have been set earlier");
-    // This is defensive code. As the assertion above says, the number
-    // of active threads should be > 0, but in case there is some path
-    // or some improperly initialized variable with leads to no
-    // active threads, protect against that in a product build.
-    n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
-                     1U);
-  }
   uint max_waste = n_threads * chunk_size;
   // it should be aligned with respect to chunk_size
   uint aligned_n_regions = (n_regions + chunk_size - 1) / chunk_size * chunk_size;
--- a/hotspot/src/share/vm/gc/g1/collectionSetChooser.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/collectionSetChooser.hpp	Thu May 28 11:37:13 2015 -0700
@@ -121,7 +121,7 @@
 
   // Must be called before calls to claim_array_chunk().
   // n_regions is the number of regions, chunk_size the chunk size.
-  void prepare_for_par_region_addition(uint n_regions, uint chunk_size);
+  void prepare_for_par_region_addition(uint n_threads, uint n_regions, uint chunk_size);
   // Returns the first index in a contiguous chunk of chunk_size indexes
   // that the calling thread has reserved.  These must be set by the
   // calling thread using set_region() (to NULL if necessary).
--- a/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp	Thu May 28 11:37:13 2015 -0700
@@ -35,7 +35,7 @@
 {
   // Ergonomically select initial concurrent refinement parameters
   if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
-    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
+    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, (intx)ParallelGCThreads);
   }
   set_green_zone(G1ConcRefinementGreenZone);
 
--- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp	Thu May 28 11:37:13 2015 -0700
@@ -518,7 +518,7 @@
   _markStack(this),
   // _finger set in set_non_marking_state
 
-  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
+  _max_worker_id((uint)ParallelGCThreads),
   // _active_tasks set in set_non_marking_state
   // _tasks set inside the constructor
   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
@@ -1218,15 +1218,13 @@
     "Maximum number of marking threads exceeded");
 
   uint active_workers = MAX2(1U, parallel_marking_threads());
+  assert(active_workers > 0, "Should have been set");
 
   // Parallel task terminator is set in "set_concurrency_and_phase()"
   set_concurrency_and_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   _parallel_workers->set_active_workers(active_workers);
-  // Don't set _n_par_threads because it affects MT in process_roots()
-  // and the decisions on that MT processing is made elsewhere.
-  assert(_parallel_workers->active_workers() > 0, "Should have been set");
   _parallel_workers->run_task(&markingTask);
   print_stats();
 }
@@ -1761,28 +1759,20 @@
   }
 };
 
-class G1ParNoteEndTask;
-
 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
   G1CollectedHeap* _g1;
-  size_t _max_live_bytes;
-  uint _regions_claimed;
   size_t _freed_bytes;
   FreeRegionList* _local_cleanup_list;
   HeapRegionSetCount _old_regions_removed;
   HeapRegionSetCount _humongous_regions_removed;
   HRRSCleanupTask* _hrrs_cleanup_task;
-  double _claimed_region_time;
-  double _max_region_time;
 
 public:
   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                              FreeRegionList* local_cleanup_list,
                              HRRSCleanupTask* hrrs_cleanup_task) :
     _g1(g1),
-    _max_live_bytes(0), _regions_claimed(0),
     _freed_bytes(0),
-    _claimed_region_time(0.0), _max_region_time(0.0),
     _local_cleanup_list(local_cleanup_list),
     _old_regions_removed(),
     _humongous_regions_removed(),
@@ -1799,10 +1789,7 @@
     // We use a claim value of zero here because all regions
     // were claimed with value 1 in the FinalCount task.
     _g1->reset_gc_time_stamps(hr);
-    double start = os::elapsedTime();
-    _regions_claimed++;
     hr->note_end_of_marking();
-    _max_live_bytes += hr->max_live_bytes();
 
     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
       _freed_bytes += hr->used();
@@ -1819,18 +1806,8 @@
       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
     }
 
-    double region_time = (os::elapsedTime() - start);
-    _claimed_region_time += region_time;
-    if (region_time > _max_region_time) {
-      _max_region_time = region_time;
-    }
     return false;
   }
-
-  size_t max_live_bytes() { return _max_live_bytes; }
-  uint regions_claimed() { return _regions_claimed; }
-  double claimed_region_time_sec() { return _claimed_region_time; }
-  double max_region_time_sec() { return _max_region_time; }
 };
 
 class G1ParNoteEndTask: public AbstractGangTask {
@@ -1838,14 +1815,12 @@
 
 protected:
   G1CollectedHeap* _g1h;
-  size_t _max_live_bytes;
-  size_t _freed_bytes;
   FreeRegionList* _cleanup_list;
   HeapRegionClaimer _hrclaimer;
 
 public:
   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
-      AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
+      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
   }
 
   void work(uint worker_id) {
@@ -1861,8 +1836,6 @@
     {
       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
-      _max_live_bytes += g1_note_end.max_live_bytes();
-      _freed_bytes += g1_note_end.freed_bytes();
 
       // If we iterate over the global cleanup list at the end of
       // cleanup to do this printing we will not guarantee to only
@@ -1887,8 +1860,6 @@
       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
     }
   }
-  size_t max_live_bytes() { return _max_live_bytes; }
-  size_t freed_bytes() { return _freed_bytes; }
 };
 
 class G1ParScrubRemSetTask: public AbstractGangTask {
@@ -1938,18 +1909,10 @@
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
-  uint n_workers;
-
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
 
-  g1h->set_par_threads();
-  n_workers = g1h->n_par_threads();
-  assert(g1h->n_par_threads() == n_workers,
-         "Should not have been reset");
   g1h->workers()->run_task(&g1_par_count_task);
-  // Done with the parallel phase so reset to 0.
-  g1h->set_par_threads(0);
 
   if (VerifyDuringGC) {
     // Verify that the counting data accumulated during marking matches
@@ -1965,10 +1928,7 @@
                                                  &expected_region_bm,
                                                  &expected_card_bm);
 
-    g1h->set_par_threads((int)n_workers);
     g1h->workers()->run_task(&g1_par_verify_task);
-    // Done with the parallel phase so reset to 0.
-    g1h->set_par_threads(0);
 
     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
   }
@@ -1990,11 +1950,11 @@
 
   g1h->reset_gc_time_stamp();
 
+  uint n_workers = _g1h->workers()->active_workers();
+
   // Note end of marking in all heap regions.
   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
-  g1h->set_par_threads((int)n_workers);
   g1h->workers()->run_task(&g1_par_note_end_task);
-  g1h->set_par_threads(0);
   g1h->check_gc_time_stamps();
 
   if (!cleanup_list_is_empty()) {
@@ -2009,9 +1969,7 @@
   if (G1ScrubRemSets) {
     double rs_scrub_start = os::elapsedTime();
     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
-    g1h->set_par_threads((int)n_workers);
     g1h->workers()->run_task(&g1_par_scrub_rs_task);
-    g1h->set_par_threads(0);
 
     double rs_scrub_end = os::elapsedTime();
     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
@@ -2020,7 +1978,7 @@
 
   // this will also free any regions totally full of garbage objects,
   // and sort the regions.
-  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
 
   // Statistics.
   double end = os::elapsedTime();
@@ -2312,9 +2270,7 @@
   // and overflow handling in CMTask::do_marking_step() knows
   // how many workers to wait for.
   _cm->set_concurrency(_active_workers);
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&proc_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
@@ -2344,9 +2300,7 @@
   // and overflow handling in CMTask::do_marking_step() knows
   // how many workers to wait for.
   _cm->set_concurrency(_active_workers);
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&enq_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
@@ -2608,27 +2562,23 @@
 
   g1h->ensure_parsability(false);
 
-  StrongRootsScope srs;
   // this is remark, so we'll use up all active threads
   uint active_workers = g1h->workers()->active_workers();
-  if (active_workers == 0) {
-    assert(active_workers > 0, "Should have been set earlier");
-    active_workers = (uint) ParallelGCThreads;
-    g1h->workers()->set_active_workers(active_workers);
-  }
   set_concurrency_and_phase(active_workers, false /* concurrent */);
   // Leave _parallel_marking_threads at it's
   // value originally calculated in the ConcurrentMark
   // constructor and pass values of the active workers
   // through the gang in the task.
 
-  CMRemarkTask remarkTask(this, active_workers);
-  // We will start all available threads, even if we decide that the
-  // active_workers will be fewer. The extra ones will just bail out
-  // immediately.
-  g1h->set_par_threads(active_workers);
-  g1h->workers()->run_task(&remarkTask);
-  g1h->set_par_threads(0);
+  {
+    StrongRootsScope srs(active_workers);
+
+    CMRemarkTask remarkTask(this, active_workers);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
+    g1h->workers()->run_task(&remarkTask);
+  }
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   guarantee(has_overflown() ||
@@ -3001,9 +2951,7 @@
   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                            _max_worker_id, n_workers);
 
-  _g1h->set_par_threads(n_workers);
   _g1h->workers()->run_task(&g1_par_agg_task);
-  _g1h->set_par_threads(0);
 }
 
 // Clear the per-worker arrays used to store the per-region counting data
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Thu May 28 11:37:13 2015 -0700
@@ -1326,27 +1326,10 @@
         AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
                                                 workers()->active_workers(),
                                                 Threads::number_of_non_daemon_threads());
-      assert(UseDynamicNumberOfGCThreads ||
-             n_workers == workers()->total_workers(),
-             "If not dynamic should be using all the  workers");
       workers()->set_active_workers(n_workers);
-      // Set parallel threads in the heap (_n_par_threads) only
-      // before a parallel phase and always reset it to 0 after
-      // the phase so that the number of parallel threads does
-      // no get carried forward to a serial phase where there
-      // may be code that is "possibly_parallel".
-      set_par_threads(n_workers);
 
       ParRebuildRSTask rebuild_rs_task(this);
-      assert(UseDynamicNumberOfGCThreads ||
-             workers()->active_workers() == workers()->total_workers(),
-             "Unless dynamic should use total workers");
-      // Use the most recent number of  active workers
-      assert(workers()->active_workers() > 0,
-             "Active workers not properly set");
-      set_par_threads(workers()->active_workers());
       workers()->run_task(&rebuild_rs_task);
-      set_par_threads(0);
 
       // Rebuild the strong code root lists for each region
       rebuild_strong_code_roots();
@@ -1769,7 +1752,7 @@
   _allocator = G1Allocator::create_allocator(this);
   _humongous_object_threshold_in_words = HeapRegion::GrainWords / 2;
 
-  int n_queues = MAX2((int)ParallelGCThreads, 1);
+  int n_queues = (int)ParallelGCThreads;
   _task_queues = new RefToScanQueueSet(n_queues);
 
   uint n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
@@ -2081,11 +2064,11 @@
     new ReferenceProcessor(mr,    // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1),
                                 // mt processing
-                           (int) ParallelGCThreads,
+                           (uint) ParallelGCThreads,
                                 // degree of mt processing
                            (ParallelGCThreads > 1) || (ConcGCThreads > 1),
                                 // mt discovery
-                           (int) MAX2(ParallelGCThreads, ConcGCThreads),
+                           (uint) MAX2(ParallelGCThreads, ConcGCThreads),
                                 // degree of mt discovery
                            false,
                                 // Reference discovery is not atomic
@@ -2098,11 +2081,11 @@
     new ReferenceProcessor(mr,    // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1),
                                 // mt processing
-                           MAX2((int)ParallelGCThreads, 1),
+                           (uint) ParallelGCThreads,
                                 // degree of mt processing
                            (ParallelGCThreads > 1),
                                 // mt discovery
-                           MAX2((int)ParallelGCThreads, 1),
+                           (uint) ParallelGCThreads,
                                 // degree of mt discovery
                            true,
                                 // Reference discovery is atomic
@@ -2502,8 +2485,7 @@
   assert(_worker_cset_start_region != NULL, "sanity");
   assert(_worker_cset_start_region_time_stamp != NULL, "sanity");
 
-  int n_queues = MAX2((int)ParallelGCThreads, 1);
-  for (int i = 0; i < n_queues; i++) {
+  for (uint i = 0; i < ParallelGCThreads; i++) {
     _worker_cset_start_region[i] = NULL;
     _worker_cset_start_region_time_stamp[i] = 0;
   }
@@ -2541,9 +2523,6 @@
   result = g1_policy()->collection_set();
   uint cs_size = g1_policy()->cset_region_length();
   uint active_workers = workers()->active_workers();
-  assert(UseDynamicNumberOfGCThreads ||
-           active_workers == workers()->total_workers(),
-           "Unless dynamic should use total workers");
 
   uint end_ind   = (cs_size * worker_i) / active_workers;
   uint start_ind = 0;
@@ -3021,7 +3000,7 @@
     G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
 
     {
-      G1RootProcessor root_processor(this);
+      G1RootProcessor root_processor(this, 1);
       root_processor.process_all_roots(&rootsCl,
                                        &cldCl,
                                        &blobsCl);
@@ -3042,13 +3021,7 @@
     if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
 
       G1ParVerifyTask task(this, vo);
-      assert(UseDynamicNumberOfGCThreads ||
-        workers()->active_workers() == workers()->total_workers(),
-        "If not dynamic should be using all the workers");
-      uint n_workers = workers()->active_workers();
-      set_par_threads(n_workers);
       workers()->run_task(&task);
-      set_par_threads(0);
       if (task.failures()) {
         failures = true;
       }
@@ -3572,6 +3545,10 @@
 };
 #endif // ASSERT
 
+uint G1CollectedHeap::num_task_queues() const {
+  return _task_queues->size();
+}
+
 #if TASKQUEUE_STATS
 void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) {
   st->print_raw_cr("GC Task Stats");
@@ -3583,7 +3560,7 @@
   print_taskqueue_stats_hdr(st);
 
   TaskQueueStats totals;
-  const uint n = workers()->total_workers();
+  const uint n = num_task_queues();
   for (uint i = 0; i < n; ++i) {
     st->print("%3u ", i); task_queue(i)->stats.print(st); st->cr();
     totals += task_queue(i)->stats;
@@ -3594,7 +3571,7 @@
 }
 
 void G1CollectedHeap::reset_taskqueue_stats() {
-  const uint n = workers()->total_workers();
+  const uint n = num_task_queues();
   for (uint i = 0; i < n; ++i) {
     task_queue(i)->stats.reset();
   }
@@ -3696,9 +3673,6 @@
     uint active_workers = AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
                                                                   workers()->active_workers(),
                                                                   Threads::number_of_non_daemon_threads());
-    assert(UseDynamicNumberOfGCThreads ||
-           active_workers == workers()->total_workers(),
-           "If not dynamic should be using all the  workers");
     workers()->set_active_workers(active_workers);
 
     double pause_start_sec = os::elapsedTime();
@@ -3873,8 +3847,7 @@
 
         if (evacuation_failed()) {
           _allocator->set_used(recalculate_used());
-          uint n_queues = MAX2((int)ParallelGCThreads, 1);
-          for (uint i = 0; i < n_queues; i++) {
+          for (uint i = 0; i < ParallelGCThreads; i++) {
             if (_evacuation_failed_info_array[i].has_failed()) {
               _gc_tracer_stw->report_evacuation_failed(_evacuation_failed_info_array[i]);
             }
@@ -4041,10 +4014,8 @@
 void G1CollectedHeap::remove_self_forwarding_pointers() {
   double remove_self_forwards_start = os::elapsedTime();
 
-  set_par_threads();
   G1ParRemoveSelfForwardPtrsTask rsfp_task(this);
   workers()->run_task(&rsfp_task);
-  set_par_threads(0);
 
   // Now restore saved marks, if any.
   assert(_objs_with_preserved_marks.size() ==
@@ -4308,12 +4279,13 @@
   Mutex* stats_lock() { return &_stats_lock; }
 
 public:
-  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor)
+  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
       _root_processor(root_processor),
-      _terminator(0, _queues),
+      _terminator(n_workers, _queues),
+      _n_workers(n_workers),
       _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   {}
 
@@ -4325,12 +4297,6 @@
 
   ParallelTaskTerminator* terminator() { return &_terminator; }
 
-  virtual void set_for_termination(uint active_workers) {
-    _root_processor->set_num_workers(active_workers);
-    terminator()->reset_for_reuse(active_workers);
-    _n_workers = active_workers;
-  }
-
   // Helps out with CLD processing.
   //
   // During InitialMark we need to:
@@ -4811,19 +4777,14 @@
 
   G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols,
                                         n_workers, class_unloading_occurred);
-  set_par_threads(n_workers);
   workers()->run_task(&g1_unlink_task);
-  set_par_threads(0);
 }
 
 void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
                                                      bool process_strings, bool process_symbols) {
   {
-    uint n_workers = workers()->active_workers();
     G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
-    set_par_threads(n_workers);
     workers()->run_task(&g1_unlink_task);
-    set_par_threads(0);
   }
 
   if (G1StringDedup::is_enabled()) {
@@ -4851,13 +4812,9 @@
 void G1CollectedHeap::redirty_logged_cards() {
   double redirty_logged_cards_start = os::elapsedTime();
 
-  uint n_workers = workers()->active_workers();
-
   G1RedirtyLoggedCardsTask redirty_task(&dirty_card_queue_set());
   dirty_card_queue_set().reset_for_par_iteration();
-  set_par_threads(n_workers);
   workers()->run_task(&redirty_task);
-  set_par_threads(0);
 
   DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
   dcq.merge_bufferlists(&dirty_card_queue_set());
@@ -5093,9 +5050,7 @@
   ParallelTaskTerminator terminator(_active_workers, _queues);
   G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator);
 
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&proc_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 // Gang task for parallel reference enqueueing.
@@ -5124,9 +5079,7 @@
 
   G1STWRefEnqueueTaskProxy enq_task_proxy(enq_task);
 
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&enq_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 // End of weak reference support closures
@@ -5219,7 +5172,7 @@
 };
 
 // Weak Reference processing during an evacuation pause (part 1).
-void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) {
+void G1CollectedHeap::process_discovered_references() {
   double ref_proc_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5246,17 +5199,14 @@
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.
 
-  assert(no_of_gc_workers == workers()->active_workers(), "Need to reset active GC workers");
-
-  set_par_threads(no_of_gc_workers);
+  uint no_of_gc_workers = workers()->active_workers();
+
   G1ParPreserveCMReferentsTask keep_cm_referents(this,
                                                  no_of_gc_workers,
                                                  _task_queues);
 
   workers()->run_task(&keep_cm_referents);
 
-  set_par_threads(0);
-
   // Closure to test whether a referent is alive.
   G1STWIsAliveClosure is_alive(this);
 
@@ -5330,7 +5280,7 @@
 }
 
 // Weak Reference processing during an evacuation pause (part 2).
-void G1CollectedHeap::enqueue_discovered_references(uint no_of_gc_workers) {
+void G1CollectedHeap::enqueue_discovered_references() {
   double ref_enq_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5344,12 +5294,12 @@
   } else {
     // Parallel reference enqueueing
 
-    assert(no_of_gc_workers == workers()->active_workers(),
-           "Need to reset active workers");
-    assert(rp->num_q() == no_of_gc_workers, "sanity");
-    assert(no_of_gc_workers <= rp->max_num_q(), "sanity");
-
-    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers);
+    uint n_workers = workers()->active_workers();
+
+    assert(rp->num_q() == n_workers, "sanity");
+    assert(n_workers <= rp->max_num_q(), "sanity");
+
+    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, n_workers);
     rp->enqueue_discovered_references(&par_task_executor);
   }
 
@@ -5380,11 +5330,6 @@
   hot_card_cache->set_use_cache(false);
 
   const uint n_workers = workers()->active_workers();
-  assert(UseDynamicNumberOfGCThreads ||
-         n_workers == workers()->total_workers(),
-         "If not dynamic should be using all the  workers");
-  set_par_threads(n_workers);
-
 
   init_for_evac_failure(NULL);
 
@@ -5393,19 +5338,16 @@
   double end_par_time_sec;
 
   {
-    G1RootProcessor root_processor(this);
-    G1ParTask g1_par_task(this, _task_queues, &root_processor);
+    G1RootProcessor root_processor(this, n_workers);
+    G1ParTask g1_par_task(this, _task_queues, &root_processor, n_workers);
     // InitialMark needs claim bits to keep track of the marked-through CLDs.
     if (g1_policy()->during_initial_mark_pause()) {
       ClassLoaderDataGraph::clear_claimed_marks();
     }
 
-     // The individual threads will set their evac-failure closures.
-     if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
-     // These tasks use ShareHeap::_process_strong_tasks
-     assert(UseDynamicNumberOfGCThreads ||
-            workers()->active_workers() == workers()->total_workers(),
-            "If not dynamic should be using all the  workers");
+    // The individual threads will set their evac-failure closures.
+    if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
+
     workers()->run_task(&g1_par_task);
     end_par_time_sec = os::elapsedTime();
 
@@ -5425,14 +5367,12 @@
         (os::elapsedTime() - end_par_time_sec) * 1000.0;
   phase_times->record_code_root_fixup_time(code_root_fixup_time_ms);
 
-  set_par_threads(0);
-
   // Process any discovered reference objects - we have
   // to do this _before_ we retire the GC alloc regions
   // as we may have to copy some 'reachable' referent
   // objects (and their reachable sub-graphs) that were
   // not copied during the pause.
-  process_discovered_references(n_workers);
+  process_discovered_references();
 
   if (G1StringDedup::is_enabled()) {
     double fixup_start = os::elapsedTime();
@@ -5474,7 +5414,7 @@
   // will log these updates (and dirty their associated
   // cards). We need these updates logged to update any
   // RSets.
-  enqueue_discovered_references(n_workers);
+  enqueue_discovered_references();
 
   redirty_logged_cards();
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
@@ -5779,9 +5719,7 @@
     // Iterate over the dirty cards region list.
     G1ParCleanupCTTask cleanup_task(ct_bs, this);
 
-    set_par_threads();
     workers()->run_task(&cleanup_task);
-    set_par_threads(0);
 #ifndef PRODUCT
     if (G1VerifyCTCleanup || VerifyAfterGC) {
       G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
@@ -6314,21 +6252,6 @@
   g1mm()->update_eden_size();
 }
 
-void G1CollectedHeap::set_par_threads() {
-  // Don't change the number of workers.  Use the value previously set
-  // in the workgroup.
-  uint n_workers = workers()->active_workers();
-  assert(UseDynamicNumberOfGCThreads ||
-           n_workers == workers()->total_workers(),
-      "Otherwise should be using the total number of workers");
-  if (n_workers == 0) {
-    assert(false, "Should have been set in prior evacuation pause.");
-    n_workers = ParallelGCThreads;
-    workers()->set_active_workers(n_workers);
-  }
-  set_par_threads(n_workers);
-}
-
 // Methods for the GC alloc regions
 
 HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size,
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Thu May 28 11:37:13 2015 -0700
@@ -606,11 +606,11 @@
 
   // Process any reference objects discovered during
   // an incremental evacuation pause.
-  void process_discovered_references(uint no_of_gc_workers);
+  void process_discovered_references();
 
   // Enqueue any remaining discovered references
   // after processing.
-  void enqueue_discovered_references(uint no_of_gc_workers);
+  void enqueue_discovered_references();
 
 public:
   FlexibleWorkGang* workers() const { return _workers; }
@@ -981,6 +981,8 @@
 
   RefToScanQueue *task_queue(uint i) const;
 
+  uint num_task_queues() const;
+
   // A set of cards where updates happened during the GC
   DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
 
@@ -1012,11 +1014,6 @@
   // Initialize weak reference processing.
   void ref_processing_init();
 
-  // Explicitly import set_par_threads into this scope
-  using CollectedHeap::set_par_threads;
-  // Set _n_par_threads according to a policy TBD.
-  void set_par_threads();
-
   virtual Name kind() const {
     return CollectedHeap::G1CollectedHeap;
   }
--- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Thu May 28 11:37:13 2015 -0700
@@ -1587,14 +1587,17 @@
 }
 
 void
-G1CollectorPolicy::record_concurrent_mark_cleanup_end(uint n_workers) {
+G1CollectorPolicy::record_concurrent_mark_cleanup_end() {
   _collectionSetChooser->clear();
 
+  FlexibleWorkGang* workers = _g1->workers();
+  uint n_workers = workers->active_workers();
+
   uint n_regions = _g1->num_regions();
   uint chunk_size = calculate_parallel_work_chunk_size(n_workers, n_regions);
-  _collectionSetChooser->prepare_for_par_region_addition(n_regions, chunk_size);
+  _collectionSetChooser->prepare_for_par_region_addition(n_workers, n_regions, chunk_size);
   ParKnownGarbageTask par_known_garbage_task(_collectionSetChooser, chunk_size, n_workers);
-  _g1->workers()->run_task(&par_known_garbage_task);
+  workers->run_task(&par_known_garbage_task);
 
   _collectionSetChooser->sort_regions();
 
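
record_concurrent_mark_cleanup_end() now pulls the worker count from _g1->workers() and hands both the worker count and the chunk size to the collection set chooser. The chunk-size policy itself is not visible in this hunk; the sketch below only illustrates the general idea of splitting n_regions into chunks that workers claim, using a plain ceiling division (an assumption for illustration, not the actual G1 heuristic):

#include <cstdio>

// Hypothetical chunking: divide n_regions into chunks, several per worker
// so the work can be balanced. Not the real G1 policy.
static unsigned chunk_size_for(unsigned n_workers, unsigned n_regions) {
  unsigned chunks = n_workers * 4;             // over-partition the regions
  return (n_regions + chunks - 1) / chunks;    // ceiling division
}

int main() {
  unsigned n_workers = 8;
  unsigned n_regions = 1000;
  std::printf("%u regions, %u workers -> chunk size %u\n",
              n_regions, n_workers, chunk_size_for(n_workers, n_regions));
  return 0;
}
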
--- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Thu May 28 11:37:13 2015 -0700
@@ -692,7 +692,7 @@
 
   // Record start, end, and completion of cleanup.
   void record_concurrent_mark_cleanup_start();
-  void record_concurrent_mark_cleanup_end(uint n_workers);
+  void record_concurrent_mark_cleanup_end();
   void record_concurrent_mark_cleanup_completed();
 
   // Records the information about the heap size for reporting in
--- a/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp	Thu May 28 11:37:13 2015 -0700
@@ -127,7 +127,7 @@
 
   MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
   {
-    G1RootProcessor root_processor(g1h);
+    G1RootProcessor root_processor(g1h, 1);
     root_processor.process_strong_roots(&GenMarkSweep::follow_root_closure,
                                         &GenMarkSweep::follow_cld_closure,
                                         &follow_code_closure);
@@ -237,7 +237,7 @@
 
   CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
   {
-    G1RootProcessor root_processor(g1h);
+    G1RootProcessor root_processor(g1h, 1);
     root_processor.process_all_roots(&GenMarkSweep::adjust_pointer_closure,
                                      &GenMarkSweep::adjust_cld_closure,
                                      &adjust_code_closure);
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Thu May 28 11:37:13 2015 -0700
@@ -50,8 +50,8 @@
   _par_scan_state = par_scan_state;
   _worker_id = par_scan_state->queue_num();
 
-  assert(_worker_id < MAX2((uint)ParallelGCThreads, 1u),
-         err_msg("The given worker id %u must be less than the number of threads %u", _worker_id, MAX2((uint)ParallelGCThreads, 1u)));
+  assert(_worker_id < ParallelGCThreads,
+         err_msg("The given worker id %u must be less than the number of threads " UINTX_FORMAT, _worker_id, ParallelGCThreads));
 }
 
 // Generate G1 specialized oop_oop_iterate functions.
--- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp	Thu May 28 11:37:13 2015 -0700
@@ -90,11 +90,10 @@
 
 
 void G1RootProcessor::worker_has_discovered_all_strong_classes() {
-  uint n_workers = _g1h->n_par_threads();
   assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
 
   uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes);
-  if (new_value == n_workers) {
+  if (new_value == n_workers()) {
     // This thread is last. Notify the others.
     MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
     _lock.notify_all();
@@ -102,21 +101,20 @@
 }
 
 void G1RootProcessor::wait_until_all_strong_classes_discovered() {
-  uint n_workers = _g1h->n_par_threads();
   assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
 
-  if ((uint)_n_workers_discovered_strong_classes != n_workers) {
+  if ((uint)_n_workers_discovered_strong_classes != n_workers()) {
     MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
-    while ((uint)_n_workers_discovered_strong_classes != n_workers) {
+    while ((uint)_n_workers_discovered_strong_classes != n_workers()) {
       _lock.wait(Mutex::_no_safepoint_check_flag, 0, false);
     }
   }
 }
 
-G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) :
+G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h, uint n_workers) :
     _g1h(g1h),
     _process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)),
-    _srs(),
+    _srs(n_workers),
     _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false, Monitor::_safepoint_check_never),
     _n_workers_discovered_strong_classes(0) {}
 
@@ -206,7 +204,7 @@
     }
   }
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(n_workers());
 }
 
 void G1RootProcessor::process_strong_roots(OopClosure* oops,
@@ -216,7 +214,7 @@
   process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0);
   process_vm_roots(oops, NULL, NULL, 0);
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(n_workers());
 }
 
 void G1RootProcessor::process_all_roots(OopClosure* oops,
@@ -230,7 +228,7 @@
     CodeCache::blobs_do(blobs);
   }
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(n_workers());
 }
 
 void G1RootProcessor::process_java_roots(OopClosure* strong_roots,
@@ -253,7 +251,7 @@
 
   {
     G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i);
-    bool is_par = _g1h->n_par_threads() > 0;
+    bool is_par = n_workers() > 1;
     Threads::possibly_parallel_oops_do(is_par, strong_roots, thread_stack_clds, strong_code);
   }
 }
@@ -329,6 +327,6 @@
   _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
 }
 
-void G1RootProcessor::set_num_workers(uint active_workers) {
-  _process_strong_tasks->set_n_threads(active_workers);
+uint G1RootProcessor::n_workers() const {
+  return _srs.n_threads();
 }
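
G1RootProcessor now receives its worker count at construction, stores it in the shared roots scope (_srs), and reads it back through n_workers() both for the class-unloading barrier and for all_tasks_completed(). The barrier itself, "the last worker to check in wakes the others", can be modelled with a counter and a condition variable; the sketch below is a stand-alone approximation, not the Monitor-based HotSpot code:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class DiscoveryBarrier {
  const unsigned _n_workers;
  unsigned _checked_in = 0;
  std::mutex _lock;
  std::condition_variable _cv;
 public:
  explicit DiscoveryBarrier(unsigned n_workers) : _n_workers(n_workers) {}

  // Called by each worker once it has discovered all strong classes.
  void worker_done() {
    std::unique_lock<std::mutex> ml(_lock);
    if (++_checked_in == _n_workers) {
      _cv.notify_all();                 // this worker is last: wake the rest
    }
  }

  // Called by workers that must wait for everyone else.
  void wait_until_all_done() {
    std::unique_lock<std::mutex> ml(_lock);
    _cv.wait(ml, [this] { return _checked_in == _n_workers; });
  }
};

int main() {
  const unsigned n = 4;
  DiscoveryBarrier barrier(n);
  std::vector<std::thread> workers;
  for (unsigned i = 0; i < n; i++) {
    workers.emplace_back([&barrier, i] {
      barrier.worker_done();
      barrier.wait_until_all_done();
      std::printf("worker %u past the barrier\n", i);
    });
  }
  for (auto& t : workers) t.join();
  return 0;
}
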
--- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Thu May 28 11:37:13 2015 -0700
@@ -85,7 +85,7 @@
                         uint worker_i);
 
 public:
-  G1RootProcessor(G1CollectedHeap* g1h);
+  G1RootProcessor(G1CollectedHeap* g1h, uint n_workers);
 
   // Apply closures to the strongly and weakly reachable roots in the system
   // in a single pass.
@@ -114,8 +114,8 @@
                             OopClosure* scan_non_heap_weak_roots,
                             uint worker_i);
 
-  // Inform the root processor about the number of worker threads
-  void set_num_workers(uint active_workers);
+  // Number of worker threads used by the root processor.
+  uint n_workers() const;
 };
 
 #endif // SHARE_VM_GC_G1_G1ROOTPROCESSOR_HPP
--- a/hotspot/src/share/vm/gc/g1/g1StringDedup.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1StringDedup.cpp	Thu May 28 11:37:13 2015 -0700
@@ -153,9 +153,7 @@
 
   G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times);
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  g1h->set_par_threads();
   g1h->workers()->run_task(&task);
-  g1h->set_par_threads(0);
 }
 
 void G1StringDedup::threads_do(ThreadClosure* tc) {
--- a/hotspot/src/share/vm/gc/g1/g1StringDedupQueue.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1StringDedupQueue.cpp	Thu May 28 11:37:13 2015 -0700
@@ -42,7 +42,7 @@
   _cancel(false),
   _empty(true),
   _dropped(0) {
-  _nqueues = MAX2(ParallelGCThreads, (size_t)1);
+  _nqueues = ParallelGCThreads;
   _queues = NEW_C_HEAP_ARRAY(G1StringDedupWorkerQueue, _nqueues, mtGC);
   for (size_t i = 0; i < _nqueues; i++) {
     new (_queues + i) G1StringDedupWorkerQueue(G1StringDedupWorkerQueue::default_segment_size(), _max_cache_size, _max_size);
--- a/hotspot/src/share/vm/gc/g1/g1StringDedupTable.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/g1/g1StringDedupTable.cpp	Thu May 28 11:37:13 2015 -0700
@@ -112,7 +112,7 @@
 };
 
 G1StringDedupEntryCache::G1StringDedupEntryCache() {
-  _nlists = MAX2(ParallelGCThreads, (size_t)1);
+  _nlists = ParallelGCThreads;
   _lists = PaddedArray<G1StringDedupEntryFreeList, mtGC>::create_unfreeable((uint)_nlists);
 }
 
--- a/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp	Thu May 28 11:37:13 2015 -0700
@@ -832,9 +832,9 @@
   _ref_processor =
     new ReferenceProcessor(mr,            // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
-                           (int) ParallelGCThreads, // mt processing degree
+                           (uint) ParallelGCThreads, // mt processing degree
                            true,          // mt discovery
-                           (int) ParallelGCThreads, // mt discovery degree
+                           (uint) ParallelGCThreads, // mt discovery degree
                            true,          // atomic_discovery
                            &_is_alive_closure); // non-header is alive closure
   _counters = new CollectorCounters("PSParallelCompact", 1);
@@ -2029,7 +2029,6 @@
     // Set the number of GC threads to be used in this collection
     gc_task_manager()->set_active_gang();
     gc_task_manager()->task_idle_workers();
-    heap->set_par_threads(gc_task_manager()->active_workers());
 
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer.gc_id());
--- a/hotspot/src/share/vm/gc/parallel/psScavenge.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/parallel/psScavenge.cpp	Thu May 28 11:37:13 2015 -0700
@@ -382,7 +382,6 @@
     // Get the active number of workers here and use that value
     // throughout the methods.
     uint active_workers = gc_task_manager()->active_workers();
-    heap->set_par_threads(active_workers);
 
     PSPromotionManager::pre_scavenge();
 
@@ -846,9 +845,9 @@
   _ref_processor =
     new ReferenceProcessor(mr,                         // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
-                           (int) ParallelGCThreads,    // mt processing degree
+                           (uint) ParallelGCThreads,   // mt processing degree
                            true,                       // mt discovery
-                           (int) ParallelGCThreads,    // mt discovery degree
+                           (uint) ParallelGCThreads,   // mt discovery degree
                            true,                       // atomic_discovery
                            NULL);                      // header provides liveness info
 
--- a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp	Thu May 28 11:37:13 2015 -0700
@@ -38,6 +38,7 @@
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/space.inline.hpp"
 #include "gc/shared/spaceDecorator.hpp"
+#include "gc/shared/strongRootsScope.hpp"
 #include "memory/iterator.hpp"
 #include "oops/instanceRefKlass.hpp"
 #include "oops/oop.inline.hpp"
@@ -454,7 +455,7 @@
   }
 }
 
-void DefNewGeneration::younger_refs_iterate(OopsInGenClosure* cl) {
+void DefNewGeneration::younger_refs_iterate(OopsInGenClosure* cl, uint n_threads) {
   assert(false, "NYI -- are you sure you want to call this?");
 }
 
@@ -625,15 +626,22 @@
   assert(gch->no_allocs_since_save_marks(0),
          "save marks have not been newly set.");
 
-  gch->gen_process_roots(_level,
-                         true,  // Process younger gens, if any,
-                                // as strong roots.
-                         true,  // activate StrongRootsScope
-                         GenCollectedHeap::SO_ScavengeCodeCache,
-                         GenCollectedHeap::StrongAndWeakRoots,
-                         &fsc_with_no_gc_barrier,
-                         &fsc_with_gc_barrier,
-                         &cld_scan_closure);
+  {
+    // DefNew needs to run with n_threads == 0, to make sure the serial
+    // version of the card table scanning code is used.
+    // See: CardTableModRefBS::non_clean_card_iterate_possibly_parallel.
+    StrongRootsScope srs(0);
+
+    gch->gen_process_roots(&srs,
+                           _level,
+                           true,  // Process younger gens, if any,
+                                  // as strong roots.
+                           GenCollectedHeap::SO_ScavengeCodeCache,
+                           GenCollectedHeap::StrongAndWeakRoots,
+                           &fsc_with_no_gc_barrier,
+                           &fsc_with_gc_barrier,
+                           &cld_scan_closure);
+  }
 
   // "evacuate followers".
   evacuate_followers.do_void();
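
The comment in the DefNew hunk is the key point: constructing StrongRootsScope with 0 threads is how DefNew requests the single-threaded card-table scanning path. A stand-alone sketch of that dispatch (illustrative; the real decision lives in CardTableModRefBS::non_clean_card_iterate_possibly_parallel, shown further down in this patch):

#include <cstdio>

// Toy dispatcher: n_threads == 0 selects the serial path, anything else the
// parallel one. DefNew passes 0; parallel collectors pass their worker count.
static void scan_dirty_cards(unsigned n_threads) {
  if (n_threads > 0) {
    std::printf("parallel card scan with %u threads\n", n_threads);
  } else {
    std::printf("serial card scan\n");
  }
}

int main() {
  scan_dirty_cards(0);   // DefNew: serial
  scan_dirty_cards(8);   // parallel collectors: parallel
  return 0;
}
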
--- a/hotspot/src/share/vm/gc/serial/defNewGeneration.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.hpp	Thu May 28 11:37:13 2015 -0700
@@ -255,7 +255,7 @@
   // Iteration
   void object_iterate(ObjectClosure* blk);
 
-  void younger_refs_iterate(OopsInGenClosure* cl);
+  void younger_refs_iterate(OopsInGenClosure* cl, uint n_threads);
 
   void space_iterate(SpaceClosure* blk, bool usedOnly = false);
 
--- a/hotspot/src/share/vm/gc/serial/genMarkSweep.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/serial/genMarkSweep.cpp	Thu May 28 11:37:13 2015 -0700
@@ -40,6 +40,7 @@
 #include "gc/shared/modRefBarrierSet.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/space.hpp"
+#include "gc/shared/strongRootsScope.hpp"
 #include "oops/instanceRefKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
@@ -200,14 +201,18 @@
   // Need new claim bits before marking starts.
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  gch->gen_process_roots(level,
-                         false, // Younger gens are not roots.
-                         true,  // activate StrongRootsScope
-                         GenCollectedHeap::SO_None,
-                         GenCollectedHeap::StrongRootsOnly,
-                         &follow_root_closure,
-                         &follow_root_closure,
-                         &follow_cld_closure);
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           level,
+                           false, // Younger gens are not roots.
+                           GenCollectedHeap::SO_None,
+                           GenCollectedHeap::StrongRootsOnly,
+                           &follow_root_closure,
+                           &follow_root_closure,
+                           &follow_cld_closure);
+  }
 
   // Process reference objects found during marking
   {
@@ -284,14 +289,18 @@
   assert(level == 1, "We don't use mark-sweep on young generations.");
   adjust_pointer_closure.set_orig_generation(gch->old_gen());
 
-  gch->gen_process_roots(level,
-                         false, // Younger gens are not roots.
-                         true,  // activate StrongRootsScope
-                         GenCollectedHeap::SO_AllCodeCache,
-                         GenCollectedHeap::StrongAndWeakRoots,
-                         &adjust_pointer_closure,
-                         &adjust_pointer_closure,
-                         &adjust_cld_closure);
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           level,
+                           false, // Younger gens are not roots.
+                           GenCollectedHeap::SO_AllCodeCache,
+                           GenCollectedHeap::StrongAndWeakRoots,
+                           &adjust_pointer_closure,
+                           &adjust_pointer_closure,
+                           &adjust_cld_closure);
+  }
 
   gch->gen_process_weak_roots(&adjust_pointer_closure);
 
--- a/hotspot/src/share/vm/gc/shared/adaptiveSizePolicy.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/adaptiveSizePolicy.cpp	Thu May 28 11:37:13 2015 -0700
@@ -161,7 +161,7 @@
       }
       _debug_perturbation = !_debug_perturbation;
     }
-    assert((new_active_workers <= (uintx) ParallelGCThreads) &&
+    assert((new_active_workers <= ParallelGCThreads) &&
            (new_active_workers >= min_workers),
       "Jiggled active workers too much");
   }
--- a/hotspot/src/share/vm/gc/shared/cardGeneration.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/cardGeneration.cpp	Thu May 28 11:37:13 2015 -0700
@@ -353,8 +353,8 @@
   blk->do_space(space());
 }
 
-void CardGeneration::younger_refs_iterate(OopsInGenClosure* blk) {
+void CardGeneration::younger_refs_iterate(OopsInGenClosure* blk, uint n_threads) {
   blk->set_generation(this);
-  younger_refs_in_space_iterate(space(), blk);
+  younger_refs_in_space_iterate(space(), blk, n_threads);
   blk->reset_generation();
 }
--- a/hotspot/src/share/vm/gc/shared/cardGeneration.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/cardGeneration.hpp	Thu May 28 11:37:13 2015 -0700
@@ -89,7 +89,7 @@
 
   void space_iterate(SpaceClosure* blk, bool usedOnly = false);
 
-  void younger_refs_iterate(OopsInGenClosure* blk);
+  void younger_refs_iterate(OopsInGenClosure* blk, uint n_threads);
 
   bool is_in(const void* p) const;
 
--- a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp	Thu May 28 11:37:13 2015 -0700
@@ -440,31 +440,11 @@
 void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
                                                                  MemRegion mr,
                                                                  OopsInGenClosure* cl,
-                                                                 CardTableRS* ct) {
+                                                                 CardTableRS* ct,
+                                                                 uint n_threads) {
   if (!mr.is_empty()) {
-    // Caller (process_roots()) claims that all GC threads
-    // execute this call.  With UseDynamicNumberOfGCThreads now all
-    // active GC threads execute this call.  The number of active GC
-    // threads needs to be passed to par_non_clean_card_iterate_work()
-    // to get proper partitioning and termination.
-    //
-    // This is an example of where n_par_threads() is used instead
-    // of workers()->active_workers().  n_par_threads can be set to 0 to
-    // turn off parallelism.  For example when this code is called as
-    // part of verification during root processing then n_par_threads()
-    // may have been set to 0. active_workers is not overloaded with
-    // the meaning that it is a switch to disable parallelism and so keeps
-    // the meaning of the number of active gc workers. If parallelism has
-    // not been shut off by setting n_par_threads to 0, then n_par_threads
-    // should be equal to active_workers.  When a different mechanism for
-    // shutting off parallelism is used, then active_workers can be used in
-    // place of n_par_threads.
-    int n_threads =  GenCollectedHeap::heap()->n_par_threads();
-    bool is_par = n_threads > 0;
-    if (is_par) {
+    if (n_threads > 0) {
 #if INCLUDE_ALL_GCS
-      assert(GenCollectedHeap::heap()->n_par_threads() ==
-             GenCollectedHeap::heap()->workers()->active_workers(), "Mismatch");
       non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads);
 #else  // INCLUDE_ALL_GCS
       fatal("Parallel gc not supported here.");
@@ -472,8 +452,11 @@
     } else {
       // clear_cl finds contiguous dirty ranges of cards to process and clear.
 
-      DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), cl->gen_boundary());
-      ClearNoncleanCardWrapper clear_cl(dcto_cl, ct);
+      // This is the single-threaded version used by DefNew.
+      const bool parallel = false;
+
+      DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), cl->gen_boundary(), parallel);
+      ClearNoncleanCardWrapper clear_cl(dcto_cl, ct, parallel);
 
       clear_cl.do_MemRegion(mr);
     }
--- a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.hpp	Thu May 28 11:37:13 2015 -0700
@@ -178,14 +178,15 @@
   // region mr in the given space and apply cl to any dirty sub-regions
   // of mr. Clears the dirty cards as they are processed.
   void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
-                                                OopsInGenClosure* cl, CardTableRS* ct);
+                                                OopsInGenClosure* cl, CardTableRS* ct,
+                                                uint n_threads);
 
  private:
   // Work method used to implement non_clean_card_iterate_possibly_parallel()
   // above in the parallel case.
   void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
                                             OopsInGenClosure* cl, CardTableRS* ct,
-                                            int n_threads);
+                                            uint n_threads);
 
  protected:
   // Dirty the bytes corresponding to "mr" (not all of which must be
--- a/hotspot/src/share/vm/gc/shared/cardTableRS.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/cardTableRS.cpp	Thu May 28 11:37:13 2015 -0700
@@ -102,9 +102,10 @@
 }
 
 void CardTableRS::younger_refs_iterate(Generation* g,
-                                       OopsInGenClosure* blk) {
+                                       OopsInGenClosure* blk,
+                                       uint n_threads) {
   _last_cur_val_in_gen[g->level()+1] = cur_youngergen_card_val();
-  g->younger_refs_iterate(blk);
+  g->younger_refs_iterate(blk, n_threads);
 }
 
 inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) {
@@ -164,15 +165,8 @@
 }
 
 ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
-  DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) :
-    _dirty_card_closure(dirty_card_closure), _ct(ct) {
-    // Cannot yet substitute active_workers for n_par_threads
-    // in the case where parallelism is being turned off by
-    // setting n_par_threads to 0.
-    _is_par = (GenCollectedHeap::heap()->n_par_threads() > 0);
-    assert(!_is_par ||
-           (GenCollectedHeap::heap()->n_par_threads() ==
-            GenCollectedHeap::heap()->workers()->active_workers()), "Mismatch");
+  DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct, bool is_par) :
+    _dirty_card_closure(dirty_card_closure), _ct(ct), _is_par(is_par) {
 }
 
 bool ClearNoncleanCardWrapper::is_word_aligned(jbyte* entry) {
@@ -272,7 +266,8 @@
 }
 
 void CardTableRS::younger_refs_in_space_iterate(Space* sp,
-                                                OopsInGenClosure* cl) {
+                                                OopsInGenClosure* cl,
+                                                uint n_threads) {
   const MemRegion urasm = sp->used_region_at_save_marks();
 #ifdef ASSERT
   // Convert the assertion check to a warning if we are running
@@ -301,7 +296,7 @@
     ShouldNotReachHere();
   }
 #endif
-  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this);
+  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this, n_threads);
 }
 
 void CardTableRS::clear_into_younger(Generation* old_gen) {
--- a/hotspot/src/share/vm/gc/shared/cardTableRS.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/cardTableRS.hpp	Thu May 28 11:37:13 2015 -0700
@@ -56,7 +56,7 @@
 
   CardTableModRefBSForCTRS* _ct_bs;
 
-  virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl);
+  virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl, uint n_threads);
 
   void verify_space(Space* s, HeapWord* gen_start);
 
@@ -116,7 +116,7 @@
   // Card table entries are cleared before application; "blk" is
   // responsible for dirtying if the oop is still older-to-younger after
   // closure application.
-  void younger_refs_iterate(Generation* g, OopsInGenClosure* blk);
+  void younger_refs_iterate(Generation* g, OopsInGenClosure* blk, uint n_threads);
 
   void inline_write_ref_field_gc(void* field, oop new_val) {
     jbyte* byte = _ct_bs->byte_for(field);
@@ -183,7 +183,7 @@
   bool is_word_aligned(jbyte* entry);
 
 public:
-  ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct);
+  ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct, bool is_par);
   void do_MemRegion(MemRegion mr);
 };
 
--- a/hotspot/src/share/vm/gc/shared/collectedHeap.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/collectedHeap.cpp	Thu May 28 11:37:13 2015 -0700
@@ -160,8 +160,7 @@
 // Memory state functions.
 
 
-CollectedHeap::CollectedHeap() : _n_par_threads(0)
-{
+CollectedHeap::CollectedHeap() {
   const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT));
   const size_t elements_per_word = HeapWordSize / sizeof(jint);
   _filler_array_max_size = align_object_size(filler_array_hdr_size() +
--- a/hotspot/src/share/vm/gc/shared/collectedHeap.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/collectedHeap.hpp	Thu May 28 11:37:13 2015 -0700
@@ -101,7 +101,6 @@
  protected:
   BarrierSet* _barrier_set;
   bool _is_gc_active;
-  uint _n_par_threads;
 
   unsigned int _total_collections;          // ... started
   unsigned int _total_full_collections;     // ... started
@@ -291,12 +290,6 @@
   }
   GCCause::Cause gc_cause() { return _gc_cause; }
 
-  // Number of threads currently working on GC tasks.
-  uint n_par_threads() { return _n_par_threads; }
-
-  // May be overridden to set additional parallelism.
-  virtual void set_par_threads(uint t) { _n_par_threads = t; };
-
   // General obj/array allocation facilities.
   inline static oop obj_allocate(KlassHandle klass, int size, TRAPS);
   inline static oop array_allocate(KlassHandle klass, int size, int length, TRAPS);
--- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp	Thu May 28 11:37:13 2015 -0700
@@ -561,16 +561,6 @@
   return collector_policy()->satisfy_failed_allocation(size, is_tlab);
 }
 
-void GenCollectedHeap::set_par_threads(uint t) {
-  assert(t == 0 || !UseSerialGC, "Cannot have parallel threads");
-  CollectedHeap::set_par_threads(t);
-  set_n_termination(t);
-}
-
-void GenCollectedHeap::set_n_termination(uint t) {
-  _process_strong_tasks->set_n_threads(t);
-}
-
 #ifdef ASSERT
 class AssertNonScavengableClosure: public OopClosure {
 public:
@@ -582,15 +572,13 @@
 static AssertNonScavengableClosure assert_is_non_scavengable_closure;
 #endif
 
-void GenCollectedHeap::process_roots(bool activate_scope,
+void GenCollectedHeap::process_roots(StrongRootsScope* scope,
                                      ScanningOption so,
                                      OopClosure* strong_roots,
                                      OopClosure* weak_roots,
                                      CLDClosure* strong_cld_closure,
                                      CLDClosure* weak_cld_closure,
                                      CodeBlobClosure* code_roots) {
-  StrongRootsScope srs(activate_scope);
-
   // General roots.
   assert(Threads::thread_claim_parity() != 0, "must have called prologue code");
   assert(code_roots != NULL, "code root closure should always be set");
@@ -609,7 +597,7 @@
   // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
   CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
 
-  bool is_par = n_par_threads() > 0;
+  bool is_par = scope->n_threads() > 1;
   Threads::possibly_parallel_oops_do(is_par, strong_roots, roots_from_clds_p, roots_from_code_p);
 
   if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) {
@@ -669,9 +657,9 @@
 
 }
 
-void GenCollectedHeap::gen_process_roots(int level,
+void GenCollectedHeap::gen_process_roots(StrongRootsScope* scope,
+                                         int level,
                                          bool younger_gens_as_roots,
-                                         bool activate_scope,
                                          ScanningOption so,
                                          bool only_strong_roots,
                                          OopsInGenClosure* not_older_gens,
@@ -689,7 +677,7 @@
   OopsInGenClosure* weak_roots = only_strong_roots ? NULL : not_older_gens;
   CLDClosure* weak_cld_closure = only_strong_roots ? NULL : cld_closure;
 
-  process_roots(activate_scope, so,
+  process_roots(scope, so,
                 not_older_gens, weak_roots,
                 cld_closure, weak_cld_closure,
                 &mark_code_closure);
@@ -707,11 +695,11 @@
   // older-gen scanning.
   if (level == 0) {
     older_gens->set_generation(_old_gen);
-    rem_set()->younger_refs_iterate(_old_gen, older_gens);
+    rem_set()->younger_refs_iterate(_old_gen, older_gens, scope->n_threads());
     older_gens->reset_generation();
   }
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(scope->n_threads());
 }
 
 
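
Note the two different thresholds in this patch: GenCollectedHeap::process_roots() treats root scanning as parallel only when more than one thread participates (scope->n_threads() > 1), while the card-table code above goes parallel as soon as n_threads > 0. A one-liner makes the distinction concrete (illustrative values, not taken from the patch):

#include <cstdio>

int main() {
  unsigned n_threads = 1;
  bool is_par_roots = n_threads > 1;   // process_roots(): one thread is not parallel
  bool is_par_cards = n_threads > 0;   // non_clean_card_iterate_possibly_parallel()
  std::printf("roots parallel: %d, cards parallel: %d\n", is_par_roots, is_par_cards);
  return 0;
}
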
--- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp	Thu May 28 11:37:13 2015 -0700
@@ -30,8 +30,9 @@
 #include "gc/shared/collectorPolicy.hpp"
 #include "gc/shared/generation.hpp"
 
+class FlexibleWorkGang;
+class StrongRootsScope;
 class SubTasksDone;
-class FlexibleWorkGang;
 
 // A "GenCollectedHeap" is a CollectedHeap that uses generational
 // collection.  It has two generations, young and old.
@@ -363,9 +364,6 @@
   // asserted to be this type.
   static GenCollectedHeap* heap();
 
-  void set_par_threads(uint t);
-  void set_n_termination(uint t);
-
   // Invoke the "do_oop" method of one of the closures "not_older_gens"
   // or "older_gens" on root locations for the generation at
   // "level".  (The "older_gens" closure is used for scanning references
@@ -385,7 +383,7 @@
   };
 
  private:
-  void process_roots(bool activate_scope,
+  void process_roots(StrongRootsScope* scope,
                      ScanningOption so,
                      OopClosure* strong_roots,
                      OopClosure* weak_roots,
@@ -393,24 +391,13 @@
                      CLDClosure* weak_cld_closure,
                      CodeBlobClosure* code_roots);
 
-  void gen_process_roots(int level,
-                         bool younger_gens_as_roots,
-                         bool activate_scope,
-                         ScanningOption so,
-                         OopsInGenClosure* not_older_gens,
-                         OopsInGenClosure* weak_roots,
-                         OopsInGenClosure* older_gens,
-                         CLDClosure* cld_closure,
-                         CLDClosure* weak_cld_closure,
-                         CodeBlobClosure* code_closure);
-
  public:
   static const bool StrongAndWeakRoots = false;
   static const bool StrongRootsOnly    = true;
 
-  void gen_process_roots(int level,
+  void gen_process_roots(StrongRootsScope* scope,
+                         int level,
                          bool younger_gens_as_roots,
-                         bool activate_scope,
                          ScanningOption so,
                          bool only_strong_roots,
                          OopsInGenClosure* not_older_gens,
--- a/hotspot/src/share/vm/gc/shared/genOopClosures.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/genOopClosures.hpp	Thu May 28 11:37:13 2015 -0700
@@ -35,11 +35,6 @@
 class DefNewGeneration;
 class KlassRemSet;
 
-template<class E, MEMFLAGS F, unsigned int N> class GenericTaskQueue;
-typedef GenericTaskQueue<oop, mtGC, TASKQUEUE_SIZE> OopTaskQueue;
-template<class T, MEMFLAGS F> class GenericTaskQueueSet;
-typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
-
 // Closure for iterating roots from a particular generation
 // Note: all classes deriving from this MUST call this do_barrier
 // method at the end of their own do_oop method!
--- a/hotspot/src/share/vm/gc/shared/genRemSet.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/genRemSet.hpp	Thu May 28 11:37:13 2015 -0700
@@ -77,10 +77,11 @@
   //  1) that are in objects allocated in "g" at the time of the last call
   //     to "save_Marks", and
   //  2) that point to objects in younger generations.
-  virtual void younger_refs_iterate(Generation* g, OopsInGenClosure* blk) = 0;
+  virtual void younger_refs_iterate(Generation* g, OopsInGenClosure* blk, uint n_threads) = 0;
 
   virtual void younger_refs_in_space_iterate(Space* sp,
-                                             OopsInGenClosure* cl) = 0;
+                                             OopsInGenClosure* cl,
+                                             uint n_threads) = 0;
 
   // This method is used to notify the remembered set that "new_val" has
   // been written into "field" by the garbage collector.
--- a/hotspot/src/share/vm/gc/shared/generation.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/generation.cpp	Thu May 28 11:37:13 2015 -0700
@@ -293,9 +293,10 @@
 }
 
 void Generation::younger_refs_in_space_iterate(Space* sp,
-                                               OopsInGenClosure* cl) {
+                                               OopsInGenClosure* cl,
+                                               uint n_threads) {
   GenRemSet* rs = GenCollectedHeap::heap()->rem_set();
-  rs->younger_refs_in_space_iterate(sp, cl);
+  rs->younger_refs_in_space_iterate(sp, cl, n_threads);
 }
 
 class GenerationObjIterateClosure : public SpaceClosure {
--- a/hotspot/src/share/vm/gc/shared/generation.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/generation.hpp	Thu May 28 11:37:13 2015 -0700
@@ -122,7 +122,7 @@
   // The iteration is only over objects allocated at the start of the
   // iterations; objects allocated as a result of applying the closure are
   // not included.
-  void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl);
+  void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl, uint n_threads);
 
  public:
   // The set of possible generation kinds.
@@ -526,7 +526,7 @@
   // in the current generation that contain pointers to objects in younger
   // generations. Objects allocated since the last "save_marks" call are
   // excluded.
-  virtual void younger_refs_iterate(OopsInGenClosure* cl) = 0;
+  virtual void younger_refs_iterate(OopsInGenClosure* cl, uint n_threads) = 0;
 
   // Inform a generation that it longer contains references to objects
   // Inform a generation that it no longer contains references to objects
   // in any younger generation.    [e.g. Because younger gens are empty,
--- a/hotspot/src/share/vm/gc/shared/space.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/space.cpp	Thu May 28 11:37:13 2015 -0700
@@ -181,7 +181,8 @@
 
 DirtyCardToOopClosure* Space::new_dcto_cl(ExtendedOopClosure* cl,
                                           CardTableModRefBS::PrecisionStyle precision,
-                                          HeapWord* boundary) {
+                                          HeapWord* boundary,
+                                          bool parallel) {
   return new DirtyCardToOopClosure(this, cl, precision, boundary);
 }
 
@@ -260,7 +261,8 @@
 DirtyCardToOopClosure*
 ContiguousSpace::new_dcto_cl(ExtendedOopClosure* cl,
                              CardTableModRefBS::PrecisionStyle precision,
-                             HeapWord* boundary) {
+                             HeapWord* boundary,
+                             bool parallel) {
   return new ContiguousSpaceDCTOC(this, cl, precision, boundary);
 }
 
--- a/hotspot/src/share/vm/gc/shared/space.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/space.hpp	Thu May 28 11:37:13 2015 -0700
@@ -183,7 +183,8 @@
   // operate. ResourceArea allocated.
   virtual DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
                                              CardTableModRefBS::PrecisionStyle precision,
-                                             HeapWord* boundary = NULL);
+                                             HeapWord* boundary,
+                                             bool parallel);
 
   // If "p" is in the space, returns the address of the start of the
   // "block" that contains "p".  We say "block" instead of "object" since
@@ -629,7 +630,8 @@
   // Override.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
                                      CardTableModRefBS::PrecisionStyle precision,
-                                     HeapWord* boundary = NULL);
+                                     HeapWord* boundary,
+                                     bool parallel);
 
   // Apply "blk->do_oop" to the addresses of all reference fields in objects
   // starting with the _saved_mark_word, which was noted during a generation's
--- a/hotspot/src/share/vm/gc/shared/strongRootsScope.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/strongRootsScope.cpp	Thu May 28 11:37:13 2015 -0700
@@ -28,24 +28,18 @@
 #include "gc/shared/strongRootsScope.hpp"
 #include "runtime/thread.hpp"
 
-MarkScope::MarkScope(bool activate) : _active(activate) {
-  if (_active) {
-    nmethod::oops_do_marking_prologue();
-  }
+MarkScope::MarkScope() {
+  nmethod::oops_do_marking_prologue();
 }
 
 MarkScope::~MarkScope() {
-  if (_active) {
-    nmethod::oops_do_marking_epilogue();
-  }
+  nmethod::oops_do_marking_epilogue();
 }
 
-StrongRootsScope::StrongRootsScope(bool activate) : MarkScope(activate) {
-  if (_active) {
-    Threads::change_thread_claim_parity();
-    // Zero the claimed high water mark in the StringTable
-    StringTable::clear_parallel_claimed_index();
-  }
+StrongRootsScope::StrongRootsScope(uint n_threads) : _n_threads(n_threads) {
+  Threads::change_thread_claim_parity();
+  // Zero the claimed high water mark in the StringTable
+  StringTable::clear_parallel_claimed_index();
 }
 
 StrongRootsScope::~StrongRootsScope() {
--- a/hotspot/src/share/vm/gc/shared/strongRootsScope.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/strongRootsScope.hpp	Thu May 28 11:37:13 2015 -0700
@@ -29,18 +29,21 @@
 
 class MarkScope : public StackObj {
  protected:
-  bool _active;
- public:
-  MarkScope(bool activate = true);
+  MarkScope();
   ~MarkScope();
 };
 
 // Sets up and tears down the required state for parallel root processing.
 
 class StrongRootsScope : public MarkScope {
+  // Number of threads participating in the roots processing.
+  const uint _n_threads;
+
  public:
-  StrongRootsScope(bool activate = true);
+  StrongRootsScope(uint n_threads);
   ~StrongRootsScope();
+
+  uint n_threads() const { return _n_threads; }
 };
 
 #endif // SHARE_VM_GC_SHARED_STRONGROOTSSCOPE_HPP
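
StrongRootsScope is a stack-allocated (RAII) object: the constructor performs the per-collection setup and now also records how many threads will participate, the accessor exposes that count to the root-processing code, and the destructor undoes the setup when the scope ends. A minimal stand-alone model of the same shape (the setup/teardown bodies here are placeholders, not the real Threads/StringTable calls):

#include <cstdio>

class ScopeModel {
  const unsigned _n_threads;     // fixed for the lifetime of the scope
 public:
  explicit ScopeModel(unsigned n_threads) : _n_threads(n_threads) {
    std::printf("setup for %u threads\n", _n_threads);   // placeholder setup
  }
  ~ScopeModel() {
    std::printf("teardown\n");                           // placeholder teardown
  }
  unsigned n_threads() const { return _n_threads; }
};

static void process_roots(const ScopeModel& scope) {
  bool is_par = scope.n_threads() > 1;
  std::printf("processing roots, parallel=%d\n", is_par);
}

int main() {
  {
    ScopeModel srs(4);     // setup happens here
    process_roots(srs);
  }                        // teardown happens automatically here
  return 0;
}
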
--- a/hotspot/src/share/vm/gc/shared/taskqueue.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/taskqueue.hpp	Thu May 28 11:37:13 2015 -0700
@@ -382,6 +382,8 @@
   bool steal(uint queue_num, int* seed, E& t);
 
   bool peek();
+
+  uint size() const { return _n; }
 };
 
 template<class T, MEMFLAGS F> void
--- a/hotspot/src/share/vm/gc/shared/workgroup.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/workgroup.cpp	Thu May 28 11:37:13 2015 -0700
@@ -133,8 +133,6 @@
 }
 
 void WorkGang::run_task(AbstractGangTask* task, uint no_of_parallel_workers) {
-  task->set_for_termination(no_of_parallel_workers);
-
   // This thread is executed by the VM thread which does not block
   // on ordinary MutexLocker's.
   MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
@@ -434,7 +432,7 @@
 // SubTasksDone functions.
 
 SubTasksDone::SubTasksDone(uint n) :
-  _n_tasks(n), _n_threads(1), _tasks(NULL) {
+  _n_tasks(n), _tasks(NULL) {
   _tasks = NEW_C_HEAP_ARRAY(uint, n, mtInternal);
   guarantee(_tasks != NULL, "alloc failure");
   clear();
@@ -444,12 +442,6 @@
   return _tasks != NULL;
 }
 
-void SubTasksDone::set_n_threads(uint t) {
-  assert(_claimed == 0 || _threads_completed == _n_threads,
-         "should not be called while tasks are being processed!");
-  _n_threads = (t == 0 ? 1 : t);
-}
-
 void SubTasksDone::clear() {
   for (uint i = 0; i < _n_tasks; i++) {
     _tasks[i] = 0;
@@ -477,7 +469,7 @@
   return res;
 }
 
-void SubTasksDone::all_tasks_completed() {
+void SubTasksDone::all_tasks_completed(uint n_threads) {
   jint observed = _threads_completed;
   jint old;
   do {
@@ -485,7 +477,10 @@
     observed = Atomic::cmpxchg(old+1, &_threads_completed, old);
   } while (observed != old);
   // If this was the last thread checking in, clear the tasks.
-  if (observed+1 == (jint)_n_threads) clear();
+  uint adjusted_thread_count = (n_threads == 0 ? 1 : n_threads);
+  if (observed + 1 == (jint)adjusted_thread_count) {
+    clear();
+  }
 }
 
 
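
SubTasksDone::all_tasks_completed() now takes the number of participating threads directly instead of relying on a stored _n_threads, and keeps the old behaviour of mapping 0 to 1 so serial callers still work. The "last thread to check in resets the state" bookkeeping can be modelled with std::atomic; the sketch below is an approximation, not the HotSpot Atomic::cmpxchg code:

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

class SubTasksModel {
  std::atomic<unsigned> _threads_completed{0};
 public:
  // n_threads == 0 is treated as 1, matching the serial case.
  void all_tasks_completed(unsigned n_threads) {
    unsigned adjusted = (n_threads == 0) ? 1 : n_threads;
    unsigned finished = _threads_completed.fetch_add(1) + 1;
    if (finished == adjusted) {
      std::printf("last thread checked in, clearing task state\n");
      _threads_completed.store(0);    // reset for the next use
    }
  }
};

int main() {
  const unsigned n = 4;
  SubTasksModel tasks;
  std::vector<std::thread> workers;
  for (unsigned i = 0; i < n; i++) {
    workers.emplace_back([&tasks, n] { tasks.all_tasks_completed(n); });
  }
  for (auto& t : workers) t.join();
  return 0;
}
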
--- a/hotspot/src/share/vm/gc/shared/workgroup.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/gc/shared/workgroup.hpp	Thu May 28 11:37:13 2015 -0700
@@ -59,13 +59,6 @@
   // The argument tells you which member of the gang you are.
   virtual void work(uint worker_id) = 0;
 
-  // This method configures the task for proper termination.
-  // Some tasks do not have any requirements on termination
-  // and may inherit this method that does nothing.  Some
-  // tasks do some coordination on termination and override
-  // this method to implement that coordination.
-  virtual void set_for_termination(uint active_workers) {};
-
   // Debugging accessor for the name.
   const char* name() const PRODUCT_RETURN_(return NULL;);
   int counter() { return _counter; }
@@ -99,12 +92,9 @@
   OopTaskQueueSet*       _queues;
   ParallelTaskTerminator _terminator;
  public:
-  AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues) :
-    AbstractGangTask(name), _queues(queues), _terminator(0, _queues) {}
+  AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues, uint n_threads) :
+    AbstractGangTask(name), _queues(queues), _terminator(n_threads, _queues) {}
   ParallelTaskTerminator* terminator() { return &_terminator; }
-  virtual void set_for_termination(uint active_workers) {
-    terminator()->reset_for_reuse(active_workers);
-  }
   OopTaskQueueSet* queues() { return _queues; }
 };
 
@@ -315,16 +305,20 @@
   uint _active_workers;
  public:
   // Constructor and destructor.
-  // Initialize active_workers to a minimum value.  Setting it to
-  // the parameter "workers" will initialize it to a maximum
-  // value which is not desirable.
   FlexibleWorkGang(const char* name, uint workers,
                    bool are_GC_task_threads,
                    bool  are_ConcurrentGC_threads) :
     WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads),
-    _active_workers(UseDynamicNumberOfGCThreads ? 1U : ParallelGCThreads) {}
-  // Accessors for fields
-  virtual uint active_workers() const { return _active_workers; }
+    _active_workers(UseDynamicNumberOfGCThreads ? 1U : workers) {}
+
+  // Accessors for fields.
+  virtual uint active_workers() const {
+    assert(_active_workers <= _total_workers,
+           err_msg("_active_workers: %u > _total_workers: %u", _active_workers, _total_workers));
+    assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers,
+           "Unless dynamic should use total workers");
+    return _active_workers;
+  }
   void set_active_workers(uint v) {
     assert(v <= _total_workers,
            "Trying to set more workers active than there are");
@@ -390,12 +384,6 @@
 class SubTasksDone: public CHeapObj<mtInternal> {
   uint* _tasks;
   uint _n_tasks;
-  // _n_threads is used to determine when a sub task is done.
-  // It does not control how many threads will execute the subtask
-  // but must be initialized to the number that do execute the task
-  // in order to correctly decide when the subtask is done (all the
-  // threads working on the task have finished).
-  uint _n_threads;
   uint _threads_completed;
 #ifdef ASSERT
   volatile uint _claimed;
@@ -413,11 +401,6 @@
   // True iff the object is in a valid state.
   bool valid();
 
-  // Get/set the number of parallel threads doing the tasks to "t".  Can only
-  // be called before tasks start or after they are complete.
-  uint n_threads() { return _n_threads; }
-  void set_n_threads(uint t);
-
   // Returns "false" if the task "t" is unclaimed, and ensures that task is
   // claimed.  The task "t" is required to be within the range of "this".
   bool is_task_claimed(uint t);
@@ -426,7 +409,9 @@
   // tasks that it will try to claim.  Every thread in the parallel task
   // must execute this.  (When the last thread does so, the task array is
   // cleared.)
-  void all_tasks_completed();
+  //
+  // n_threads - Number of threads executing the sub-tasks.
+  void all_tasks_completed(uint n_threads);
 
   // Destructor.
   ~SubTasksDone();
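
The new FlexibleWorkGang::active_workers() accessor turns two long-standing assumptions into checked invariants: the active count never exceeds the gang's total, and unless dynamic GC thread counts are in use it must equal the total. A compact stand-alone restatement of those checks (plain assert instead of HotSpot's err_msg machinery):

#include <cassert>
#include <cstdio>

struct GangModel {
  unsigned total_workers;
  unsigned active_workers;
  bool use_dynamic_threads;

  unsigned checked_active_workers() const {
    assert(active_workers <= total_workers && "more active workers than exist");
    assert((use_dynamic_threads || active_workers == total_workers) &&
           "without dynamic GC threads, all workers must be active");
    return active_workers;
  }
};

int main() {
  GangModel gang{8, 8, false};
  std::printf("active workers: %u\n", gang.checked_active_workers());
  return 0;
}
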
--- a/hotspot/src/share/vm/memory/iterator.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/memory/iterator.hpp	Thu May 28 11:37:13 2015 -0700
@@ -381,9 +381,4 @@
   template <class OopClosureType>             static bool do_metadata(OopClosureType* closure);
 };
 
-// Helper to convert the oop iterate macro suffixes into bool values that can be used by template functions.
-#define nvs_nv_to_bool true
-#define nvs_v_to_bool  false
-#define nvs_to_bool(nv_suffix) nvs##nv_suffix##_to_bool
-
 #endif // SHARE_VM_MEMORY_ITERATOR_HPP
--- a/hotspot/src/share/vm/oops/arrayKlass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/arrayKlass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -144,4 +144,36 @@
   void oop_verify_on(oop obj, outputStream* st);
 };
 
+// Array oop iteration macros for declarations.
+// Used to generate the declarations in the *ArrayKlass header files.
+
+#define OOP_OOP_ITERATE_DECL_RANGE(OopClosureType, nv_suffix)                                  \
+  int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end);
+
+#if INCLUDE_ALL_GCS
+// Named NO_BACKWARDS because the definition used by *ArrayKlass isn't reversed, see below.
+#define OOP_OOP_ITERATE_DECL_NO_BACKWARDS(OopClosureType, nv_suffix)           \
+  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
+#endif // INCLUDE_ALL_GCS
+
+
+// Array oop iteration macros for definitions.
+// Used to generate the definitions in the *ArrayKlass.inline.hpp files.
+
+#define OOP_OOP_ITERATE_DEFN_RANGE(KlassType, OopClosureType, nv_suffix)                                 \
+                                                                                                         \
+int KlassType::oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end) {  \
+  return oop_oop_iterate_range<nvs_to_bool(nv_suffix)>(obj, closure, start, end);                        \
+}
+
+#if INCLUDE_ALL_GCS
+#define OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(KlassType, OopClosureType, nv_suffix)          \
+int KlassType::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
+  /* No reverse implementation ATM. */                                                   \
+  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                          \
+}
+#else
+#define OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(KlassType, OopClosureType, nv_suffix)
+#endif
+
 #endif // SHARE_VM_OOPS_ARRAYKLASS_HPP
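
The new OOP_OOP_ITERATE_DECL_* / OOP_OOP_ITERATE_DEFN_* macros generate one member-function declaration or definition per (klass, closure) pair by pasting the klass name and the closure suffix into the signature, which is what lets the per-klass macro boilerplate in the files below be deleted. A much-simplified stand-alone example of the same token-pasting technique (toy names, unrelated to the real closure hierarchy):

#include <cstdio>

struct PrintClosure { void apply(int v) { std::printf("value %d\n", v); } };
struct CountClosure { int n = 0; void apply(int) { n++; } };

struct ToyKlass {
  int data[3] = {1, 2, 3};
  // Declarations generated for each closure type via token pasting.
#define TOY_ITERATE_DECL(ClosureType, suffix) \
  int iterate##suffix(ClosureType* cl);
  TOY_ITERATE_DECL(PrintClosure, _print)
  TOY_ITERATE_DECL(CountClosure, _count)
#undef TOY_ITERATE_DECL
};

// Definitions generated the same way, outside the class body.
#define TOY_ITERATE_DEFN(KlassType, ClosureType, suffix)   \
  int KlassType::iterate##suffix(ClosureType* cl) {        \
    for (int v : data) { cl->apply(v); }                   \
    return 3;                                              \
  }
TOY_ITERATE_DEFN(ToyKlass, PrintClosure, _print)
TOY_ITERATE_DEFN(ToyKlass, CountClosure, _count)
#undef TOY_ITERATE_DEFN

int main() {
  ToyKlass k;
  PrintClosure pc;
  CountClosure cc;
  k.iterate_print(&pc);
  k.iterate_count(&cc);
  std::printf("counted %d\n", cc.n);
  return 0;
}
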
--- a/hotspot/src/share/vm/oops/instanceClassLoaderKlass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceClassLoaderKlass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -87,19 +87,12 @@
 
  public:
 
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)   \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);                    \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)  \
-  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
 };
--- a/hotspot/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp	Thu May 28 11:37:13 2015 -0700
@@ -78,33 +78,9 @@
   return size;
 }
 
-
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                              \
-int InstanceClassLoaderKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                               \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-                                                                                                        \
-int InstanceClassLoaderKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                                 \
-}
-#else
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                                \
-int InstanceClassLoaderKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                                     \
-}
-
 #define ALL_INSTANCE_CLASS_LOADER_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)     \
-  InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)     \
-  InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+  OOP_OOP_ITERATE_DEFN(          InstanceClassLoaderKlass, OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceClassLoaderKlass, OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceClassLoaderKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCECLASSLOADERKLASS_INLINE_HPP
--- a/hotspot/src/share/vm/oops/instanceKlass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -1084,19 +1084,12 @@
 
  public:
 
-#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                   \
-  int  oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);                    \
-  int  oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)  \
-  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   u2 idnum_allocated_count() const      { return _idnum_allocated_count; }
--- a/hotspot/src/share/vm/oops/instanceKlass.inline.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.inline.hpp	Thu May 28 11:37:13 2015 -0700
@@ -27,6 +27,7 @@
 
 #include "memory/iterator.hpp"
 #include "oops/instanceKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/oop.inline.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
@@ -187,29 +188,9 @@
 
 #undef INLINE
 
-
-#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-int InstanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                    \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-int InstanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                      \
-}
-#else
-#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-#define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-int InstanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                          \
-}
-
 #define ALL_INSTANCE_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)   \
-  InstanceKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)   \
-  InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+  OOP_OOP_ITERATE_DEFN(          InstanceKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCEKLASS_INLINE_HPP
--- a/hotspot/src/share/vm/oops/instanceMirrorKlass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceMirrorKlass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -149,19 +149,12 @@
 
  public:
 
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)           \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);                       \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceMirrorKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceMirrorKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \
-  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 };
 
--- a/hotspot/src/share/vm/oops/instanceMirrorKlass.inline.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceMirrorKlass.inline.hpp	Thu May 28 11:37:13 2015 -0700
@@ -27,6 +27,7 @@
 #include "classfile/javaClasses.hpp"
 #include "oops/instanceKlass.inline.hpp"
 #include "oops/instanceMirrorKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/oop.inline.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
@@ -132,33 +133,9 @@
   return oop_size(obj);
 }
 
-
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                         \
-int InstanceMirrorKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                          \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-                                                                                                   \
-int InstanceMirrorKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                            \
-}
-#else
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                           \
-int InstanceMirrorKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                                \
-}
-
 #define ALL_INSTANCE_MIRROR_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+  OOP_OOP_ITERATE_DEFN(          InstanceMirrorKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceMirrorKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceMirrorKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCEMIRRORKLASS_INLINE_HPP
--- a/hotspot/src/share/vm/oops/instanceRefKlass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -119,19 +119,12 @@
 
  public:
 
-#define InstanceRefKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)               \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);                    \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)     \
-  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock);
--- a/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp	Thu May 28 11:37:13 2015 -0700
@@ -141,34 +141,9 @@
 
 // Macro to define InstanceRefKlass::oop_oop_iterate for virtual/nonvirtual for
 // all closures.  Macros calling macros above for each oop size.
-
-#define InstanceRefKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                      \
-int InstanceRefKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                       \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-                                                                                                \
-int InstanceRefKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                         \
-}
-#else
-#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                        \
-int InstanceRefKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                             \
-}
-
 #define ALL_INSTANCE_REF_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceRefKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-
+  OOP_OOP_ITERATE_DEFN(          InstanceRefKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceRefKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceRefKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCEREFKLASS_INLINE_HPP
--- a/hotspot/src/share/vm/oops/klass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/klass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -583,20 +583,20 @@
 
   // Iterators specialized to particular subtypes
   // of ExtendedOopClosure, to avoid closure virtual calls.
-#define Klass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                                      \
-  virtual int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) = 0;                    \
-  /* Iterates "closure" over all the oops in "obj" (of type "this") within "mr". */                \
-  virtual int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) = 0;
+#define Klass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                                          \
+  virtual int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) = 0;                        \
+  /* Iterates "closure" over all the oops in "obj" (of type "this") within "mr". */                    \
+  virtual int oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr) = 0;
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL)
   ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)                    \
+#define Klass_OOP_OOP_ITERATE_DECL_BACKWARDS(OopClosureType, nv_suffix)                    \
   virtual int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) = 0;
 
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   virtual void array_klasses_do(void f(Klass* k)) {}
@@ -651,4 +651,44 @@
   void klass_update_barrier_set_pre(oop* p, oop v);
 };
 
+// Helper to convert the oop iterate macro suffixes into bool values that can be used by template functions.
+#define nvs_nv_to_bool true
+#define nvs_v_to_bool  false
+#define nvs_to_bool(nv_suffix) nvs##nv_suffix##_to_bool
+
+// Oop iteration macros for declarations.
+// Used to generate declarations in the *Klass header files.
+
+#define OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                                    \
+  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);                        \
+  int oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr);
+
+#if INCLUDE_ALL_GCS
+#define OOP_OOP_ITERATE_DECL_BACKWARDS(OopClosureType, nv_suffix)              \
+  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
+#endif // INCLUDE_ALL_GCS
+
+
+// Oop iteration macros for definitions.
+// Used to generate definitions in the *Klass.inline.hpp files.
+
+#define OOP_OOP_ITERATE_DEFN(KlassType, OopClosureType, nv_suffix)             \
+int KlassType::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
+  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                \
+}
+
+#if INCLUDE_ALL_GCS
+#define OOP_OOP_ITERATE_DEFN_BACKWARDS(KlassType, OopClosureType, nv_suffix)             \
+int KlassType::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
+  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                  \
+}
+#else
+#define OOP_OOP_ITERATE_DEFN_BACKWARDS(KlassType, OopClosureType, nv_suffix)
+#endif
+
+#define OOP_OOP_ITERATE_DEFN_BOUNDED(KlassType, OopClosureType, nv_suffix)                           \
+int KlassType::oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr) {  \
+  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                          \
+}
+
 #endif // SHARE_VM_OOPS_KLASS_HPP
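
Illustrative note, not part of the changeset: the shared macros collected in klass.hpp above hinge on two tricks, pasting nv_suffix onto the generated method name and mapping the same suffix to a compile-time bool through nvs_to_bool, so one templated implementation per klass can back every oop_oop_iterate* method stamped out in the *Klass.inline.hpp files. The standalone sketch below mimics that shape with invented DemoKlass/DemoClosure names; it compiles on its own and is not HotSpot code.

// Minimal sketch of the token-pasting pattern used by OOP_OOP_ITERATE_DEFN.
#include <cstdio>

#define nvs_nv_to_bool true
#define nvs_v_to_bool  false
#define nvs_to_bool(nv_suffix) nvs##nv_suffix##_to_bool

struct DemoClosure {};

struct DemoKlass {
  template <bool nv>
  int oop_oop_iterate(DemoClosure* closure) {
    (void)closure;
    return nv ? 1 : 2;   // stand-in for the real traversal; nv selects the flavour
  }
};

// Shape of OOP_OOP_ITERATE_DEFN(KlassType, OopClosureType, nv_suffix),
// reduced to free functions so the sketch is self-contained.
#define DEMO_ITERATE_DEFN(nv_suffix)                                    \
  int oop_oop_iterate##nv_suffix(DemoKlass& k, DemoClosure* closure) {  \
    return k.oop_oop_iterate<nvs_to_bool(nv_suffix)>(closure);          \
  }

DEMO_ITERATE_DEFN(_nv)   // defines oop_oop_iterate_nv, instantiates <true>
DEMO_ITERATE_DEFN(_v)    // defines oop_oop_iterate_v,  instantiates <false>

int main() {
  DemoKlass k;
  DemoClosure c;
  printf("%d %d\n", oop_oop_iterate_nv(k, &c), oop_oop_iterate_v(k, &c));  // prints "1 2"
  return 0;
}

With this in place, each ALL_*_KLASS_OOP_OOP_ITERATE_DEFN macro earlier in the changeset reduces to a handful of such one-line stamps per closure type.
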
--- a/hotspot/src/share/vm/oops/objArrayKlass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/objArrayKlass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -163,22 +163,14 @@
 
  public:
 
-#define ObjArrayKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)   \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);         \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk,      \
-                                     MemRegion mr);                     \
-  int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* blk,    \
-                                     int start, int end);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_RANGE)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_RANGE)
 
 #if INCLUDE_ALL_GCS
-#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \
-  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   // JVM support
--- a/hotspot/src/share/vm/oops/objArrayKlass.inline.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/objArrayKlass.inline.hpp	Thu May 28 11:37:13 2015 -0700
@@ -27,6 +27,8 @@
 
 #include "memory/memRegion.hpp"
 #include "memory/iterator.inline.hpp"
+#include "oops/arrayKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "oops/objArrayOop.inline.hpp"
 #include "oops/oop.inline.hpp"
@@ -149,41 +151,10 @@
   return size;
 }
 
-
-#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                   \
-int ObjArrayKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                    \
-}
-
-#if INCLUDE_ALL_GCS
-#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-int ObjArrayKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  /* No reverse implementation ATM. */                                                       \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                              \
-}
-#else
-#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                     \
-int ObjArrayKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                          \
-}
-
-#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r(OopClosureType, nv_suffix)                                      \
-                                                                                                             \
-int ObjArrayKlass::oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end) {  \
-  return oop_oop_iterate_range<nvs_to_bool(nv_suffix)>(obj, closure, start, end);                            \
-}
-
-
-#define ALL_OBJ_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  ObjArrayKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)    \
-  ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r(        OopClosureType, nv_suffix)
-
+#define ALL_OBJ_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN(             ObjArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(     ObjArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_RANGE(       ObjArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(ObjArrayKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_OBJARRAYKLASS_INLINE_HPP
--- a/hotspot/src/share/vm/oops/oop.inline.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp	Thu May 28 11:37:13 2015 -0700
@@ -741,7 +741,7 @@
 }                                                                     \
                                                                       \
 inline int oopDesc::oop_iterate(OopClosureType* blk, MemRegion mr) {  \
-  return klass()->oop_oop_iterate##nv_suffix##_m(this, blk, mr);      \
+  return klass()->oop_oop_iterate_bounded##nv_suffix(this, blk, mr);  \
 }
 
 
--- a/hotspot/src/share/vm/oops/typeArrayKlass.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/typeArrayKlass.hpp	Thu May 28 11:37:13 2015 -0700
@@ -92,24 +92,24 @@
   // The implementation used by all oop_oop_iterate functions in TypeArrayKlasses.
   inline int oop_oop_iterate_impl(oop obj, ExtendedOopClosure* closure);
 
+  // Wraps oop_oop_iterate_impl to conform to macros.
+  template <bool nv, typename OopClosureType>
+  inline int oop_oop_iterate(oop obj, OopClosureType* closure);
+
+  // Wraps oop_oop_iterate_impl to conform to macros.
+  template <bool nv, typename OopClosureType>
+  inline int oop_oop_iterate_bounded(oop obj, OopClosureType* closure, MemRegion mr);
+
  public:
 
-#define TypeArrayKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)    \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);       \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure,    \
-                                     MemRegion mr);                       \
-  int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure,  \
-                                     int start, int end);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(TypeArrayKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(TypeArrayKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_RANGE)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_RANGE)
 
 #if INCLUDE_ALL_GCS
-#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)  \
-  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
 
--- a/hotspot/src/share/vm/oops/typeArrayKlass.inline.hpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/oops/typeArrayKlass.inline.hpp	Thu May 28 11:37:13 2015 -0700
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP
 #define SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP
 
+#include "oops/arrayKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/typeArrayKlass.hpp"
 #include "oops/typeArrayOop.hpp"
@@ -39,35 +41,19 @@
   return t->object_size();
 }
 
-#define TypeArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-                                                                        \
-int TypeArrayKlass::                                                    \
-oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {          \
-  return oop_oop_iterate_impl(obj, closure);                            \
+template <bool nv, typename OopClosureType>
+int TypeArrayKlass::oop_oop_iterate(oop obj, OopClosureType* closure) {
+  return oop_oop_iterate_impl(obj, closure);
 }
 
-#if INCLUDE_ALL_GCS
-#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)  \
-                                                                                  \
-int TypeArrayKlass::                                                              \
-oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {          \
-  return oop_oop_iterate_impl(obj, closure);                                      \
-}
-#else
-#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define TypeArrayKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)          \
-                                                                                  \
-int TypeArrayKlass::                                                              \
-oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_impl(obj, closure);                                      \
+template <bool nv, typename OopClosureType>
+int TypeArrayKlass::oop_oop_iterate_bounded(oop obj, OopClosureType* closure, MemRegion mr) {
+  return oop_oop_iterate_impl(obj, closure);
 }
 
-#define ALL_TYPE_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  TypeArrayKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  TypeArrayKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+#define ALL_TYPE_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN(             TypeArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(     TypeArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(TypeArrayKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP
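
Illustrative note, not part of the changeset: the oop_oop_iterate / oop_oop_iterate_bounded template wrappers added to TypeArrayKlass above exist only so the class fits the shared OOP_OOP_ITERATE_DEFN* macros. A primitive array holds no oops, so every instantiation ignores both the closure and the bounds and simply reports the object size. A standalone sketch of that shape with invented names:

#include <cstdio>

struct DemoClosure {};           // never applied: a type array contains no oops

struct DemoTypeArrayKlass {
  int size_in_words;             // stands in for the array's object size

  int oop_oop_iterate_impl() { return size_in_words; }   // nothing to visit

  template <bool nv, typename OopClosureType>
  int oop_oop_iterate(OopClosureType* closure) {
    (void)closure;               // unused by design
    return oop_oop_iterate_impl();
  }

  template <bool nv, typename OopClosureType>
  int oop_oop_iterate_bounded(OopClosureType* closure, int /* MemRegion stand-in */) {
    (void)closure;               // bounds do not matter when there is nothing to scan
    return oop_oop_iterate_impl();
  }
};

int main() {
  DemoTypeArrayKlass k{ 7 };
  DemoClosure c;
  // Both the <true> ("nonvirtual") and <false> ("virtual") flavours collapse
  // to the same impl call.
  printf("%d %d\n",
         k.oop_oop_iterate<true, DemoClosure>(&c),
         k.oop_oop_iterate_bounded<false, DemoClosure>(&c, 0));   // prints "7 7"
  return 0;
}
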
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Thu May 28 11:37:13 2015 -0700
@@ -1278,10 +1278,8 @@
 
   // Preferred young gen size for "short" pauses:
   // upper bound depends on # of threads and NewRatio.
-  const uintx parallel_gc_threads =
-    (ParallelGCThreads == 0 ? 1 : ParallelGCThreads);
   const size_t preferred_max_new_size_unaligned =
-    MIN2(max_heap/(NewRatio+1), ScaleForWordSize(young_gen_per_worker * parallel_gc_threads));
+    MIN2(max_heap/(NewRatio+1), ScaleForWordSize(young_gen_per_worker * ParallelGCThreads));
   size_t preferred_max_new_size =
     align_size_up(preferred_max_new_size_unaligned, os::vm_page_size());
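
Illustrative note, not part of the changeset: the arguments.cpp hunk drops the local parallel_gc_threads copy (and its == 0 guard) and uses ParallelGCThreads directly when bounding the preferred young generation size. A rough standalone sketch of the arithmetic with made-up values; MIN2 and align_size_up are modelled by plain helpers and the ScaleForWordSize adjustment is omitted:

#include <cstddef>
#include <cstdio>

static size_t min2(size_t a, size_t b) { return a < b ? a : b; }
static size_t align_up(size_t v, size_t unit) { return (v + unit - 1) / unit * unit; }

int main() {
  const size_t M = 1024 * 1024;
  size_t max_heap             = 4096 * M;  // e.g. -Xmx4g
  size_t new_ratio            = 2;         // stand-in for NewRatio
  size_t young_gen_per_worker = 64 * M;    // illustrative per-GC-thread budget
  size_t parallel_gc_threads  = 8;         // ParallelGCThreads, now used directly

  size_t unaligned = min2(max_heap / (new_ratio + 1),
                          young_gen_per_worker * parallel_gc_threads);
  size_t preferred_max_new_size = align_up(unaligned, 4096 /* page size */);

  printf("preferred_max_new_size = %zu MB\n", preferred_max_new_size / M);  // 512 MB
  return 0;
}
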
 
--- a/hotspot/src/share/vm/utilities/elfFile.cpp	Tue May 26 09:22:38 2015 -0700
+++ b/hotspot/src/share/vm/utilities/elfFile.cpp	Thu May 28 11:37:13 2015 -0700
@@ -261,7 +261,12 @@
       }
     }
   }
+// AARCH64 defaults to noexecstack. All others default to execstack.
+#ifdef AARCH64
+  return true;
+#else
   return false;
+#endif
 }
 #endif
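
Illustrative note, not part of the changeset: the elfFile.cpp hunk only changes the fallback answer when an object carries no explicit stack marking, so AARCH64 now defaults to a non-executable stack. A rough standalone sketch of that kind of check, using the Linux <elf.h> constants; the helper name and structure are invented and do not match HotSpot's ElfFile code:

#include <cstdio>
#include <elf.h>      // PT_GNU_STACK, PF_X (Linux)

static bool allows_noexec_stack(const Elf64_Phdr* phdrs, int count) {
  for (int i = 0; i < count; i++) {
    if (phdrs[i].p_type == PT_GNU_STACK) {
      return (phdrs[i].p_flags & PF_X) == 0;   // an explicit marking decides
    }
  }
#ifdef __aarch64__
  return true;    // AArch64: assume noexecstack when nothing is specified
#else
  return false;   // other platforms historically default to an executable stack
#endif
}

int main() {
  Elf64_Phdr ph = {};
  ph.p_type  = PT_GNU_STACK;
  ph.p_flags = 0;                                   // no PF_X => noexecstack requested
  printf("%d\n", allows_noexec_stack(&ph, 1));      // 1: explicit marking wins
  printf("%d\n", allows_noexec_stack(nullptr, 0));  // arch-dependent default
  return 0;
}
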
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/serviceability/sa/TestClassLoaderStats.java	Thu May 28 11:37:13 2015 -0700
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.Platform;
+import jdk.test.lib.ProcessTools;
+import jdk.test.lib.OutputAnalyzer;
+
+/*
+ * @test
+ * @library /testlibrary
+ * @build jdk.test.lib.*
+ * @run main TestClassLoaderStats
+ */
+public class TestClassLoaderStats {
+
+    public static void main(String[] args) throws Exception {
+        if (!Platform.shouldSAAttach()) {
+            System.out.println("SA attach not expected to work - test skipped.");
+            return;
+        }
+
+        ProcessBuilder processBuilder = ProcessTools.createJavaProcessBuilder(
+                "-XX:+UsePerfData",
+                "sun.jvm.hotspot.tools.ClassLoaderStats",
+                Integer.toString(ProcessTools.getProcessId()));
+        OutputAnalyzer output = ProcessTools.executeProcess(processBuilder);
+        System.out.println(output.getOutput());
+
+        output.shouldHaveExitValue(0);
+        output.shouldContain("Debugger attached successfully.");
+        // The class loader stats header should be present in the output:
+        output.shouldMatch("class_loader\\W+classes\\W+bytes\\W+parent_loader\\W+alive?\\W+type");
+        output.stderrShouldNotMatch("[Ee]xception");
+        output.stderrShouldNotMatch("[Ee]rror");
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/serviceability/sa/TestStackTrace.java	Thu May 28 11:37:13 2015 -0700
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.OutputAnalyzer;
+import jdk.test.lib.Platform;
+import jdk.test.lib.ProcessTools;
+
+/*
+ * @test
+ * @library /testlibrary
+ * @build jdk.test.lib.*
+ * @run main TestStackTrace
+ */
+public class TestStackTrace {
+
+    public static void main(String[] args) throws Exception {
+        if (!Platform.shouldSAAttach()) {
+            System.out.println("SA attach not expected to work - test skipped.");
+            return;
+        }
+
+        ProcessBuilder processBuilder = ProcessTools.createJavaProcessBuilder(
+                "-XX:+UsePerfData",
+                "sun.jvm.hotspot.tools.StackTrace",
+                Integer.toString(ProcessTools.getProcessId()));
+        OutputAnalyzer output = ProcessTools.executeProcess(processBuilder);
+        System.out.println(output.getOutput());
+
+        output.shouldHaveExitValue(0);
+        output.shouldContain("Debugger attached successfully.");
+        output.stderrShouldNotMatch("[Ee]xception");
+        output.stderrShouldNotMatch("[Ee]rror");
+    }
+
+}