Merge
author jwilhelm
Mon, 04 May 2015 17:10:50 +0200
changeset 30579 5208524ce05c
parent 30314 3d55863a66ba (current diff)
parent 30578 8b6c44532ca2 (diff)
child 30580 394471b86a83
Merge
hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.hpp
hotspot/src/share/vm/memory/metaspaceShared.hpp
hotspot/src/share/vm/memory/referenceProcessor.cpp
hotspot/test/Makefile
hotspot/test/TEST.groups
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
-#define SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+#ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_ADAPTIVEFREELIST_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_ADAPTIVEFREELIST_HPP
 
 #include "memory/freeList.hpp"
 #include "gc_implementation/shared/allocationStats.hpp"
@@ -226,4 +226,4 @@
 #endif  // NOT PRODUCT
 };
 
-#endif // SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+#endif // SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_ADAPTIVEFREELIST_HPP
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -28,6 +28,7 @@
 #include "gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp"
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp"
 #include "oops/oop.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 // Trim our work_queue so its length is below max at return
 inline void Par_MarkRefsIntoAndScanClosure::trim_queue(uint max) {
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon May 04 17:10:50 2015 +0200
@@ -66,6 +66,7 @@
 #include "services/memoryService.hpp"
 #include "services/runtimeService.hpp"
 #include "utilities/stack.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 // statics
 CMSCollector* ConcurrentMarkSweepGeneration::_collector = NULL;
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon May 04 17:10:50 2015 +0200
@@ -54,6 +54,7 @@
 #include "runtime/atomic.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 #include "services/memTracker.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 // Concurrent marking bit map wrapper
 
@@ -2551,31 +2552,50 @@
   _nextMarkBitMap  = (CMBitMap*)  temp;
 }
 
-class CMObjectClosure;
-
-// Closure for iterating over objects, currently only used for
-// processing SATB buffers.
-class CMObjectClosure : public ObjectClosure {
+// Closure for marking entries in SATB buffers.
+class CMSATBBufferClosure : public SATBBufferClosure {
 private:
   CMTask* _task;
+  G1CollectedHeap* _g1h;
+
+  // This is very similar to CMTask::deal_with_reference, but with
+  // more relaxed requirements for the argument, so this must be more
+  // circumspect about treating the argument as an object.
+  void do_entry(void* entry) const {
+    _task->increment_refs_reached();
+    HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
+    if (entry < hr->next_top_at_mark_start()) {
+      // Until we get here, we don't know whether entry refers to a valid
+      // object; it could instead have been a stale reference.
+      oop obj = static_cast<oop>(entry);
+      assert(obj->is_oop(true /* ignore mark word */),
+             err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
+      _task->make_reference_grey(obj, hr);
+    }
+  }
 
 public:
-  void do_object(oop obj) {
-    _task->deal_with_reference(obj);
+  CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
+    : _task(task), _g1h(g1h) { }
+
+  virtual void do_buffer(void** buffer, size_t size) {
+    for (size_t i = 0; i < size; ++i) {
+      do_entry(buffer[i]);
+    }
   }
-
-  CMObjectClosure(CMTask* task) : _task(task) { }
 };
 
 class G1RemarkThreadsClosure : public ThreadClosure {
-  CMObjectClosure _cm_obj;
+  CMSATBBufferClosure _cm_satb_cl;
   G1CMOopClosure _cm_cl;
   MarkingCodeBlobClosure _code_cl;
   int _thread_parity;
 
  public:
   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
-    _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
+    _cm_satb_cl(task, g1h),
+    _cm_cl(g1h, g1h->concurrent_mark(), task),
+    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
     _thread_parity(Threads::thread_claim_parity()) {}
 
   void do_thread(Thread* thread) {
@@ -2591,11 +2611,11 @@
         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
         jt->nmethods_do(&_code_cl);
 
-        jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
+        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
       }
     } else if (thread->is_VM_thread()) {
       if (thread->claim_oops_do(true, _thread_parity)) {
-        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
+        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
       }
     }
   }
@@ -3693,13 +3713,13 @@
   // very counter productive if it did that. :-)
   _draining_satb_buffers = true;
 
-  CMObjectClosure oc(this);
+  CMSATBBufferClosure satb_cl(this, _g1h);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 
   // This keeps claiming and applying the closure to completed buffers
   // until we run out of buffers or we need to abort.
   while (!has_aborted() &&
-         satb_mq_set.apply_closure_to_completed_buffer(&oc)) {
+         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
     if (_cm->verbose_medium()) {
       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
     }
@@ -3758,6 +3778,10 @@
 #endif // _MARKING_STATS_
 }
 
+bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
+  return _task_queues->steal(worker_id, hash_seed, obj);
+}
+
 /*****************************************************************************
 
     The do_marking_step(time_target_ms, ...) method is the building
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon May 04 17:10:50 2015 +0200
@@ -139,6 +139,11 @@
   static size_t compute_size(size_t heap_size);
   // Returns the amount of bytes on the heap between two marks in the bitmap.
   static size_t mark_distance();
+  // Returns how many bytes (or bits) of the heap a single byte (or bit) of the
+  // mark bitmap corresponds to. This is the same as the mark distance above.
+  static size_t heap_map_factor() {
+    return mark_distance();
+  }
 
   CMBitMap() : CMBitMapRO(LogMinObjAlignment), _listener() { _listener.set_bitmap(this); }
 
@@ -671,9 +676,7 @@
   }
 
   // Attempts to steal an object from the task queues of other tasks
-  bool try_stealing(uint worker_id, int* hash_seed, oop& obj) {
-    return _task_queues->steal(worker_id, hash_seed, obj);
-  }
+  bool try_stealing(uint worker_id, int* hash_seed, oop& obj);
 
   ConcurrentMark(G1CollectedHeap* g1h,
                  G1RegionToSpaceMapper* prev_bitmap_storage,
@@ -1095,9 +1098,9 @@
   void regular_clock_call();
   bool concurrent() { return _concurrent; }
 
-  // Test whether objAddr might have already been passed over by the
+  // Test whether obj might have already been passed over by the
   // mark bitmap scan, and so needs to be pushed onto the mark stack.
-  bool is_below_finger(HeapWord* objAddr, HeapWord* global_finger) const;
+  bool is_below_finger(oop obj, HeapWord* global_finger) const;
 
   template<bool scan> void process_grey_object(oop obj);
 
@@ -1148,8 +1151,18 @@
 
   void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
 
-  // It grays the object by marking it and, if necessary, pushing it
-  // on the local queue
+  // Increment the number of references this task has visited.
+  void increment_refs_reached() { ++_refs_reached; }
+
+  // Grey the object by marking it.  If not already marked, push it on
+  // the local queue if below the finger.
+  // Precondition: obj is in region.
+  // Precondition: obj is below region's NTAMS.
+  inline void make_reference_grey(oop obj, HeapRegion* region);
+
+  // Grey the object (by calling make_grey_reference) if required,
+  // e.g. obj is below its containing region's NTAMS.
+  // Precondition: obj is a valid heap object.
   inline void deal_with_reference(oop obj);
 
   // It scans an object and visits its children.
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -27,6 +27,7 @@
 
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 // Utility routine to set an exclusive range of cards on the given
 // card liveness bitmap
@@ -259,15 +260,15 @@
              ++_local_pushes );
 }
 
-inline bool CMTask::is_below_finger(HeapWord* objAddr,
-                                    HeapWord* global_finger) const {
-  // If objAddr is above the global finger, then the mark bitmap scan
+inline bool CMTask::is_below_finger(oop obj, HeapWord* global_finger) const {
+  // If obj is above the global finger, then the mark bitmap scan
   // will find it later, and no push is needed.  Similarly, if we have
-  // a current region and objAddr is between the local finger and the
+  // a current region and obj is between the local finger and the
   // end of the current region, then no push is needed.  The tradeoff
   // of checking both vs only checking the global finger is that the
   // local check will be more accurate and so result in fewer pushes,
   // but may also be a little slower.
+  HeapWord* objAddr = (HeapWord*)obj;
   if (_finger != NULL) {
     // We have a current region.
 
@@ -277,7 +278,7 @@
     assert(_region_limit != NULL, "invariant");
     assert(_region_limit <= global_finger, "invariant");
 
-    // True if objAddr is less than the local finger, or is between
+    // True if obj is less than the local finger, or is between
     // the region limit and the global finger.
     if (objAddr < _finger) {
       return true;
@@ -289,13 +290,65 @@
   return objAddr < global_finger;
 }
 
+inline void CMTask::make_reference_grey(oop obj, HeapRegion* hr) {
+  if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
+
+    if (_cm->verbose_high()) {
+      gclog_or_tty->print_cr("[%u] marked object " PTR_FORMAT,
+                             _worker_id, p2i(obj));
+    }
+
+    // No OrderAccess:store_load() is needed. It is implicit in the
+    // CAS done in CMBitMap::parMark() call in the routine above.
+    HeapWord* global_finger = _cm->finger();
+
+    // We only need to push a newly grey object on the mark
+    // stack if it is in a section of memory the mark bitmap
+    // scan has already examined.  Mark bitmap scanning
+    // maintains progress "fingers" for determining that.
+    //
+    // Notice that the global finger might be moving forward
+    // concurrently. This is not a problem. In the worst case, we
+    // mark the object while it is above the global finger and, by
+    // the time we read the global finger, it has moved forward
+    // past this object. In this case, the object will probably
+    // be visited when a task is scanning the region and will also
+    // be pushed on the stack. So, some duplicate work, but no
+    // correctness problems.
+    if (is_below_finger(obj, global_finger)) {
+      if (obj->is_typeArray()) {
+        // Immediately process arrays of primitive types, rather
+        // than pushing on the mark stack.  This keeps us from
+        // adding humongous objects to the mark stack that might
+        // be reclaimed before the entry is processed - see
+        // selection of candidates for eager reclaim of humongous
+        // objects.  The cost of the additional type test is
+        // mitigated by avoiding a trip through the mark stack,
+        // by only doing a bookkeeping update and avoiding the
+        // actual scan of the object - a typeArray contains no
+        // references, and the metadata is built-in.
+        process_grey_object<false>(obj);
+      } else {
+        if (_cm->verbose_high()) {
+          gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT
+                                 ", global: " PTR_FORMAT ") pushing "
+                                 PTR_FORMAT " on mark stack",
+                                 _worker_id, p2i(_finger),
+                                 p2i(global_finger), p2i(obj));
+        }
+        push(obj);
+      }
+    }
+  }
+}
+
 inline void CMTask::deal_with_reference(oop obj) {
   if (_cm->verbose_high()) {
     gclog_or_tty->print_cr("[%u] we're dealing with reference = "PTR_FORMAT,
                            _worker_id, p2i((void*) obj));
   }
 
-  ++_refs_reached;
+  increment_refs_reached();
 
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(obj)));
@@ -307,55 +360,7 @@
       // anything with it).
       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
       if (!hr->obj_allocated_since_next_marking(obj)) {
-        if (_cm->verbose_high()) {
-          gclog_or_tty->print_cr("[%u] "PTR_FORMAT" is not considered marked",
-                                 _worker_id, p2i((void*) obj));
-        }
-
-        // we need to mark it first
-        if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
-          // No OrderAccess:store_load() is needed. It is implicit in the
-          // CAS done in CMBitMap::parMark() call in the routine above.
-          HeapWord* global_finger = _cm->finger();
-
-          // We only need to push a newly grey object on the mark
-          // stack if it is in a section of memory the mark bitmap
-          // scan has already examined.  Mark bitmap scanning
-          // maintains progress "fingers" for determining that.
-          //
-          // Notice that the global finger might be moving forward
-          // concurrently. This is not a problem. In the worst case, we
-          // mark the object while it is above the global finger and, by
-          // the time we read the global finger, it has moved forward
-          // past this object. In this case, the object will probably
-          // be visited when a task is scanning the region and will also
-          // be pushed on the stack. So, some duplicate work, but no
-          // correctness problems.
-          if (is_below_finger(objAddr, global_finger)) {
-            if (obj->is_typeArray()) {
-              // Immediately process arrays of primitive types, rather
-              // than pushing on the mark stack.  This keeps us from
-              // adding humongous objects to the mark stack that might
-              // be reclaimed before the entry is processed - see
-              // selection of candidates for eager reclaim of humongous
-              // objects.  The cost of the additional type test is
-              // mitigated by avoiding a trip through the mark stack,
-              // by only doing a bookkeeping update and avoiding the
-              // actual scan of the object - a typeArray contains no
-              // references, and the metadata is built-in.
-              process_grey_object<false>(obj);
-            } else {
-              if (_cm->verbose_high()) {
-                gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT
-                                       ", global: " PTR_FORMAT ") pushing "
-                                       PTR_FORMAT " on mark stack",
-                                       _worker_id, p2i(_finger),
-                                       p2i(global_finger), p2i(objAddr));
-              }
-              push(obj);
-            }
-          }
-        }
+        make_reference_grey(obj, hr);
       }
     }
   }
--- a/hotspot/src/share/vm/gc_implementation/g1/evacuationInfo.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/evacuationInfo.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_EVACUATIONINFO_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_SHARED_EVACUATIONINFO_HPP
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_EVACUATIONINFO_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_EVACUATIONINFO_HPP
 
 #include "memory/allocation.hpp"
 
@@ -78,4 +78,4 @@
   uint   regions_freed()             { return _regions_freed; }
 };
 
-#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_EVACUATIONINFO_HPP
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_EVACUATIONINFO_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp	Mon May 04 17:10:50 2015 +0200
@@ -83,7 +83,7 @@
                             &_retained_old_gc_alloc_region);
 }
 
-void G1DefaultAllocator::release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info) {
+void G1DefaultAllocator::release_gc_alloc_regions(EvacuationInfo& evacuation_info) {
   AllocationContext_t context = AllocationContext::current();
   evacuation_info.set_allocation_regions(survivor_gc_alloc_region(context)->count() +
                                          old_gc_alloc_region(context)->count());
@@ -99,8 +99,8 @@
   }
 
   if (ResizePLAB) {
-    _g1h->alloc_buffer_stats(InCSetState::Young)->adjust_desired_plab_sz(no_of_gc_workers);
-    _g1h->alloc_buffer_stats(InCSetState::Old)->adjust_desired_plab_sz(no_of_gc_workers);
+    _g1h->alloc_buffer_stats(InCSetState::Young)->adjust_desired_plab_sz();
+    _g1h->alloc_buffer_stats(InCSetState::Old)->adjust_desired_plab_sz();
   }
 }
 
@@ -119,7 +119,6 @@
   size_t gclab_word_size = _g1h->desired_plab_sz(dest);
   if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
     G1PLAB* alloc_buf = alloc_buffer(dest, context);
-    add_to_alloc_buffer_waste(alloc_buf->words_remaining());
     alloc_buf->retire();
 
     HeapWord* buf = _g1h->par_allocate_during_gc(dest, gclab_word_size, context);
@@ -153,8 +152,19 @@
   for (uint state = 0; state < InCSetState::Num; state++) {
     G1PLAB* const buf = _alloc_buffers[state];
     if (buf != NULL) {
-      add_to_alloc_buffer_waste(buf->words_remaining());
       buf->flush_and_retire_stats(_g1h->alloc_buffer_stats(state));
     }
   }
 }
+
+void G1DefaultParGCAllocator::waste(size_t& wasted, size_t& undo_wasted) {
+  wasted = 0;
+  undo_wasted = 0;
+  for (uint state = 0; state < InCSetState::Num; state++) {
+    G1PLAB * const buf = _alloc_buffers[state];
+    if (buf != NULL) {
+      wasted += buf->waste();
+      undo_wasted += buf->undo_waste();
+    }
+  }
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp	Mon May 04 17:10:50 2015 +0200
@@ -53,7 +53,7 @@
    virtual void release_mutator_alloc_region() = 0;
 
    virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
-   virtual void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info) = 0;
+   virtual void release_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
    virtual void abandon_gc_alloc_regions() = 0;
 
    virtual MutatorAllocRegion*    mutator_alloc_region(AllocationContext_t context) = 0;
@@ -114,7 +114,7 @@
   virtual void release_mutator_alloc_region();
 
   virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info);
-  virtual void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info);
+  virtual void release_gc_alloc_regions(EvacuationInfo& evacuation_info);
   virtual void abandon_gc_alloc_regions();
 
   virtual bool is_retained_old_region(HeapRegion* hr) {
@@ -188,12 +188,6 @@
   // architectures have a special compare against zero instructions.
   const uint _survivor_alignment_bytes;
 
-  size_t _alloc_buffer_waste;
-  size_t _undo_waste;
-
-  void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
-  void add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
-
   virtual void retire_alloc_buffers() = 0;
   virtual G1PLAB* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0;
 
@@ -213,15 +207,12 @@
 
 public:
   G1ParGCAllocator(G1CollectedHeap* g1h) :
-    _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()),
-    _alloc_buffer_waste(0), _undo_waste(0) {
-  }
+    _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()) { }
   virtual ~G1ParGCAllocator() { }
 
   static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h);
 
-  size_t alloc_buffer_waste() { return _alloc_buffer_waste; }
-  size_t undo_waste() {return _undo_waste; }
+  virtual void waste(size_t& wasted, size_t& undo_wasted) = 0;
 
   // Allocate word_sz words in dest, either directly into the regions or by
   // allocating a new PLAB. Returns the address of the allocated memory, NULL if
@@ -253,14 +244,7 @@
   }
 
   void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) {
-    if (alloc_buffer(dest, context)->contains(obj)) {
-      assert(alloc_buffer(dest, context)->contains(obj + word_sz - 1),
-             "should contain whole object");
-      alloc_buffer(dest, context)->undo_allocation(obj, word_sz);
-    } else {
-      CollectedHeap::fill_with_object(obj, word_sz);
-      add_to_undo_waste(word_sz);
-    }
+    alloc_buffer(dest, context)->undo_allocation(obj, word_sz);
   }
 };
 
@@ -280,7 +264,9 @@
     return _alloc_buffers[dest.value()];
   }
 
-  virtual void retire_alloc_buffers() ;
+  virtual void retire_alloc_buffers();
+
+  virtual void waste(size_t& wasted, size_t& undo_wasted);
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCATOR_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Mon May 04 17:10:50 2015 +0200
@@ -179,6 +179,11 @@
     return ReservedSpace::allocation_align_size_up(number_of_slots);
   }
 
+  // Returns how many bytes of the heap a single byte of the BOT corresponds to.
+  static size_t heap_map_factor() {
+    return N_bytes;
+  }
+
   enum SomePublicConstants {
     LogN = 9,
     LogN_words = LogN - LogHeapWordSize,
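
The heap_map_factor() methods introduced in this changeset (here and, further down, for the mark bitmaps, the card table and the card counts table) all express the same ratio: how many bytes of heap one byte of the auxiliary structure describes, which is what create_aux_memory_mapper() needs. Below is a standalone sketch of that sizing arithmetic, assuming N_bytes is 1 << LogN as the constants above suggest; it is illustrative only and not part of this changeset.

#include <cassert>
#include <cstddef>

// Standalone sketch (not HotSpot code): with LogN = 9, one BOT byte covers
// 2^9 = 512 heap bytes, so an auxiliary structure sized via heap_map_factor()
// needs heap_bytes / 512 bytes of backing storage.
int main() {
  const size_t LogN            = 9;
  const size_t heap_map_factor = (size_t)1 << LogN;      // 512
  const size_t heap_bytes      = (size_t)1 << 30;        // a 1 GiB heap, for example
  const size_t bot_bytes       = heap_bytes / heap_map_factor;
  assert(bot_bytes == 2 * 1024 * 1024);                  // 2 MiB of BOT storage
  return 0;
}
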
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.cpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,6 +39,17 @@
   _counts->clear_range(mr);
 }
 
+size_t G1CardCounts::compute_size(size_t mem_region_size_in_words) {
+  // We keep card counts for every card, so the size of the card counts table must
+  // be the same as the card table.
+  return G1SATBCardTableLoggingModRefBS::compute_size(mem_region_size_in_words);
+}
+
+size_t G1CardCounts::heap_map_factor() {
+  // See G1CardCounts::compute_size() why we reuse the card table value.
+  return G1SATBCardTableLoggingModRefBS::heap_map_factor();
+}
+
 void G1CardCounts::clear_range(size_t from_card_num, size_t to_card_num) {
   if (has_count_table()) {
     assert(from_card_num < to_card_num,
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.hpp	Mon May 04 17:10:50 2015 +0200
@@ -101,6 +101,14 @@
  public:
   G1CardCounts(G1CollectedHeap* g1h);
 
+  // Return the number of slots needed for a card counts table
+  // that covers mem_region_words words.
+  static size_t compute_size(size_t mem_region_size_in_words);
+
+  // Returns how many bytes of the heap a single byte of the card counts table
+  // corresponds to.
+  static size_t heap_map_factor();
+
   void initialize(G1RegionToSpaceMapper* mapper);
 
   // Increments the refinement count for the given card.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon May 04 17:10:50 2015 +0200
@@ -66,6 +66,7 @@
 #include "runtime/vmThread.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/stack.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 size_t G1CollectedHeap::_humongous_object_threshold_in_words = 0;
 
@@ -1166,6 +1167,7 @@
   SvcGCMarker sgcm(SvcGCMarker::FULL);
   ResourceMark rm;
 
+  G1Log::update_level();
   print_heap_before_gc();
   trace_heap_before_gc(gc_tracer);
 
@@ -1890,24 +1892,24 @@
   G1RegionToSpaceMapper* bot_storage =
     create_aux_memory_mapper("Block offset table",
                              G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize),
-                             G1BlockOffsetSharedArray::N_bytes);
+                             G1BlockOffsetSharedArray::heap_map_factor());
 
   ReservedSpace cardtable_rs(G1SATBCardTableLoggingModRefBS::compute_size(g1_rs.size() / HeapWordSize));
   G1RegionToSpaceMapper* cardtable_storage =
     create_aux_memory_mapper("Card table",
                              G1SATBCardTableLoggingModRefBS::compute_size(g1_rs.size() / HeapWordSize),
-                             G1BlockOffsetSharedArray::N_bytes);
+                             G1SATBCardTableLoggingModRefBS::heap_map_factor());
 
   G1RegionToSpaceMapper* card_counts_storage =
     create_aux_memory_mapper("Card counts table",
-                             G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize),
-                             G1BlockOffsetSharedArray::N_bytes);
+                             G1CardCounts::compute_size(g1_rs.size() / HeapWordSize),
+                             G1CardCounts::heap_map_factor());
 
   size_t bitmap_size = CMBitMap::compute_size(g1_rs.size());
   G1RegionToSpaceMapper* prev_bitmap_storage =
-    create_aux_memory_mapper("Prev Bitmap", bitmap_size, CMBitMap::mark_distance());
+    create_aux_memory_mapper("Prev Bitmap", bitmap_size, CMBitMap::heap_map_factor());
   G1RegionToSpaceMapper* next_bitmap_storage =
-    create_aux_memory_mapper("Next Bitmap", bitmap_size, CMBitMap::mark_distance());
+    create_aux_memory_mapper("Next Bitmap", bitmap_size, CMBitMap::heap_map_factor());
 
   _hrm.initialize(heap_storage, prev_bitmap_storage, next_bitmap_storage, bot_storage, cardtable_storage, card_counts_storage);
   g1_barrier_set()->initialize(cardtable_storage);
@@ -3648,6 +3650,7 @@
   SvcGCMarker sgcm(SvcGCMarker::MINOR);
   ResourceMark rm;
 
+  G1Log::update_level();
   print_heap_before_gc();
   trace_heap_before_gc(_gc_tracer_stw);
 
@@ -5438,7 +5441,7 @@
     phase_times->record_string_dedup_fixup_time(fixup_time_ms);
   }
 
-  _allocator->release_gc_alloc_regions(n_workers, evacuation_info);
+  _allocator->release_gc_alloc_regions(evacuation_info);
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
 
   // Reset and re-enable the hot card cache.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon May 04 17:10:50 2015 +0200
@@ -276,7 +276,7 @@
   void init_gc_alloc_regions(EvacuationInfo& evacuation_info);
 
   // It releases the GC alloc regions at the end of a GC.
-  void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info);
+  void release_gc_alloc_regions(EvacuationInfo& evacuation_info);
 
   // It does any cleanup that needs to be done on the GC alloc regions
   // before a Full GC.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -48,7 +48,7 @@
 }
 
 size_t G1CollectedHeap::desired_plab_sz(InCSetState dest) {
-  size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_sz();
+  size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_sz(G1CollectedHeap::heap()->workers()->active_workers());
   // Prevent humongous PLAB sizes for two reasons:
   // * PLABs are allocated using a similar paths as oops, but should
   //   never be in a humongous region
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Mon May 04 17:10:50 2015 +0200
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMES_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMES_HPP
 
 #include "memory/allocation.hpp"
 
@@ -286,4 +286,4 @@
   ~G1GCParPhaseTimesTracker();
 };
 
-#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMES_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1Log.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1Log.cpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,17 +25,34 @@
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1_globals.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
-#include "runtime/globals.hpp"
+#include "runtime/globals_extension.hpp"
 
 G1Log::LogLevel G1Log::_level = G1Log::LevelNone;
 
+
+// Updates _level based on PrintGC and PrintGCDetails values (unless
+// G1LogLevel is set explicitly)
+// - PrintGC maps to "fine".
+// - PrintGCDetails maps to "finer".
+void G1Log::update_level() {
+  if (FLAG_IS_DEFAULT(G1LogLevel)) {
+    _level = LevelNone;
+    if (PrintGCDetails) {
+      _level = LevelFiner;
+    } else if (PrintGC) {
+      _level = LevelFine;
+    }
+  }
+}
+
+
 // If G1LogLevel has not been set up we will use the values of PrintGC
 // and PrintGCDetails for the logging level.
-// - PrintGC maps to "fine".
-// - PrintGCDetails maps to "finer".
 void G1Log::init() {
-  if (G1LogLevel != NULL && G1LogLevel[0] != '\0') {
-    if (strncmp("none", G1LogLevel, 4) == 0 && G1LogLevel[4] == '\0') {
+  if (!FLAG_IS_DEFAULT(G1LogLevel)) {
+    // PrintGC flags change won't have any affect, because G1LogLevel
+    // is set explicitly
+    if (G1LogLevel[0] == '\0' || strncmp("none", G1LogLevel, 4) == 0 && G1LogLevel[4] == '\0') {
       _level = LevelNone;
     } else if (strncmp("fine", G1LogLevel, 4) == 0 && G1LogLevel[4] == '\0') {
       _level = LevelFine;
@@ -47,10 +64,7 @@
       warning("Unknown logging level '%s', should be one of 'fine', 'finer' or 'finest'.", G1LogLevel);
     }
   } else {
-    if (PrintGCDetails) {
-      _level = LevelFiner;
-    } else if (PrintGC) {
-      _level = LevelFine;
-    }
+    update_level();
   }
 }
+
--- a/hotspot/src/share/vm/gc_implementation/g1/g1Log.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1Log.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -57,6 +57,9 @@
   }
 
   static void init();
+
+  // Update to log level to reflect runtime changes to manageable flags
+  static void update_level();
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1LOG_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -76,9 +76,6 @@
 }
 
 void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) {
-  double longest_allowed = longest_pause_internal(start);
-  if (longest_allowed < 0.0)
-    longest_allowed = 0.0;
   double duration = end - start;
 
   remove_expired_entries(end);
@@ -111,41 +108,6 @@
 // this is for trying things out in the future and a couple
 // of other places (debugging)
 
-double G1MMUTrackerQueue::longest_pause(double current_time) {
-  if (_DISABLE_MMU)
-    return _max_gc_time;
-
-  MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
-  remove_expired_entries(current_time);
-
-  return longest_pause_internal(current_time);
-}
-
-double G1MMUTrackerQueue::longest_pause_internal(double current_time) {
-  double target_time = _max_gc_time;
-
-  while( 1 ) {
-    double gc_time =
-      calculate_gc_time(current_time + target_time);
-    double diff = target_time + gc_time - _max_gc_time;
-    if (!is_double_leq_0(diff)) {
-      target_time -= diff;
-      if (is_double_leq_0(target_time)) {
-        target_time = -1.0;
-        break;
-      }
-    } else {
-      break;
-    }
-  }
-
-  return target_time;
-}
-
-// basically the _internal call does not remove expired entries
-// this is for trying things out in the future and a couple
-// of other places (debugging)
-
 double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) {
   if (_DISABLE_MMU)
     return 0.0;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,7 +43,6 @@
   G1MMUTracker(double time_slice, double max_gc_time);
 
   virtual void add_pause(double start, double end, bool gc_thread) = 0;
-  virtual double longest_pause(double current_time) = 0;
   virtual double when_sec(double current_time, double pause_time) = 0;
 
   double max_gc_time() {
@@ -122,7 +121,6 @@
   void remove_expired_entries(double current_time);
   double calculate_gc_time(double current_time);
 
-  double longest_pause_internal(double current_time);
   double when_internal(double current_time, double pause_time);
 
 public:
@@ -130,7 +128,6 @@
 
   virtual void add_pause(double start, double end, bool gc_thread);
 
-  virtual double longest_pause(double current_time);
   virtual double when_sec(double current_time, double pause_time);
 };
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Mon May 04 17:10:50 2015 +0200
@@ -29,7 +29,7 @@
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
-#include "utilities/stack.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp)
   : _g1h(g1h),
@@ -95,8 +95,9 @@
   const double elapsed_ms = elapsed_time() * 1000.0;
   const double s_roots_ms = strong_roots_time() * 1000.0;
   const double term_ms    = term_time() * 1000.0;
-  const size_t alloc_buffer_waste = _g1_par_allocator->alloc_buffer_waste();
-  const size_t undo_waste         = _g1_par_allocator->undo_waste();
+  size_t alloc_buffer_waste = 0;
+  size_t undo_waste = 0;
+  _g1_par_allocator->waste(alloc_buffer_waste, undo_waste);
   st->print_cr("%3d %9.2f %9.2f %6.2f "
                "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
                SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,9 +54,6 @@
   uint              _tenuring_threshold;
   G1ParScanClosure  _scanner;
 
-  size_t            _alloc_buffer_waste;
-  size_t            _undo_waste;
-
   OopsInHeapRegionClosure*      _evac_failure_cl;
 
   int  _hash_seed;
@@ -78,9 +75,6 @@
 
 #define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t))
 
-  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
-  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
-
   DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
   G1SATBCardTableModRefBS* ctbs()                { return _ct_bs; }
 
@@ -106,10 +100,7 @@
   bool verify_task(StarTask ref) const;
 #endif // ASSERT
 
-  template <class T> void push_on_queue(T* ref) {
-    assert(verify_ref(ref), "sanity");
-    _refs->push(ref);
-  }
+  template <class T> void push_on_queue(T* ref);
 
   template <class T> void update_rs(HeapRegion* from, T* p, uint tid) {
     // If the new value of the field points to the same region or
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -59,6 +59,11 @@
   update_rs(from, p, queue_num());
 }
 
+template <class T> inline void G1ParScanThreadState::push_on_queue(T* ref) {
+  assert(verify_ref(ref), "sanity");
+  _refs->push(ref);
+}
+
 inline void G1ParScanThreadState::do_oop_partial_array(oop* p) {
   assert(has_partial_array_mask(p), "invariant");
   oop from_obj = clear_partial_array_mask(p);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp	Mon May 04 17:10:50 2015 +0200
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1ROOTPROCESSOR_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1ROOTPROCESSOR_HPP
 
 #include "memory/allocation.hpp"
 #include "memory/strongRootsScope.hpp"
@@ -118,4 +118,4 @@
   void set_num_workers(int active_workers);
 };
 
-#endif // SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ROOTPROCESSOR_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Mon May 04 17:10:50 2015 +0200
@@ -153,6 +153,11 @@
     return ReservedSpace::allocation_align_size_up(number_of_slots);
   }
 
+  // Returns how many bytes of the heap a single byte of the Card Table corresponds to.
+  static size_t heap_map_factor() {
+    return CardTableModRefBS::card_size;
+  }
+
   G1SATBCardTableLoggingModRefBS(MemRegion whole_heap);
 
   virtual void initialize() { }
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionBounds.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionBounds.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,6 +22,9 @@
  *
  */
 
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONBOUNDS_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONBOUNDS_INLINE_HPP
+
 #include "gc_implementation/g1/heapRegionBounds.hpp"
 
 size_t HeapRegionBounds::min_size() {
@@ -35,3 +38,5 @@
 size_t HeapRegionBounds::target_number() {
   return TARGET_REGION_NUMBER;
 }
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONBOUNDS_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Mon May 04 17:10:50 2015 +0200
@@ -29,6 +29,7 @@
 #include "memory/allocation.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 
@@ -160,10 +161,7 @@
   assert(_lock == NULL || _lock->owned_by_self(),
          "we should have taken the lock before calling this");
 
-  // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to
-  // filter the buffer given that this will remove any references into
-  // the CSet as we currently assume that no such refs will appear in
-  // enqueued buffers.
+  // If G1SATBBufferEnqueueingThresholdPercent == 0 we could skip filtering.
 
   // This method should only be called if there is a non-NULL buffer
   // that is full.
@@ -180,25 +178,19 @@
   return should_enqueue;
 }
 
-void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) {
+void ObjPtrQueue::apply_closure_and_empty(SATBBufferClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "SATB queues must only be processed at safepoints");
   if (_buf != NULL) {
-    apply_closure_to_buffer(cl, _buf, _index, _sz);
+    assert(_index % sizeof(void*) == 0, "invariant");
+    assert(_sz % sizeof(void*) == 0, "invariant");
+    assert(_index <= _sz, "invariant");
+    cl->do_buffer(_buf + byte_index_to_index((int)_index),
+                  byte_index_to_index((int)(_sz - _index)));
     _index = _sz;
   }
 }
 
-void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl,
-                                          void** buf, size_t index, size_t sz) {
-  if (cl == NULL) return;
-  for (size_t i = index; i < sz; i += oopSize) {
-    oop obj = (oop)buf[byte_index_to_index((int)i)];
-    // There can be NULL entries because of destructors.
-    if (obj != NULL) {
-      cl->do_object(obj);
-    }
-  }
-}
-
 #ifndef PRODUCT
 // Helpful for debugging
 
@@ -289,7 +281,7 @@
   shared_satb_queue()->filter();
 }
 
-bool SATBMarkQueueSet::apply_closure_to_completed_buffer(ObjectClosure* cl) {
+bool SATBMarkQueueSet::apply_closure_to_completed_buffer(SATBBufferClosure* cl) {
   BufferNode* nd = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
@@ -303,7 +295,18 @@
   }
   if (nd != NULL) {
     void **buf = BufferNode::make_buffer_from_node(nd);
-    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    // Skip over NULL entries at beginning (e.g. push end) of buffer.
+    // Filtering can result in non-full completed buffers; see
+    // should_enqueue_buffer.
+    assert(_sz % sizeof(void*) == 0, "invariant");
+    size_t limit = ObjPtrQueue::byte_index_to_index((int)_sz);
+    for (size_t i = 0; i < limit; ++i) {
+      if (buf[i] != NULL) {
+        // Found the end of the block of NULLs; process the remainder.
+        cl->do_buffer(buf + i, limit - i);
+        break;
+      }
+    }
     deallocate_buffer(buf);
     return true;
   } else {
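
The asserts added in apply_closure_and_empty() above treat _index and _sz as byte offsets into a buffer of void* slots that is filled from the high end, with byte_index_to_index() converting bytes to slot counts; that is an assumption about PtrQueue, not something this hunk itself shows. A standalone sketch of the conversion under those assumptions, not part of this changeset:

#include <cassert>
#include <cstddef>
#include <cstdio>

// Standalone sketch (not HotSpot code): _index and _sz are byte offsets,
// entries occupy void*-sized slots at the high end of the buffer, and
// byte_index_to_index() turns a byte offset into a slot index or count.
static size_t byte_index_to_index(size_t byte_index) {
  assert(byte_index % sizeof(void*) == 0);   // offsets stay pointer-aligned
  return byte_index / sizeof(void*);
}

int main() {
  const size_t sz = 8 * sizeof(void*);   // buffer capacity in bytes (8 slots)
  size_t index = 3 * sizeof(void*);      // 5 entries pushed, 3 slots still free
  // The active part handed to SATBBufferClosure::do_buffer():
  size_t first_slot = byte_index_to_index(index);        // == 3
  size_t num_slots  = byte_index_to_index(sz - index);   // == 5
  printf("do_buffer(buf + %zu, %zu)\n", first_slot, num_slots);
  assert(first_slot == 3 && num_slots == 5);
  return 0;
}
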
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Mon May 04 17:10:50 2015 +0200
@@ -25,29 +25,30 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
 
+#include "memory/allocation.hpp"
 #include "gc_implementation/g1/ptrQueue.hpp"
 
-class ObjectClosure;
 class JavaThread;
 class SATBMarkQueueSet;
 
+// Base class for processing the contents of a SATB buffer.
+class SATBBufferClosure : public StackObj {
+protected:
+  ~SATBBufferClosure() { }
+
+public:
+  // Process the SATB entries in the designated buffer range.
+  virtual void do_buffer(void** buffer, size_t size) = 0;
+};
+
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
-  friend class Threads;
   friend class SATBMarkQueueSet;
-  friend class G1RemarkThreadsClosure;
 
 private:
   // Filter out unwanted entries from the buffer.
   void filter();
 
-  // Apply the closure to all elements and empty the buffer;
-  void apply_closure_and_empty(ObjectClosure* cl);
-
-  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
-  static void apply_closure_to_buffer(ObjectClosure* cl,
-                                      void** buf, size_t index, size_t sz);
-
 public:
   ObjPtrQueue(PtrQueueSet* qset, bool perm = false) :
     // SATB queues are only active during marking cycles. We create
@@ -60,6 +61,10 @@
   // Process queue entries and free resources.
   void flush();
 
+  // Apply cl to the active part of the buffer.
+  // Prerequisite: Must be at a safepoint.
+  void apply_closure_and_empty(SATBBufferClosure* cl);
+
   // Overrides PtrQueue::should_enqueue_buffer(). See the method's
   // definition for more information.
   virtual bool should_enqueue_buffer();
@@ -97,10 +102,12 @@
   // Filter all the currently-active SATB buffers.
   void filter_thread_buffers();
 
-  // If there exists some completed buffer, pop it, then apply the
-  // closure to all its elements, and return true.  If no
-  // completed buffers exist, return false.
-  bool apply_closure_to_completed_buffer(ObjectClosure* closure);
+  // If there exists some completed buffer, pop and process it, and
+  // return true.  Otherwise return false.  Processing a buffer
+  // consists of applying the closure to the buffer range starting
+  // with the first non-NULL entry to the end of the buffer; the
+  // leading entries may be NULL due to filtering.
+  bool apply_closure_to_completed_buffer(SATBBufferClosure* cl);
 
 #ifndef PRODUCT
   // Helpful for debugging
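
A standalone sketch of the contract the new SATBBufferClosure base class defines: each call to do_buffer() receives the active portion of one SATB buffer as a pointer/length pair, and the queue set invokes it once per claimed buffer. The base class below is a local stand-in mirroring the declaration above, not the real HotSpot type, so the sketch compiles on its own.

#include <cstddef>
#include <cstdio>

// Local stand-in for SATBBufferClosure (illustrative only).
class BufferClosureSketch {
protected:
  ~BufferClosureSketch() { }
public:
  virtual void do_buffer(void** buffer, size_t size) = 0;
};

// A trivial implementor that just counts entries; CMSATBBufferClosure in
// concurrentMark.cpp instead examines each entry and greys live objects.
class CountingClosure : public BufferClosureSketch {
  size_t _entries;
public:
  CountingClosure() : _entries(0) { }
  virtual void do_buffer(void** buffer, size_t size) {
    (void)buffer;
    _entries += size;
  }
  size_t entries() const { return _entries; }
};

int main() {
  void* buf[4] = { 0, 0, 0, 0 };           // pretend this is one SATB buffer
  CountingClosure cl;
  cl.do_buffer(buf, 4);                    // the queue set hands over each
  cl.do_buffer(buf, 2);                    // completed buffer in turn
  printf("entries seen: %zu\n", cl.entries());
  return 0;
}
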
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon May 04 17:10:50 2015 +0200
@@ -54,6 +54,7 @@
 #include "utilities/copy.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/stack.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 #include "utilities/workgroup.hpp"
 
 #ifdef _MSC_VER
@@ -272,16 +273,8 @@
 }
 
 
-void ParScanThreadState::undo_alloc_in_to_space(HeapWord* obj,
-                                                size_t word_sz) {
-  // Is the alloc in the current alloc buffer?
-  if (to_space_alloc_buffer()->contains(obj)) {
-    assert(to_space_alloc_buffer()->contains(obj + word_sz - 1),
-           "Should contain whole object.");
-    to_space_alloc_buffer()->undo_allocation(obj, word_sz);
-  } else {
-    CollectedHeap::fill_with_object(obj, word_sz);
-  }
+void ParScanThreadState::undo_alloc_in_to_space(HeapWord* obj, size_t word_sz) {
+  to_space_alloc_buffer()->undo_allocation(obj, word_sz);
 }
 
 void ParScanThreadState::print_promotion_failure_size() {
@@ -1040,7 +1033,7 @@
   to()->set_concurrent_iteration_safe_limit(to()->top());
 
   if (ResizePLAB) {
-    plab_stats()->adjust_desired_plab_sz(n_workers);
+    plab_stats()->adjust_desired_plab_sz();
   }
 
   if (PrintGC && !PrintGCDetails) {
@@ -1078,6 +1071,10 @@
   _gc_tracer.report_gc_end(_gc_timer->gc_end(), _gc_timer->time_partitions());
 }
 
+size_t ParNewGeneration::desired_plab_sz() {
+  return _plab_stats.desired_plab_sz(GenCollectedHeap::heap()->workers()->active_workers());
+}
+
 static int sum;
 void ParNewGeneration::waste_some_time() {
   for (int i = 0; i < 100; i++) {
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon May 04 17:10:50 2015 +0200
@@ -411,9 +411,7 @@
     return &_plab_stats;
   }
 
-  size_t desired_plab_sz() {
-    return _plab_stats.desired_plab_sz();
-  }
+  size_t desired_plab_sz();
 
   const ParNewTracer* gc_tracer() const {
     return &_gc_tracer;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Mon May 04 17:10:50 2015 +0200
@@ -57,7 +57,7 @@
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
-  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
+  ParCompactionManager::MarkAndPushClosure mark_and_push_closure(cm);
   CLDToOopClosure mark_and_push_from_clds(&mark_and_push_closure, true);
   MarkingCodeBlobClosure mark_and_push_in_blobs(&mark_and_push_closure, !CodeBlobToOopClosure::FixRelocations);
 
@@ -85,8 +85,8 @@
     PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
-  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
-  PSParallelCompact::FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
+  ParCompactionManager::MarkAndPushClosure mark_and_push_closure(cm);
+  ParCompactionManager::FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
 
   switch (_root_type) {
     case universe:
@@ -156,8 +156,8 @@
     PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
-  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
-  PSParallelCompact::FollowStackClosure follow_stack_closure(cm);
+  ParCompactionManager::MarkAndPushClosure mark_and_push_closure(cm);
+  ParCompactionManager::FollowStackClosure follow_stack_closure(cm);
   _rp_task.work(_work_id, *PSParallelCompact::is_alive_closure(),
                 mark_and_push_closure, follow_stack_closure);
 }
@@ -213,7 +213,7 @@
 
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
-  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
+  ParCompactionManager::MarkAndPushClosure mark_and_push_closure(cm);
 
   oop obj = NULL;
   ObjArrayTask task;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Mon May 04 17:10:50 2015 +0200
@@ -37,7 +37,7 @@
 #include "oops/objArrayKlass.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.inline.hpp"
-#include "utilities/stack.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 PSOldGen*            ParCompactionManager::_old_gen = NULL;
 ParCompactionManager**  ParCompactionManager::_manager_array = NULL;
@@ -179,11 +179,11 @@
 void InstanceKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
   assert(obj != NULL, "can't follow the content of NULL object");
 
-  PSParallelCompact::follow_klass(cm, this);
+  cm->follow_klass(this);
   // Only mark the header and let the scan of the meta-data mark
   // everything else.
 
-  PSParallelCompact::MarkAndPushClosure cl(cm);
+  ParCompactionManager::MarkAndPushClosure cl(cm);
   InstanceKlass::oop_oop_iterate_oop_maps<true>(obj, &cl);
 }
 
@@ -201,9 +201,9 @@
     // the call to follow_class_loader is made when the class loader itself
     // is handled.
     if (klass->oop_is_instance() && InstanceKlass::cast(klass)->is_anonymous()) {
-      PSParallelCompact::follow_class_loader(cm, klass->class_loader_data());
+      cm->follow_class_loader(klass->class_loader_data());
     } else {
-      PSParallelCompact::follow_klass(cm, klass);
+      cm->follow_klass(klass);
     }
   } else {
     // If klass is NULL then this a mirror for a primitive type.
@@ -212,7 +212,7 @@
     assert(java_lang_Class::is_primitive(obj), "Sanity check");
   }
 
-  PSParallelCompact::MarkAndPushClosure cl(cm);
+  ParCompactionManager::MarkAndPushClosure cl(cm);
   oop_oop_iterate_statics<true>(obj, &cl);
 }
 
@@ -221,7 +221,7 @@
 
   ClassLoaderData * const loader_data = java_lang_ClassLoader::loader_data(obj);
   if (loader_data != NULL) {
-    PSParallelCompact::follow_class_loader(cm, loader_data);
+    cm->follow_class_loader(loader_data);
   }
 }
 
@@ -253,37 +253,24 @@
           gclog_or_tty->print_cr("       Non NULL normal " PTR_FORMAT, p2i(obj));
         }
       )
-      PSParallelCompact::mark_and_push(cm, referent_addr);
+      cm->mark_and_push(referent_addr);
     }
   }
   T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
-  if (ReferenceProcessor::pending_list_uses_discovered_field()) {
-    // Treat discovered as normal oop, if ref is not "active",
-    // i.e. if next is non-NULL.
-    T  next_oop = oopDesc::load_heap_oop(next_addr);
-    if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
-      T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
-      debug_only(
-        if(TraceReferenceGC && PrintGCDetails) {
-          gclog_or_tty->print_cr("   Process discovered as normal "
-                                 PTR_FORMAT, p2i(discovered_addr));
-        }
-      )
-      PSParallelCompact::mark_and_push(cm, discovered_addr);
-    }
-  } else {
-#ifdef ASSERT
-    // In the case of older JDKs which do not use the discovered
-    // field for the pending list, an inactive ref (next != NULL)
-    // must always have a NULL discovered field.
-    T next = oopDesc::load_heap_oop(next_addr);
-    oop discovered = java_lang_ref_Reference::discovered(obj);
-    assert(oopDesc::is_null(next) || oopDesc::is_null(discovered),
-           err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field",
-                   p2i(obj)));
-#endif
+  // Treat discovered as normal oop, if ref is not "active",
+  // i.e. if next is non-NULL.
+  T  next_oop = oopDesc::load_heap_oop(next_addr);
+  if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
+    T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
+    debug_only(
+      if(TraceReferenceGC && PrintGCDetails) {
+        gclog_or_tty->print_cr("   Process discovered as normal "
+                               PTR_FORMAT, p2i(discovered_addr));
+      }
+    )
+    cm->mark_and_push(discovered_addr);
   }
-  PSParallelCompact::mark_and_push(cm, next_addr);
+  cm->mark_and_push(next_addr);
   klass->InstanceKlass::oop_pc_follow_contents(obj, cm);
 }
 
@@ -297,7 +284,7 @@
 }
 
 void ObjArrayKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
-  PSParallelCompact::follow_klass(cm, this);
+  cm->follow_klass(this);
 
   if (UseCompressedOops) {
     oop_pc_follow_contents_specialized<narrowOop>(objArrayOop(obj), 0, cm);
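The reference-processing hunks above (and the matching ones in psPromotionManager.cpp and markSweep.cpp further down) all rely on the same invariant: a java.lang.ref.Reference is "active" until its next field is set, so a non-NULL next means the discovered field may be handled like any other oop. A minimal sketch of that test, using only the accessors visible in this diff; the helper name is invented for illustration and is not part of the changeset:

#include "classfile/javaClasses.hpp"
#include "oops/oop.inline.hpp"

// Illustrative only: a Reference is "active" until its next field is set,
// so a non-NULL next means the collector may treat the discovered field
// as a normal oop.
template <typename T>
static bool reference_is_inactive_example(oop obj) {
  T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
  return !oopDesc::is_null(oopDesc::load_heap_oop(next_addr));
}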
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Mon May 04 17:10:50 2015 +0200
@@ -29,11 +29,6 @@
 #include "utilities/stack.hpp"
 #include "utilities/taskqueue.hpp"
 
-// Move to some global location
-#define HAS_BEEN_MOVED 0x1501d01d
-// End move to some global location
-
-
 class MutableSpace;
 class PSOldGen;
 class ParCompactionManager;
@@ -170,24 +165,23 @@
   bool should_copy();
 
   // Save for later processing.  Must not fail.
-  inline void push(oop obj) { _marking_stack.push(obj); }
+  inline void push(oop obj);
   inline void push_objarray(oop objarray, size_t index);
   inline void push_region(size_t index);
 
+  // Check mark and maybe push on marking stack.
+  template <typename T> inline void mark_and_push(T* p);
+
+  inline void follow_klass(Klass* klass);
+
+  void follow_class_loader(ClassLoaderData* cld);
+
   // Access function for compaction managers
   static ParCompactionManager* gc_thread_compaction_manager(int index);
 
-  static bool steal(int queue_num, int* seed, oop& t) {
-    return stack_array()->steal(queue_num, seed, t);
-  }
-
-  static bool steal_objarray(int queue_num, int* seed, ObjArrayTask& t) {
-    return _objarray_queues->steal(queue_num, seed, t);
-  }
-
-  static bool steal(int queue_num, int* seed, size_t& region) {
-    return region_array()->steal(queue_num, seed, region);
-  }
+  static bool steal(int queue_num, int* seed, oop& t);
+  static bool steal_objarray(int queue_num, int* seed, ObjArrayTask& t);
+  static bool steal(int queue_num, int* seed, size_t& region);
 
   // Process tasks remaining on any marking stack
   void follow_marking_stacks();
@@ -200,6 +194,39 @@
   void follow_contents(objArrayOop array, int index);
 
   void update_contents(oop obj);
+
+  class MarkAndPushClosure: public ExtendedOopClosure {
+   private:
+    ParCompactionManager* _compaction_manager;
+   public:
+    MarkAndPushClosure(ParCompactionManager* cm) : _compaction_manager(cm) { }
+
+    template <typename T> void do_oop_nv(T* p);
+    virtual void do_oop(oop* p);
+    virtual void do_oop(narrowOop* p);
+
+    // This closure provides its own oop verification code.
+    debug_only(virtual bool should_verify_oops() { return false; })
+  };
+
+  class FollowStackClosure: public VoidClosure {
+   private:
+    ParCompactionManager* _compaction_manager;
+   public:
+    FollowStackClosure(ParCompactionManager* cm) : _compaction_manager(cm) { }
+    virtual void do_void();
+  };
+
+  // The one and only place to start following the classes.
+  // Should only be applied to the ClassLoaderData klasses list.
+  class FollowKlassClosure : public KlassClosure {
+   private:
+    MarkAndPushClosure* _mark_and_push_closure;
+   public:
+    FollowKlassClosure(MarkAndPushClosure* mark_and_push_closure) :
+        _mark_and_push_closure(mark_and_push_closure) { }
+    void do_klass(Klass* klass);
+  };
 };
 
 inline ParCompactionManager* ParCompactionManager::manager_array(int index) {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -31,6 +31,23 @@
 #include "oops/oop.inline.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/taskqueue.inline.hpp"
+
+inline bool ParCompactionManager::steal(int queue_num, int* seed, oop& t) {
+  return stack_array()->steal(queue_num, seed, t);
+}
+
+inline bool ParCompactionManager::steal_objarray(int queue_num, int* seed, ObjArrayTask& t) {
+  return _objarray_queues->steal(queue_num, seed, t);
+}
+
+inline bool ParCompactionManager::steal(int queue_num, int* seed, size_t& region) {
+  return region_array()->steal(queue_num, seed, region);
+}
+
+inline void ParCompactionManager::push(oop obj) {
+  _marking_stack.push(obj);
+}
 
 void ParCompactionManager::push_objarray(oop obj, size_t index)
 {
@@ -50,6 +67,47 @@
   region_stack()->push(index);
 }
 
+template <typename T>
+inline void ParCompactionManager::mark_and_push(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+    assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap");
+
+    if (mark_bitmap()->is_unmarked(obj) && PSParallelCompact::mark_obj(obj)) {
+      push(obj);
+    }
+  }
+}
+
+template <typename T>
+inline void ParCompactionManager::MarkAndPushClosure::do_oop_nv(T* p) {
+  _compaction_manager->mark_and_push(p);
+}
+
+inline void ParCompactionManager::MarkAndPushClosure::do_oop(oop* p)       { do_oop_nv(p); }
+inline void ParCompactionManager::MarkAndPushClosure::do_oop(narrowOop* p) { do_oop_nv(p); }
+
+inline void ParCompactionManager::follow_klass(Klass* klass) {
+  oop holder = klass->klass_holder();
+  mark_and_push(&holder);
+}
+
+inline void ParCompactionManager::FollowStackClosure::do_void() {
+  _compaction_manager->follow_marking_stacks();
+}
+
+inline void ParCompactionManager::FollowKlassClosure::do_klass(Klass* klass) {
+  klass->oops_do(_mark_and_push_closure);
+}
+
+inline void ParCompactionManager::follow_class_loader(ClassLoaderData* cld) {
+  MarkAndPushClosure mark_and_push_closure(this);
+  FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
+
+  cld->oops_do(&mark_and_push_closure, &follow_klass_closure, true);
+}
+
 inline void ParCompactionManager::follow_contents(oop obj) {
   assert(PSParallelCompact::mark_bitmap()->is_marked(obj), "should be marked");
   obj->pc_follow_contents(this);
@@ -69,7 +127,7 @@
 
   // Push the non-NULL elements of the next stride on the marking stack.
   for (T* e = beg; e < end; e++) {
-    PSParallelCompact::mark_and_push<T>(cm, e);
+    cm->mark_and_push<T>(e);
   }
 
   if (end_index < len) {
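With this change the marking entry points live on the per-thread ParCompactionManager, so a worker drives marking through cm->mark_and_push(), follow_marking_stacks() and the static steal() overloads. A rough sketch of such a worker loop, assuming the usual ParallelTaskTerminator termination protocol; the function name, queue_num and seed handling are illustrative and not part of this changeset:

#include "gc_implementation/parallelScavenge/psCompactionManager.inline.hpp"
#include "utilities/taskqueue.inline.hpp"

// Illustrative only: drain the local marking stack, then steal from other
// queues until all workers agree that no marking work is left.
static void steal_marking_work_example(ParCompactionManager* cm,
                                       int queue_num,
                                       ParallelTaskTerminator* terminator) {
  int seed = 17; // per-thread seed for the work-stealing random walk
  do {
    cm->follow_marking_stacks();              // work pushed locally by mark_and_push()
    oop obj;
    while (ParCompactionManager::steal(queue_num, &seed, obj)) {
      cm->follow_contents(obj);               // follow a stolen, already-marked object
      cm->follow_marking_stacks();
    }
  } while (!terminator->offer_termination());
}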
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon May 04 17:10:50 2015 +0200
@@ -108,7 +108,6 @@
 bool      PSParallelCompact::_print_phases = false;
 
 ReferenceProcessor* PSParallelCompact::_ref_processor = NULL;
-Klass*              PSParallelCompact::_updated_int_array_klass_obj = NULL;
 
 double PSParallelCompact::_dwl_mean;
 double PSParallelCompact::_dwl_std_dev;
@@ -820,17 +819,9 @@
 
 bool PSParallelCompact::IsAliveClosure::do_object_b(oop p) { return mark_bitmap()->is_marked(p); }
 
-void PSParallelCompact::KeepAliveClosure::do_oop(oop* p)       { PSParallelCompact::KeepAliveClosure::do_oop_work(p); }
-void PSParallelCompact::KeepAliveClosure::do_oop(narrowOop* p) { PSParallelCompact::KeepAliveClosure::do_oop_work(p); }
-
 PSParallelCompact::AdjustPointerClosure PSParallelCompact::_adjust_pointer_closure;
 PSParallelCompact::AdjustKlassClosure PSParallelCompact::_adjust_klass_closure;
 
-void PSParallelCompact::FollowStackClosure::do_void() { _compaction_manager->follow_marking_stacks(); }
-
-void PSParallelCompact::FollowKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(_mark_and_push_closure);
-}
 void PSParallelCompact::AdjustKlassClosure::do_klass(Klass* klass) {
   klass->oops_do(&PSParallelCompact::_adjust_pointer_closure);
 }
@@ -2350,8 +2341,8 @@
   TaskQueueSetSuper* qset = ParCompactionManager::region_array();
   ParallelTaskTerminator terminator(active_gc_threads, qset);
 
-  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
-  PSParallelCompact::FollowStackClosure follow_stack_closure(cm);
+  ParCompactionManager::MarkAndPushClosure mark_and_push_closure(cm);
+  ParCompactionManager::FollowStackClosure follow_stack_closure(cm);
 
   // Need new claim bits before marking starts.
   ClassLoaderDataGraph::clear_claimed_marks();
@@ -2425,14 +2416,6 @@
   _gc_tracer.report_object_count_after_gc(is_alive_closure());
 }
 
-void PSParallelCompact::follow_class_loader(ParCompactionManager* cm,
-                                            ClassLoaderData* cld) {
-  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
-  PSParallelCompact::FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
-
-  cld->oops_do(&mark_and_push_closure, &follow_klass_closure, true);
-}
-
 // This should be moved to the shared markSweep code!
 class PSAlwaysTrueClosure: public BoolObjectClosure {
 public:
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon May 04 17:10:50 2015 +0200
@@ -28,7 +28,6 @@
 #include "gc_implementation/parallelScavenge/objectStartArray.hpp"
 #include "gc_implementation/parallelScavenge/parallelScavengeHeap.hpp"
 #include "gc_implementation/parallelScavenge/parMarkBitMap.hpp"
-#include "gc_implementation/parallelScavenge/psCompactionManager.hpp"
 #include "gc_implementation/shared/collectorCounters.hpp"
 #include "gc_implementation/shared/mutableSpace.hpp"
 #include "gc_interface/collectedHeap.hpp"
@@ -933,25 +932,6 @@
     virtual bool do_object_b(oop p);
   };
 
-  class KeepAliveClosure: public OopClosure {
-   private:
-    ParCompactionManager* _compaction_manager;
-   protected:
-    template <class T> inline void do_oop_work(T* p);
-   public:
-    KeepAliveClosure(ParCompactionManager* cm) : _compaction_manager(cm) { }
-    virtual void do_oop(oop* p);
-    virtual void do_oop(narrowOop* p);
-  };
-
-  class FollowStackClosure: public VoidClosure {
-   private:
-    ParCompactionManager* _compaction_manager;
-   public:
-    FollowStackClosure(ParCompactionManager* cm) : _compaction_manager(cm) { }
-    virtual void do_void();
-  };
-
   class AdjustPointerClosure: public ExtendedOopClosure {
    public:
     template <typename T> void do_oop_nv(T* p);
@@ -967,12 +947,8 @@
     void do_klass(Klass* klass);
   };
 
-  friend class KeepAliveClosure;
-  friend class FollowStackClosure;
   friend class AdjustPointerClosure;
   friend class AdjustKlassClosure;
-  friend class FollowKlassClosure;
-  friend class InstanceClassLoaderKlass;
   friend class RefProcTaskProxy;
 
  private:
@@ -994,9 +970,6 @@
   // Reference processing (used in ...follow_contents)
   static ReferenceProcessor*  _ref_processor;
 
-  // Updated location of intArrayKlassObj.
-  static Klass* _updated_int_array_klass_obj;
-
   // Values computed at initialization and used by dead_wood_limiter().
   static double _dwl_mean;
   static double _dwl_std_dev;
@@ -1142,30 +1115,6 @@
   static void reset_millis_since_last_gc();
 
  public:
-  class MarkAndPushClosure: public ExtendedOopClosure {
-   private:
-    ParCompactionManager* _compaction_manager;
-   public:
-    MarkAndPushClosure(ParCompactionManager* cm) : _compaction_manager(cm) { }
-
-    template <typename T> void do_oop_nv(T* p);
-    virtual void do_oop(oop* p);
-    virtual void do_oop(narrowOop* p);
-
-    // This closure provides its own oop verification code.
-    debug_only(virtual bool should_verify_oops() { return false; })
-  };
-
-  // The one and only place to start following the classes.
-  // Should only be applied to the ClassLoaderData klasses list.
-  class FollowKlassClosure : public KlassClosure {
-   private:
-    MarkAndPushClosure* _mark_and_push_closure;
-   public:
-    FollowKlassClosure(MarkAndPushClosure* mark_and_push_closure) :
-        _mark_and_push_closure(mark_and_push_closure) { }
-    void do_klass(Klass* klass);
-  };
 
   PSParallelCompact();
 
@@ -1193,23 +1142,13 @@
 
   // Used to add tasks
   static GCTaskManager* const gc_task_manager();
-  static Klass* updated_int_array_klass_obj() {
-    return _updated_int_array_klass_obj;
-  }
 
   // Marking support
   static inline bool mark_obj(oop obj);
   static inline bool is_marked(oop obj);
-  // Check mark and maybe push on marking stack
-  template <class T> static inline void mark_and_push(ParCompactionManager* cm,
-                                                      T* p);
+
   template <class T> static inline void adjust_pointer(T* p);
 
-  static inline void follow_klass(ParCompactionManager* cm, Klass* klass);
-
-  static void follow_class_loader(ParCompactionManager* cm,
-                                  ClassLoaderData* klass);
-
   // Compaction support.
   // Return true if p is in the range [beg_addr, end_addr).
   static inline bool is_in(HeapWord* p, HeapWord* beg_addr, HeapWord* end_addr);
@@ -1337,11 +1276,6 @@
   return mark_bitmap()->is_marked(obj);
 }
 
-template <class T>
-inline void PSParallelCompact::KeepAliveClosure::do_oop_work(T* p) {
-  mark_and_push(_compaction_manager, p);
-}
-
 inline bool PSParallelCompact::print_phases() {
   return _print_phases;
 }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -26,38 +26,11 @@
 #define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPARALLELCOMPACT_INLINE_HPP
 
 #include "gc_implementation/parallelScavenge/parallelScavengeHeap.hpp"
-#include "gc_implementation/parallelScavenge/psCompactionManager.hpp"
 #include "gc_implementation/parallelScavenge/psParallelCompact.hpp"
 #include "gc_interface/collectedHeap.hpp"
 #include "oops/klass.hpp"
 #include "oops/oop.inline.hpp"
 
-template <typename T>
-inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) {
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap");
-
-    if (mark_bitmap()->is_unmarked(obj) && mark_obj(obj)) {
-      cm->push(obj);
-    }
-  }
-}
-
-template <typename T>
-inline void PSParallelCompact::MarkAndPushClosure::do_oop_nv(T* p) {
-  mark_and_push(_compaction_manager, p);
-}
-
-inline void PSParallelCompact::MarkAndPushClosure::do_oop(oop* p)       { do_oop_nv(p); }
-inline void PSParallelCompact::MarkAndPushClosure::do_oop(narrowOop* p) { do_oop_nv(p); }
-
-inline void PSParallelCompact::follow_klass(ParCompactionManager* cm, Klass* klass) {
-  oop holder = klass->klass_holder();
-  mark_and_push(cm, &holder);
-}
-
 template <class T>
 inline void PSParallelCompact::adjust_pointer(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Mon May 04 17:10:50 2015 +0200
@@ -36,7 +36,7 @@
 #include "oops/instanceMirrorKlass.inline.hpp"
 #include "oops/objArrayKlass.inline.hpp"
 #include "oops/oop.inline.hpp"
-#include "utilities/stack.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 PaddedEnd<PSPromotionManager>* PSPromotionManager::_manager_array = NULL;
 OopStarTaskQueueSet*           PSPromotionManager::_stack_array_depth = NULL;
@@ -365,33 +365,19 @@
   // Treat discovered as normal oop, if ref is not "active",
   // i.e. if next is non-NULL.
   T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
-  if (ReferenceProcessor::pending_list_uses_discovered_field()) {
-    T  next_oop = oopDesc::load_heap_oop(next_addr);
-    if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
-      T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
-      debug_only(
-        if(TraceReferenceGC && PrintGCDetails) {
-          gclog_or_tty->print_cr("   Process discovered as normal "
-                                 PTR_FORMAT, p2i(discovered_addr));
-        }
-      )
-      if (PSScavenge::should_scavenge(discovered_addr)) {
-        pm->claim_or_forward_depth(discovered_addr);
+  T  next_oop = oopDesc::load_heap_oop(next_addr);
+  if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
+    T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
+    debug_only(
+      if(TraceReferenceGC && PrintGCDetails) {
+        gclog_or_tty->print_cr("   Process discovered as normal "
+                               PTR_FORMAT, p2i(discovered_addr));
       }
+    )
+    if (PSScavenge::should_scavenge(discovered_addr)) {
+      pm->claim_or_forward_depth(discovered_addr);
     }
-  } else {
-#ifdef ASSERT
-    // In the case of older JDKs which do not use the discovered
-    // field for the pending list, an inactive ref (next != NULL)
-    // must always have a NULL discovered field.
-    oop next = oopDesc::load_decode_heap_oop(next_addr);
-    oop discovered = java_lang_ref_Reference::discovered(obj);
-    assert(oopDesc::is_null(next) || oopDesc::is_null(discovered),
-           err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field",
-                   p2i(obj)));
-#endif
   }
-
   // Treat next as normal oop;  next is a link in the reference queue.
   if (PSScavenge::should_scavenge(next_addr)) {
     pm->claim_or_forward_depth(next_addr);
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Mon May 04 17:10:50 2015 +0200
@@ -45,10 +45,6 @@
 // FIX ME FIX ME Add a destructor, and don't rely on the user to drain/flush/deallocate!
 //
 
-// Move to some global location
-#define HAS_BEEN_MOVED 0x1501d01d
-// End move to some global location
-
 class MutableSpace;
 class PSOldGen;
 class ParCompactionManager;
@@ -143,9 +139,7 @@
                                                     int start, int end);
   void process_array_chunk(oop old);
 
-  template <class T> void push_depth(T* p) {
-    claimed_stack_depth()->push(p);
-  }
+  template <class T> void push_depth(T* p);
 
   inline void promotion_trace_event(oop new_obj, oop old_obj, size_t obj_size,
                                     uint age, bool tenured,
@@ -163,9 +157,7 @@
   static PSPromotionManager* gc_thread_promotion_manager(int index);
   static PSPromotionManager* vm_thread_promotion_manager();
 
-  static bool steal_depth(int queue_num, int* seed, StarTask& t) {
-    return stack_array_depth()->steal(queue_num, seed, t);
-  }
+  static bool steal_depth(int queue_num, int* seed, StarTask& t);
 
   PSPromotionManager();
 
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -31,6 +31,7 @@
 #include "gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
 #include "oops/oop.inline.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 inline PSPromotionManager* PSPromotionManager::manager_array(int index) {
   assert(_manager_array != NULL, "access of NULL manager_array");
@@ -39,6 +40,11 @@
 }
 
 template <class T>
+inline void PSPromotionManager::push_depth(T* p) {
+  claimed_stack_depth()->push(p);
+}
+
+template <class T>
 inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) {
   if (p != NULL) { // XXX: error if p != NULL here
     oop o = oopDesc::load_decode_heap_oop_not_null(p);
@@ -99,7 +105,7 @@
 // performance.
 //
 template<bool promote_immediately>
-oop PSPromotionManager::copy_to_survivor_space(oop o) {
+inline oop PSPromotionManager::copy_to_survivor_space(oop o) {
   assert(should_scavenge(&o), "Sanity");
 
   oop new_obj = NULL;
@@ -317,6 +323,10 @@
   }
 }
 
+inline bool PSPromotionManager::steal_depth(int queue_num, int* seed, StarTask& t) {
+  return stack_array_depth()->steal(queue_num, seed, t);
+}
+
 #if TASKQUEUE_STATS
 void PSPromotionManager::record_steal(StarTask& p) {
   if (is_oop_masked(p)) {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Mon May 04 17:10:50 2015 +0200
@@ -39,8 +39,7 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 #include "services/management.hpp"
-#include "utilities/stack.inline.hpp"
-#include "utilities/taskqueue.hpp"
+#include "utilities/taskqueue.inline.hpp"
 
 //
 // ScavengeRootsTask
--- a/hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Mon May 04 17:10:50 2015 +0200
@@ -193,8 +193,9 @@
      (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) {
     new_active_workers = total_workers;
   } else {
+    uintx min_workers = (total_workers == 1) ? 1 : 2;
     new_active_workers = calc_default_active_workers(total_workers,
-                                                     2, /* Minimum number of workers */
+                                                     min_workers,
                                                      active_workers,
                                                      application_workers);
   }
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.cpp	Mon May 04 17:10:50 2015 +0200
@@ -145,31 +145,18 @@
     }
   }
   T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
-  if (ReferenceProcessor::pending_list_uses_discovered_field()) {
-    // Treat discovered as normal oop, if ref is not "active",
-    // i.e. if next is non-NULL.
-    T  next_oop = oopDesc::load_heap_oop(next_addr);
-    if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
-      T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
-      debug_only(
-        if(TraceReferenceGC && PrintGCDetails) {
-          gclog_or_tty->print_cr("   Process discovered as normal "
-                                 PTR_FORMAT, p2i(discovered_addr));
-        }
-      )
-      MarkSweep::mark_and_push(discovered_addr);
-    }
-  } else {
-#ifdef ASSERT
-    // In the case of older JDKs which do not use the discovered
-    // field for the pending list, an inactive ref (next != NULL)
-    // must always have a NULL discovered field.
-    oop next = oopDesc::load_decode_heap_oop(next_addr);
-    oop discovered = java_lang_ref_Reference::discovered(obj);
-    assert(oopDesc::is_null(next) || oopDesc::is_null(discovered),
-        err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field",
-            p2i(obj)));
-#endif
+  // Treat discovered as normal oop, if ref is not "active",
+  // i.e. if next is non-NULL.
+  T  next_oop = oopDesc::load_heap_oop(next_addr);
+  if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
+    T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
+    debug_only(
+      if(TraceReferenceGC && PrintGCDetails) {
+        gclog_or_tty->print_cr("   Process discovered as normal "
+                               PTR_FORMAT, p2i(discovered_addr));
+      }
+    )
+    MarkSweep::mark_and_push(discovered_addr);
   }
   // treat next as normal oop.  next is a link in the reference queue.
   debug_only(
--- a/hotspot/src/share/vm/gc_implementation/shared/objectCountEventSender.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/objectCountEventSender.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_OBJECT_COUNT_EVENT_SENDER_HPP
-#define SHARE_VM_OBJECT_COUNT_EVENT_SENDER_HPP
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_OBJECTCOUNTEVENTSENDER_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_OBJECTCOUNTEVENTSENDER_HPP
 
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "memory/allocation.hpp"
@@ -42,4 +42,4 @@
 
 #endif // INCLUDE_SERVICES
 
-#endif // SHARE_VM_OBJECT_COUNT_EVENT_SENDER
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_OBJECTCOUNTEVENTSENDER_HPP
--- a/hotspot/src/share/vm/gc_implementation/shared/plab.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/plab.cpp	Mon May 04 17:10:50 2015 +0200
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/shared/plab.hpp"
+#include "gc_interface/collectedHeap.hpp"
 #include "memory/threadLocalAllocBuffer.hpp"
 #include "oops/arrayOop.hpp"
 #include "oops/oop.inline.hpp"
@@ -39,7 +40,7 @@
 
 PLAB::PLAB(size_t desired_plab_sz_) :
   _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL),
-  _end(NULL), _hard_end(NULL), _allocated(0), _wasted(0)
+  _end(NULL), _hard_end(NULL), _allocated(0), _wasted(0), _undo_wasted(0)
 {
   // ArrayOopDesc::header_size depends on command line initialization.
   AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? align_object_size(arrayOopDesc::header_size(T_INT)) : 0;
@@ -62,13 +63,15 @@
   // Now flush the statistics.
   stats->add_allocated(_allocated);
   stats->add_wasted(_wasted);
+  stats->add_undo_wasted(_undo_wasted);
   stats->add_unused(unused);
 
   // Since we have flushed the stats we need to clear the _allocated and _wasted
   // fields in case somebody retains an instance of this over GCs. Not doing so
   // will artificially inflate the values in the statistics.
-  _allocated = 0;
-  _wasted = 0;
+  _allocated   = 0;
+  _wasted      = 0;
+  _undo_wasted = 0;
 }
 
 void PLAB::retire() {
@@ -84,10 +87,39 @@
   return result;
 }
 
-// Compute desired plab size and latch result for later
+void PLAB::add_undo_waste(HeapWord* obj, size_t word_sz) {
+  CollectedHeap::fill_with_object(obj, word_sz);
+  _undo_wasted += word_sz;
+}
+
+void PLAB::undo_last_allocation(HeapWord* obj, size_t word_sz) {
+  assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
+  assert(pointer_delta(_top, obj) == word_sz, "Bad undo");
+  _top = obj;
+}
+
+void PLAB::undo_allocation(HeapWord* obj, size_t word_sz) {
+  // Is the alloc in the current alloc buffer?
+  if (contains(obj)) {
+    assert(contains(obj + word_sz - 1),
+      "should contain whole object");
+    undo_last_allocation(obj, word_sz);
+  } else {
+    add_undo_waste(obj, word_sz);
+  }
+}
+
+// Calculates plab size for current number of gc worker threads.
+size_t PLABStats::desired_plab_sz(uint no_of_gc_workers) {
+  assert(no_of_gc_workers > 0, "Number of GC workers should be larger than zero");
+
+  return align_object_size(_desired_net_plab_sz / MAX2(no_of_gc_workers, 1U));
+}
+
+// Compute desired plab size for one gc worker thread and latch result for later
 // use. This should be called once at the end of parallel
 // scavenge; it clears the sensor accumulators.
-void PLABStats::adjust_desired_plab_sz(uint no_of_gc_workers) {
+void PLABStats::adjust_desired_plab_sz() {
   assert(ResizePLAB, "Not set");
 
   assert(is_object_aligned(max_size()) && min_size() <= max_size(),
@@ -98,8 +130,9 @@
            err_msg("Inconsistency in PLAB stats: "
                    "_allocated: "SIZE_FORMAT", "
                    "_wasted: "SIZE_FORMAT", "
-                   "_unused: "SIZE_FORMAT,
-                   _allocated, _wasted, _unused));
+                   "_unused: "SIZE_FORMAT", "
+                   "_undo_wasted: "SIZE_FORMAT,
+                   _allocated, _wasted, _unused, _undo_wasted));
 
     _allocated = 1;
   }
@@ -109,7 +142,8 @@
     target_refills = 1;
   }
   size_t used = _allocated - _wasted - _unused;
-  size_t recent_plab_sz = used / (target_refills * no_of_gc_workers);
+  // Assume a single gc worker thread; desired_plab_sz() divides the latched net size per worker.
+  size_t recent_plab_sz = used / target_refills;
   // Take historical weighted average
   _filter.sample(recent_plab_sz);
   // Clip from above and below, and align to object boundary
@@ -120,7 +154,7 @@
   if (PrintPLAB) {
     gclog_or_tty->print(" (plab_sz = " SIZE_FORMAT" desired_plab_sz = " SIZE_FORMAT") ", recent_plab_sz, new_plab_sz);
   }
-  _desired_plab_sz = new_plab_sz;
+  _desired_net_plab_sz = new_plab_sz;
 
   reset();
 }
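The new undo path gives PLAB two ways to take back an allocation: if obj is still the top of the buffer, _top is simply moved back; otherwise the space is filled with a dummy object and accounted as _undo_wasted. A usage sketch follows; the pre-existing PLAB::allocate(word_sz) call and the promotion race as the reason for the undo are assumptions of the sketch, not part of this diff:

#include "gc_implementation/shared/plab.hpp"

// Illustrative only: typical copy-then-undo pattern around a PLAB.
static HeapWord* promote_example(PLAB* plab, oop from_obj, size_t word_sz) {
  HeapWord* obj = plab->allocate(word_sz);   // claim space in the buffer
  if (obj == NULL) {
    return NULL;                             // buffer exhausted; caller retires/refills
  }
  // ... copy from_obj into obj, then try to install the forwarding pointer ...
  bool lost_forwarding_race = false;         // placeholder for the real CAS result
  if (lost_forwarding_race) {
    // Retract the claim: moves _top back if obj was the last allocation,
    // otherwise fills the hole and records it via add_undo_waste().
    plab->undo_allocation(obj, word_sz);
    return NULL;
  }
  return obj;
}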
--- a/hotspot/src/share/vm/gc_implementation/shared/plab.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/gc_implementation/shared/plab.hpp	Mon May 04 17:10:50 2015 +0200
@@ -45,6 +45,7 @@
   // In support of ergonomic sizing of PLAB's
   size_t    _allocated;     // in HeapWord units
   size_t    _wasted;        // in HeapWord units
+  size_t    _undo_wasted;
   char      tail[32];
   static size_t AlignmentReserve;
 
@@ -62,6 +63,12 @@
   // the amount of remaining space.
   size_t retire_internal();
 
+  void add_undo_waste(HeapWord* obj, size_t word_sz);
+
+  // Undo the last allocation in the buffer, which is required to be of the
+  // "obj" of the given "word_sz".
+  void undo_last_allocation(HeapWord* obj, size_t word_sz);
+
 public:
   // Initializes the buffer to be empty, but with the given "word_sz".
   // Must get initialized with "set_buf" for an allocation to succeed.
@@ -90,18 +97,17 @@
   // Allocate the object aligned to "alignment_in_bytes".
   HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes);
 
-  // Undo the last allocation in the buffer, which is required to be of the
+  // Undo any allocation in the buffer, which is required to be of the
   // "obj" of the given "word_sz".
-  void undo_allocation(HeapWord* obj, size_t word_sz) {
-    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
-    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
-    _top = obj;
-  }
+  void undo_allocation(HeapWord* obj, size_t word_sz);
 
   // The total (word) size of the buffer, including both allocated and
   // unallocated space.
   size_t word_sz() { return _word_sz; }
 
+  size_t waste() { return _wasted; }
+  size_t undo_waste() { return _undo_wasted; }
+
   // Should only be done if we are about to reset with a new buffer of the
   // given size.
   void set_word_size(size_t new_word_sz) {
@@ -144,24 +150,27 @@
 
 // PLAB book-keeping.
 class PLABStats VALUE_OBJ_CLASS_SPEC {
-  size_t _allocated;      // Total allocated
-  size_t _wasted;         // of which wasted (internal fragmentation)
-  size_t _unused;         // Unused in last buffer
-  size_t _desired_plab_sz;// Output of filter (below), suitably trimmed and quantized
+  size_t _allocated;           // Total allocated
+  size_t _wasted;              // of which wasted (internal fragmentation)
+  size_t _undo_wasted;         // of which wasted on undo (is not used for calculation of PLAB size)
+  size_t _unused;              // Unused in last buffer
+  size_t _desired_net_plab_sz; // Output of filter (below), suitably trimmed and quantized
   AdaptiveWeightedAverage
-         _filter;         // Integrator with decay
+         _filter;              // Integrator with decay
 
   void reset() {
-    _allocated = 0;
-    _wasted    = 0;
-    _unused    = 0;
+    _allocated   = 0;
+    _wasted      = 0;
+    _undo_wasted = 0;
+    _unused      = 0;
   }
  public:
-  PLABStats(size_t desired_plab_sz_, unsigned wt) :
+  PLABStats(size_t desired_net_plab_sz_, unsigned wt) :
     _allocated(0),
     _wasted(0),
+    _undo_wasted(0),
     _unused(0),
-    _desired_plab_sz(desired_plab_sz_),
+    _desired_net_plab_sz(desired_net_plab_sz_),
     _filter(wt)
   { }
 
@@ -173,13 +182,12 @@
     return PLAB::max_size();
   }
 
-  size_t desired_plab_sz() {
-    return _desired_plab_sz;
-  }
+  // Calculates plab size for current number of gc worker threads.
+  size_t desired_plab_sz(uint no_of_gc_workers);
 
-  // Updates the current desired PLAB size. Computes the new desired PLAB size,
+  // Updates the current desired PLAB size. Computes the new desired PLAB size with one gc worker thread,
   // updates _desired_plab_sz and clears sensor accumulators.
-  void adjust_desired_plab_sz(uint no_of_gc_workers);
+  void adjust_desired_plab_sz();
 
   void add_allocated(size_t v) {
     Atomic::add_ptr(v, &_allocated);
@@ -192,6 +200,10 @@
   void add_wasted(size_t v) {
     Atomic::add_ptr(v, &_wasted);
   }
+
+  void add_undo_wasted(size_t v) {
+    Atomic::add_ptr(v, &_undo_wasted);
+  }
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_PLAB_HPP
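Since adjust_desired_plab_sz() now latches a net size computed as if there were a single worker, desired_plab_sz(n) derives the per-worker size by dividing that net value. With made-up numbers:

// Illustrative arithmetic only (values are invented):
//   _desired_net_plab_sz = 4096 words        // latched by adjust_desired_plab_sz()
//   desired_plab_sz(8)   = align_object_size(4096 / MAX2(8u, 1u))
//                        = align_object_size(512) = 512 words per GC worker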
--- a/hotspot/src/share/vm/memory/guardedMemory.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/guardedMemory.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
-#define SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
+#ifndef SHARE_VM_MEMORY_GUARDEDMEMORY_HPP
+#define SHARE_VM_MEMORY_GUARDEDMEMORY_HPP
 
 #include "memory/allocation.hpp"
 #include "utilities/globalDefinitions.hpp"
@@ -323,4 +323,4 @@
 #endif
 }; // GuardedMemory
 
-#endif // SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
+#endif // SHARE_VM_MEMORY_GUARDEDMEMORY_HPP
--- a/hotspot/src/share/vm/memory/metaspaceChunkFreeListSummary.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/metaspaceChunkFreeListSummary.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -21,8 +21,9 @@
  * questions.
  *
  */
-#ifndef SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP
-#define SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP
+
+#ifndef SHARE_VM_MEMORY_METASPACECHUNKFREELISTSUMMARY_HPP
+#define SHARE_VM_MEMORY_METASPACECHUNKFREELISTSUMMARY_HPP
 
 #include "memory/allocation.hpp"
 
@@ -100,4 +101,4 @@
   }
 };
 
-#endif // SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP
+#endif // SHARE_VM_MEMORY_METASPACECHUNKFREELISTSUMMARY_HPP
--- a/hotspot/src/share/vm/memory/metaspaceGCThresholdUpdater.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/metaspaceGCThresholdUpdater.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP
-#define SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP
+#ifndef SHARE_VM_MEMORY_METASPACEGCTHRESHOLDUPDATER_HPP
+#define SHARE_VM_MEMORY_METASPACEGCTHRESHOLDUPDATER_HPP
 
 #include "memory/allocation.hpp"
 #include "utilities/debug.hpp"
@@ -49,4 +49,4 @@
   }
 };
 
-#endif // SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP
+#endif // SHARE_VM_MEMORY_METASPACEGCTHRESHOLDUPDATER_HPP
--- a/hotspot/src/share/vm/memory/metaspaceShared.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/metaspaceShared.hpp	Mon May 04 17:10:50 2015 +0200
@@ -21,8 +21,9 @@
  * questions.
  *
  */
-#ifndef SHARE_VM_MEMORY_METASPACE_SHARED_HPP
-#define SHARE_VM_MEMORY_METASPACE_SHARED_HPP
+
+#ifndef SHARE_VM_MEMORY_METASPACESHARED_HPP
+#define SHARE_VM_MEMORY_METASPACESHARED_HPP
 
 #include "classfile/compactHashtable.hpp"
 #include "memory/allocation.hpp"
@@ -153,4 +154,4 @@
   static int count_class(const char* classlist_file);
   static void estimate_regions_size() NOT_CDS_RETURN;
 };
-#endif // SHARE_VM_MEMORY_METASPACE_SHARED_HPP
+#endif // SHARE_VM_MEMORY_METASPACESHARED_HPP
--- a/hotspot/src/share/vm/memory/metaspaceTracer.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/metaspaceTracer.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_MEMORY_METASPACE_TRACER_HPP
-#define SHARE_VM_MEMORY_METASPACE_TRACER_HPP
+#ifndef SHARE_VM_MEMORY_METASPACETRACER_HPP
+#define SHARE_VM_MEMORY_METASPACETRACER_HPP
 
 #include "memory/allocation.hpp"
 #include "memory/metaspace.hpp"
@@ -52,4 +52,4 @@
 
 };
 
-#endif // SHARE_VM_MEMORY_METASPACE_TRACER_HPP
+#endif // SHARE_VM_MEMORY_METASPACETRACER_HPP
--- a/hotspot/src/share/vm/memory/padded.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/padded.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,6 +22,9 @@
  *
  */
 
+#ifndef SHARE_VM_MEMORY_PADDED_INLINE_HPP
+#define SHARE_VM_MEMORY_PADDED_INLINE_HPP
+
 #include "memory/allocation.inline.hpp"
 #include "memory/padded.hpp"
 #include "utilities/debug.hpp"
@@ -86,3 +89,5 @@
 
   return (T*)align_pointer_up(chunk, alignment);
 }
+
+#endif // SHARE_VM_MEMORY_PADDED_INLINE_HPP
--- a/hotspot/src/share/vm/memory/referenceProcessor.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp	Mon May 04 17:10:50 2015 +0200
@@ -37,7 +37,6 @@
 
 ReferencePolicy* ReferenceProcessor::_always_clear_soft_ref_policy = NULL;
 ReferencePolicy* ReferenceProcessor::_default_soft_ref_policy      = NULL;
-bool             ReferenceProcessor::_pending_list_uses_discovered_field = false;
 jlong            ReferenceProcessor::_soft_ref_timestamp_clock = 0;
 
 void referenceProcessor_init() {
@@ -63,7 +62,6 @@
   guarantee(RefDiscoveryPolicy == ReferenceBasedDiscovery ||
             RefDiscoveryPolicy == ReferentBasedDiscovery,
             "Unrecognized RefDiscoveryPolicy");
-  _pending_list_uses_discovered_field = JDK_Version::current().pending_list_uses_discovered_field();
 }
 
 void ReferenceProcessor::enable_discovery(bool check_no_refs) {
@@ -353,10 +351,6 @@
   // all linked Reference objects. Note that it is important to not dirty any
   // cards during reference processing since this will cause card table
   // verification to fail for G1.
-  //
-  // BKWRD COMPATIBILITY NOTE: For older JDKs (prior to the fix for 4956777),
-  // the "next" field is used to chain the pending list, not the discovered
-  // field.
   if (TraceReferenceGC && PrintGCDetails) {
     gclog_or_tty->print_cr("ReferenceProcessor::enqueue_discovered_reflist list "
                            INTPTR_FORMAT, p2i(refs_list.head()));
@@ -364,64 +358,30 @@
 
   oop obj = NULL;
   oop next_d = refs_list.head();
-  if (pending_list_uses_discovered_field()) { // New behavior
-    // Walk down the list, self-looping the next field
-    // so that the References are not considered active.
-    while (obj != next_d) {
-      obj = next_d;
-      assert(obj->is_instanceRef(), "should be reference object");
-      next_d = java_lang_ref_Reference::discovered(obj);
-      if (TraceReferenceGC && PrintGCDetails) {
-        gclog_or_tty->print_cr("        obj " INTPTR_FORMAT "/next_d " INTPTR_FORMAT,
-                               p2i(obj), p2i(next_d));
-      }
-      assert(java_lang_ref_Reference::next(obj) == NULL,
-             "Reference not active; should not be discovered");
-      // Self-loop next, so as to make Ref not active.
-      java_lang_ref_Reference::set_next_raw(obj, obj);
-      if (next_d != obj) {
-        oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), next_d);
-      } else {
-        // This is the last object.
-        // Swap refs_list into pending_list_addr and
-        // set obj's discovered to what we read from pending_list_addr.
-        oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
-        // Need post-barrier on pending_list_addr. See enqueue_discovered_ref_helper() above.
-        java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL
-        oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old);
-      }
+  // Walk down the list, self-looping the next field
+  // so that the References are not considered active.
+  while (obj != next_d) {
+    obj = next_d;
+    assert(obj->is_instanceRef(), "should be reference object");
+    next_d = java_lang_ref_Reference::discovered(obj);
+    if (TraceReferenceGC && PrintGCDetails) {
+      gclog_or_tty->print_cr("        obj " INTPTR_FORMAT "/next_d " INTPTR_FORMAT,
+                             p2i(obj), p2i(next_d));
     }
-  } else { // Old behavior
-    // Walk down the list, copying the discovered field into
-    // the next field and clearing the discovered field.
-    while (obj != next_d) {
-      obj = next_d;
-      assert(obj->is_instanceRef(), "should be reference object");
-      next_d = java_lang_ref_Reference::discovered(obj);
-      if (TraceReferenceGC && PrintGCDetails) {
-        gclog_or_tty->print_cr("        obj " INTPTR_FORMAT "/next_d " INTPTR_FORMAT,
-                               p2i(obj), p2i(next_d));
-      }
-      assert(java_lang_ref_Reference::next(obj) == NULL,
-             "The reference should not be enqueued");
-      if (next_d == obj) {  // obj is last
-        // Swap refs_list into pending_list_addr and
-        // set obj's next to what we read from pending_list_addr.
-        oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
-        // Need oop_check on pending_list_addr above;
-        // see special oop-check code at the end of
-        // enqueue_discovered_reflists() further below.
-        if (old == NULL) {
-          // obj should be made to point to itself, since
-          // pending list was empty.
-          java_lang_ref_Reference::set_next(obj, obj);
-        } else {
-          java_lang_ref_Reference::set_next(obj, old);
-        }
-      } else {
-        java_lang_ref_Reference::set_next(obj, next_d);
-      }
-      java_lang_ref_Reference::set_discovered(obj, (oop) NULL);
+    assert(java_lang_ref_Reference::next(obj) == NULL,
+           "Reference not active; should not be discovered");
+    // Self-loop next, so as to make Ref not active.
+    java_lang_ref_Reference::set_next_raw(obj, obj);
+    if (next_d != obj) {
+      oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), next_d);
+    } else {
+      // This is the last object.
+      // Swap refs_list into pending_list_addr and
+      // set obj's discovered to what we read from pending_list_addr.
+      oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
+      // Need post-barrier on pending_list_addr. See enqueue_discovered_ref_helper() above.
+      java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL
+      oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old);
     }
   }
 }
@@ -515,22 +475,6 @@
   _refs_list.dec_length(1);
 }
 
-// Make the Reference object active again.
-void DiscoveredListIterator::make_active() {
-  // The pre barrier for G1 is probably just needed for the old
-  // reference processing behavior. Should we guard this with
-  // ReferenceProcessor::pending_list_uses_discovered_field() ?
-  if (UseG1GC) {
-    HeapWord* next_addr = java_lang_ref_Reference::next_addr(_ref);
-    if (UseCompressedOops) {
-      oopDesc::bs()->write_ref_field_pre((narrowOop*)next_addr, NULL);
-    } else {
-      oopDesc::bs()->write_ref_field_pre((oop*)next_addr, NULL);
-    }
-  }
-  java_lang_ref_Reference::set_next_raw(_ref, NULL);
-}
-
 void DiscoveredListIterator::clear_referent() {
   oop_store_raw(_referent_addr, NULL);
 }
@@ -567,8 +511,6 @@
       }
       // Remove Reference object from list
       iter.remove();
-      // Make the Reference object active again
-      iter.make_active();
       // keep the referent around
       iter.make_referent_alive();
       iter.move_to_next();
--- a/hotspot/src/share/vm/memory/referenceProcessor.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/referenceProcessor.hpp	Mon May 04 17:10:50 2015 +0200
@@ -161,9 +161,6 @@
   // Remove the current reference from the list
   void remove();
 
-  // Make the Reference object active again.
-  void make_active();
-
   // Make the referent alive.
   inline void make_referent_alive() {
     if (UseCompressedOops) {
@@ -200,9 +197,6 @@
   size_t total_count(DiscoveredList lists[]);
 
  protected:
-  // Compatibility with pre-4965777 JDK's
-  static bool _pending_list_uses_discovered_field;
-
   // The SoftReference master timestamp clock
   static jlong _soft_ref_timestamp_clock;
 
@@ -421,13 +415,6 @@
   bool discovery_is_atomic() const { return _discovery_is_atomic; }
   void set_atomic_discovery(bool atomic) { _discovery_is_atomic = atomic; }
 
-  // whether the JDK in which we are embedded is a pre-4965777 JDK,
-  // and thus whether or not it uses the discovered field to chain
-  // the entries in the pending list.
-  static bool pending_list_uses_discovered_field() {
-    return _pending_list_uses_discovered_field;
-  }
-
   // whether discovery is done by multiple threads same-old-timeously
   bool discovery_is_mt() const { return _discovery_is_mt; }
   void set_mt_discovery(bool mt) { _discovery_is_mt = mt; }
--- a/hotspot/src/share/vm/memory/referenceType.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/memory/referenceType.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_VM_MEMORY_REFRERENCETYPE_HPP
-#define SHARE_VM_MEMORY_REFRERENCETYPE_HPP
+#ifndef SHARE_VM_MEMORY_REFERENCETYPE_HPP
+#define SHARE_VM_MEMORY_REFERENCETYPE_HPP
 
 #include "utilities/debug.hpp"
 
@@ -39,4 +39,4 @@
   REF_CLEANER    // Subclass of sun/misc/Cleaner
 };
 
-#endif // SHARE_VM_MEMORY_REFRERENCETYPE_HPP
+#endif // SHARE_VM_MEMORY_REFERENCETYPE_HPP
--- a/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -55,30 +55,17 @@
     }
   }
   T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
-  if (ReferenceProcessor::pending_list_uses_discovered_field()) {
-    T next_oop  = oopDesc::load_heap_oop(next_addr);
-    // Treat discovered as normal oop, if ref is not "active" (next non-NULL)
-    if (!oopDesc::is_null(next_oop) && contains(disc_addr)) {
-      // i.e. ref is not "active"
-      debug_only(
-        if(TraceReferenceGC && PrintGCDetails) {
-          gclog_or_tty->print_cr("   Process discovered as normal "
-                                 PTR_FORMAT, p2i(disc_addr));
-        }
-      )
-      Devirtualizer<nv>::do_oop(closure, disc_addr);
-    }
-  } else {
-    // In the case of older JDKs which do not use the discovered field for
-    // the pending list, an inactive ref (next != NULL) must always have a
-    // NULL discovered field.
+  T next_oop  = oopDesc::load_heap_oop(next_addr);
+  // Treat discovered as normal oop, if ref is not "active" (next non-NULL)
+  if (!oopDesc::is_null(next_oop) && contains(disc_addr)) {
+    // i.e. ref is not "active"
     debug_only(
-      T next_oop = oopDesc::load_heap_oop(next_addr);
-      T disc_oop = oopDesc::load_heap_oop(disc_addr);
-      assert(oopDesc::is_null(next_oop) || oopDesc::is_null(disc_oop),
-           err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL"
-                   "discovered field", p2i(obj)));
+      if(TraceReferenceGC && PrintGCDetails) {
+        gclog_or_tty->print_cr("   Process discovered as normal "
+                               PTR_FORMAT, p2i(disc_addr));
+      }
     )
+    Devirtualizer<nv>::do_oop(closure, disc_addr);
   }
   // treat next as normal oop
   if (contains(next_addr)) {
--- a/hotspot/src/share/vm/runtime/java.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/runtime/java.cpp	Mon May 04 17:10:50 2015 +0200
@@ -651,11 +651,15 @@
       minor = micro;
       micro = 0;
     }
+    // Incompatible with pre-4243978 JDK.
+    if (info.pending_list_uses_discovered_field == 0) {
+      vm_exit_during_initialization(
+        "Incompatible JDK is not using Reference.discovered field for pending list");
+    }
     _current = JDK_Version(major, minor, micro, info.update_version,
                            info.special_update_version, build,
                            info.thread_park_blocker == 1,
-                           info.post_vm_init_hook_enabled == 1,
-                           info.pending_list_uses_discovered_field == 1);
+                           info.post_vm_init_hook_enabled == 1);
   }
 }
 
--- a/hotspot/src/share/vm/runtime/java.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/runtime/java.hpp	Mon May 04 17:10:50 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -91,7 +91,6 @@
   bool _partially_initialized;
 
   bool _thread_park_blocker;
-  bool _pending_list_uses_discovered_field;
   bool _post_vm_init_hook_enabled;
 
   bool is_valid() const {
@@ -114,18 +113,17 @@
 
   JDK_Version() : _major(0), _minor(0), _micro(0), _update(0),
                   _special(0), _build(0), _partially_initialized(false),
-                  _thread_park_blocker(false), _post_vm_init_hook_enabled(false),
-                  _pending_list_uses_discovered_field(false) {}
+                  _thread_park_blocker(false), _post_vm_init_hook_enabled(false)
+                  {}
 
   JDK_Version(uint8_t major, uint8_t minor = 0, uint8_t micro = 0,
               uint8_t update = 0, uint8_t special = 0, uint8_t build = 0,
-              bool thread_park_blocker = false, bool post_vm_init_hook_enabled = false,
-              bool pending_list_uses_discovered_field = false) :
+              bool thread_park_blocker = false, bool post_vm_init_hook_enabled = false) :
       _major(major), _minor(minor), _micro(micro), _update(update),
       _special(special), _build(build), _partially_initialized(false),
       _thread_park_blocker(thread_park_blocker),
-      _post_vm_init_hook_enabled(post_vm_init_hook_enabled),
-      _pending_list_uses_discovered_field(pending_list_uses_discovered_field) {}
+      _post_vm_init_hook_enabled(post_vm_init_hook_enabled)
+      {}
 
   // Returns the current running JDK version
   static JDK_Version current() { return _current; }
@@ -152,10 +150,6 @@
   bool post_vm_init_hook_enabled() const {
     return _post_vm_init_hook_enabled;
   }
-  // For compatibility wrt pre-4965777 JDK's
-  bool pending_list_uses_discovered_field() const {
-    return _pending_list_uses_discovered_field;
-  }
 
   // Performs a full ordering comparison using all fields (update, build, etc.)
   int compare(const JDK_Version& other) const;
--- a/hotspot/src/share/vm/runtime/thread.cpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Mon May 04 17:10:50 2015 +0200
@@ -4210,13 +4210,13 @@
                Abstract_VM_Version::vm_info_string());
   st->cr();
 
-#if INCLUDE_ALL_GCS
+#if INCLUDE_SERVICES
   // Dump concurrent locks
   ConcurrentLocksDump concurrent_locks;
   if (print_concurrent_locks) {
     concurrent_locks.dump_at_safepoint();
   }
-#endif // INCLUDE_ALL_GCS
+#endif // INCLUDE_SERVICES
 
   ALL_JAVA_THREADS(p) {
     ResourceMark rm;
@@ -4229,11 +4229,11 @@
       }
     }
     st->cr();
-#if INCLUDE_ALL_GCS
+#if INCLUDE_SERVICES
     if (print_concurrent_locks) {
       concurrent_locks.print_locks_on(p, st);
     }
-#endif // INCLUDE_ALL_GCS
+#endif // INCLUDE_SERVICES
   }
 
   VMThread::vm_thread()->print_on(st);
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp	Mon May 04 17:10:50 2015 +0200
@@ -26,9 +26,6 @@
 #define SHARE_VM_UTILITIES_TASKQUEUE_HPP
 
 #include "memory/allocation.hpp"
-#include "memory/allocation.inline.hpp"
-#include "runtime/mutex.hpp"
-#include "runtime/orderAccess.inline.hpp"
 #include "utilities/stack.hpp"
 
 // Simple TaskQueue stats that are collected by default in debug builds.
@@ -134,11 +131,7 @@
       if (_fields._top == 0) ++_fields._tag;
     }
 
-    Age cmpxchg(const Age new_age, const Age old_age) volatile {
-      return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data,
-                                          (volatile intptr_t *)&_data,
-                                          (intptr_t)old_age._data);
-    }
+    Age cmpxchg(const Age new_age, const Age old_age) volatile;
 
     bool operator ==(const Age& other) const { return _data == other._data; }
 
@@ -315,121 +308,6 @@
   assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
 }
 
-template<class E, MEMFLAGS F, unsigned int N>
-void GenericTaskQueue<E, F, N>::initialize() {
-  _elems = _array_allocator.allocate(N);
-}
-
-template<class E, MEMFLAGS F, unsigned int N>
-void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) {
-  // tty->print_cr("START OopTaskQueue::oops_do");
-  uint iters = size();
-  uint index = _bottom;
-  for (uint i = 0; i < iters; ++i) {
-    index = decrement_index(index);
-    // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
-    //            index, &_elems[index], _elems[index]);
-    E* t = (E*)&_elems[index];      // cast away volatility
-    oop* p = (oop*)t;
-    assert((*t)->is_oop_or_null(), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(*t)));
-    f->do_oop(p);
-  }
-  // tty->print_cr("END OopTaskQueue::oops_do");
-}
-
-template<class E, MEMFLAGS F, unsigned int N>
-bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) {
-  if (dirty_n_elems == N - 1) {
-    // Actually means 0, so do the push.
-    uint localBot = _bottom;
-    // g++ complains if the volatile result of the assignment is
-    // unused, so we cast the volatile away.  We cannot cast directly
-    // to void, because gcc treats that as not using the result of the
-    // assignment.  However, casting to E& means that we trigger an
-    // unused-value warning.  So, we cast the E& to void.
-    (void)const_cast<E&>(_elems[localBot] = t);
-    OrderAccess::release_store(&_bottom, increment_index(localBot));
-    TASKQUEUE_STATS_ONLY(stats.record_push());
-    return true;
-  }
-  return false;
-}
-
-// pop_local_slow() is done by the owning thread and is trying to
-// get the last task in the queue.  It will compete with pop_global()
-// that will be used by other threads.  The tag age is incremented
-// whenever the queue goes empty which it will do here if this thread
-// gets the last task or in pop_global() if the queue wraps (top == 0
-// and pop_global() succeeds, see pop_global()).
-template<class E, MEMFLAGS F, unsigned int N>
-bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
-  // This queue was observed to contain exactly one element; either this
-  // thread will claim it, or a competing "pop_global".  In either case,
-  // the queue will be logically empty afterwards.  Create a new Age value
-  // that represents the empty queue for the given value of "_bottom".  (We
-  // must also increment "tag" because of the case where "bottom == 1",
-  // "top == 0".  A pop_global could read the queue element in that case,
-  // then have the owner thread do a pop followed by another push.  Without
-  // the incrementing of "tag", the pop_global's CAS could succeed,
-  // allowing it to believe it has claimed the stale element.)
-  Age newAge((idx_t)localBot, oldAge.tag() + 1);
-  // Perhaps a competing pop_global has already incremented "top", in which
-  // case it wins the element.
-  if (localBot == oldAge.top()) {
-    // No competing pop_global has yet incremented "top"; we'll try to
-    // install new_age, thus claiming the element.
-    Age tempAge = _age.cmpxchg(newAge, oldAge);
-    if (tempAge == oldAge) {
-      // We win.
-      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
-      TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
-      return true;
-    }
-  }
-  // We lose; a completing pop_global gets the element.  But the queue is empty
-  // and top is greater than bottom.  Fix this representation of the empty queue
-  // to become the canonical one.
-  _age.set(newAge);
-  assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
-  return false;
-}
-
-template<class E, MEMFLAGS F, unsigned int N>
-bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
-  Age oldAge = _age.get();
-  // Architectures with weak memory model require a barrier here
-  // to guarantee that bottom is not older than age,
-  // which is crucial for the correctness of the algorithm.
-#if !(defined SPARC || defined IA32 || defined AMD64)
-  OrderAccess::fence();
-#endif
-  uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom);
-  uint n_elems = size(localBot, oldAge.top());
-  if (n_elems == 0) {
-    return false;
-  }
-
-  // g++ complains if the volatile result of the assignment is
-  // unused, so we cast the volatile away.  We cannot cast directly
-  // to void, because gcc treats that as not using the result of the
-  // assignment.  However, casting to E& means that we trigger an
-  // unused-value warning.  So, we cast the E& to void.
-  (void) const_cast<E&>(t = _elems[oldAge.top()]);
-  Age newAge(oldAge);
-  newAge.increment();
-  Age resAge = _age.cmpxchg(newAge, oldAge);
-
-  // Note that using "_bottom" here might fail, since a pop_local might
-  // have decremented it.
-  assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
-  return resAge == oldAge;
-}
-
-template<class E, MEMFLAGS F, unsigned int N>
-GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
-  FREE_C_HEAP_ARRAY(E, _elems);
-}
-
 // OverflowTaskQueue is a TaskQueue that also includes an overflow stack for
 // elements that do not fit in the TaskQueue.
 //
@@ -468,24 +346,6 @@
   overflow_t _overflow_stack;
 };
 
-template <class E, MEMFLAGS F, unsigned int N>
-bool OverflowTaskQueue<E, F, N>::push(E t)
-{
-  if (!taskqueue_t::push(t)) {
-    overflow_stack()->push(t);
-    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size()));
-  }
-  return true;
-}
-
-template <class E, MEMFLAGS F, unsigned int N>
-bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t)
-{
-  if (overflow_empty()) return false;
-  t = overflow_stack()->pop();
-  return true;
-}
-
 class TaskQueueSetSuper {
 protected:
   static int randomParkAndMiller(int* seed0);
@@ -506,13 +366,7 @@
 public:
   typedef typename T::element_type E;
 
-  GenericTaskQueueSet(int n) : _n(n) {
-    typedef T* GenericTaskQueuePtr;
-    _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F);
-    for (int i = 0; i < n; i++) {
-      _queues[i] = NULL;
-    }
-  }
+  GenericTaskQueueSet(int n);
 
   bool steal_best_of_2(uint queue_num, int* seed, E& t);
 
@@ -541,40 +395,6 @@
   return _queues[i];
 }
 
-template<class T, MEMFLAGS F> bool
-GenericTaskQueueSet<T, F>::steal(uint queue_num, int* seed, E& t) {
-  for (uint i = 0; i < 2 * _n; i++) {
-    if (steal_best_of_2(queue_num, seed, t)) {
-      TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true));
-      return true;
-    }
-  }
-  TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false));
-  return false;
-}
-
-template<class T, MEMFLAGS F> bool
-GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) {
-  if (_n > 2) {
-    uint k1 = queue_num;
-    while (k1 == queue_num) k1 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
-    uint k2 = queue_num;
-    while (k2 == queue_num || k2 == k1) k2 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
-    // Sample both and try the larger.
-    uint sz1 = _queues[k1]->size();
-    uint sz2 = _queues[k2]->size();
-    if (sz2 > sz1) return _queues[k2]->pop_global(t);
-    else return _queues[k1]->pop_global(t);
-  } else if (_n == 2) {
-    // Just try the other one.
-    uint k = (queue_num + 1) % 2;
-    return _queues[k]->pop_global(t);
-  } else {
-    assert(_n == 1, "can't be zero.");
-    return false;
-  }
-}
-
 template<class T, MEMFLAGS F>
 bool GenericTaskQueueSet<T, F>::peek() {
   // Try all the queues.
@@ -649,65 +469,6 @@
 #endif
 };
 
-template<class E, MEMFLAGS F, unsigned int N> inline bool
-GenericTaskQueue<E, F, N>::push(E t) {
-  uint localBot = _bottom;
-  assert(localBot < N, "_bottom out of range.");
-  idx_t top = _age.top();
-  uint dirty_n_elems = dirty_size(localBot, top);
-  assert(dirty_n_elems < N, "n_elems out of range.");
-  if (dirty_n_elems < max_elems()) {
-    // g++ complains if the volatile result of the assignment is
-    // unused, so we cast the volatile away.  We cannot cast directly
-    // to void, because gcc treats that as not using the result of the
-    // assignment.  However, casting to E& means that we trigger an
-    // unused-value warning.  So, we cast the E& to void.
-    (void) const_cast<E&>(_elems[localBot] = t);
-    OrderAccess::release_store(&_bottom, increment_index(localBot));
-    TASKQUEUE_STATS_ONLY(stats.record_push());
-    return true;
-  } else {
-    return push_slow(t, dirty_n_elems);
-  }
-}
-
-template<class E, MEMFLAGS F, unsigned int N> inline bool
-GenericTaskQueue<E, F, N>::pop_local(volatile E& t) {
-  uint localBot = _bottom;
-  // This value cannot be N-1.  That can only occur as a result of
-  // the assignment to bottom in this method.  If it does, this method
-  // resets the size to 0 before the next call (which is sequential,
-  // since this is pop_local.)
-  uint dirty_n_elems = dirty_size(localBot, _age.top());
-  assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
-  if (dirty_n_elems == 0) return false;
-  localBot = decrement_index(localBot);
-  _bottom = localBot;
-  // This is necessary to prevent any read below from being reordered
-  // before the store just above.
-  OrderAccess::fence();
-  // g++ complains if the volatile result of the assignment is
-  // unused, so we cast the volatile away.  We cannot cast directly
-  // to void, because gcc treats that as not using the result of the
-  // assignment.  However, casting to E& means that we trigger an
-  // unused-value warning.  So, we cast the E& to void.
-  (void) const_cast<E&>(t = _elems[localBot]);
-  // This is a second read of "age"; the "size()" above is the first.
-  // If there's still at least one element in the queue, based on the
-  // "_bottom" and "age" we've read, then there can be no interference with
-  // a "pop_global" operation, and we're done.
-  idx_t tp = _age.top();    // XXX
-  if (size(localBot, tp) > 0) {
-    assert(dirty_size(localBot, tp) != N - 1, "sanity");
-    TASKQUEUE_STATS_ONLY(stats.record_pop());
-    return true;
-  } else {
-    // Otherwise, the queue contained exactly one element; we take the slow
-    // path.
-    return pop_local_slow(localBot, _age.get());
-  }
-}
-
 typedef GenericTaskQueue<oop, mtGC>             OopTaskQueue;
 typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
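
Illustration, not part of this changeset: the refactoring above keeps only declarations in taskqueue.hpp and moves bodies that need heavyweight headers (Atomic, OrderAccess, the C-heap allocation macros) into a new taskqueue.inline.hpp that only the .cpp files actually instantiating the queues include. A minimal standalone sketch of that .hpp / .inline.hpp split, using hypothetical names:

// counter.hpp - declarations only; cheap to include everywhere.
#ifndef COUNTER_HPP
#define COUNTER_HPP

class Counter {
  long _value;
public:
  Counter() : _value(0) {}
  // Declared here, defined in counter.inline.hpp, so this header does not
  // drag in the heavier headers the definition depends on.
  void add(long v);
  long value() const { return _value; }  // trivial bodies can stay in the .hpp
};

#endif // COUNTER_HPP

// counter.inline.hpp - definitions with extra dependencies; included only by
// the translation units that call add().
#ifndef COUNTER_INLINE_HPP
#define COUNTER_INLINE_HPP

#include "counter.hpp"
#include <algorithm>  // stand-in for a heavyweight dependency such as an atomic.inline.hpp

inline void Counter::add(long v) {
  _value = std::max(_value + v, 0L);  // illustrative body only
}

#endif // COUNTER_INLINE_HPP

This mirrors why Age::cmpxchg() becomes a bare declaration in taskqueue.hpp while its body moves into the new inline header below.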
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/utilities/taskqueue.inline.hpp	Mon May 04 17:10:50 2015 +0200
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_UTILITIES_TASKQUEUE_INLINE_HPP
+#define SHARE_VM_UTILITIES_TASKQUEUE_INLINE_HPP
+
+#include "memory/allocation.inline.hpp"
+#include "oops/oop.inline.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/taskqueue.hpp"
+#include "utilities/stack.inline.hpp"
+#include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
+
+template <class T, MEMFLAGS F>
+inline GenericTaskQueueSet<T, F>::GenericTaskQueueSet(int n) : _n(n) {
+  typedef T* GenericTaskQueuePtr;
+  _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F);
+  for (int i = 0; i < n; i++) {
+    _queues[i] = NULL;
+  }
+}
+
+template<class E, MEMFLAGS F, unsigned int N>
+inline void GenericTaskQueue<E, F, N>::initialize() {
+  _elems = _array_allocator.allocate(N);
+}
+
+template<class E, MEMFLAGS F, unsigned int N>
+inline GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
+  FREE_C_HEAP_ARRAY(E, _elems);
+}
+
+template<class E, MEMFLAGS F, unsigned int N>
+bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) {
+  if (dirty_n_elems == N - 1) {
+    // Actually means 0, so do the push.
+    uint localBot = _bottom;
+    // g++ complains if the volatile result of the assignment is
+    // unused, so we cast the volatile away.  We cannot cast directly
+    // to void, because gcc treats that as not using the result of the
+    // assignment.  However, casting to E& means that we trigger an
+    // unused-value warning.  So, we cast the E& to void.
+    (void)const_cast<E&>(_elems[localBot] = t);
+    OrderAccess::release_store(&_bottom, increment_index(localBot));
+    TASKQUEUE_STATS_ONLY(stats.record_push());
+    return true;
+  }
+  return false;
+}
+
+template<class E, MEMFLAGS F, unsigned int N> inline bool
+GenericTaskQueue<E, F, N>::push(E t) {
+  uint localBot = _bottom;
+  assert(localBot < N, "_bottom out of range.");
+  idx_t top = _age.top();
+  uint dirty_n_elems = dirty_size(localBot, top);
+  assert(dirty_n_elems < N, "n_elems out of range.");
+  if (dirty_n_elems < max_elems()) {
+    // g++ complains if the volatile result of the assignment is
+    // unused, so we cast the volatile away.  We cannot cast directly
+    // to void, because gcc treats that as not using the result of the
+    // assignment.  However, casting to E& means that we trigger an
+    // unused-value warning.  So, we cast the E& to void.
+    (void) const_cast<E&>(_elems[localBot] = t);
+    OrderAccess::release_store(&_bottom, increment_index(localBot));
+    TASKQUEUE_STATS_ONLY(stats.record_push());
+    return true;
+  } else {
+    return push_slow(t, dirty_n_elems);
+  }
+}
+
+template <class E, MEMFLAGS F, unsigned int N>
+inline bool OverflowTaskQueue<E, F, N>::push(E t)
+{
+  if (!taskqueue_t::push(t)) {
+    overflow_stack()->push(t);
+    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size()));
+  }
+  return true;
+}
+
+// pop_local_slow() is done by the owning thread and is trying to
+// get the last task in the queue.  It will compete with pop_global()
+// that will be used by other threads.  The tag age is incremented
+// whenever the queue goes empty which it will do here if this thread
+// gets the last task or in pop_global() if the queue wraps (top == 0
+// and pop_global() succeeds, see pop_global()).
+template<class E, MEMFLAGS F, unsigned int N>
+bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
+  // This queue was observed to contain exactly one element; either this
+  // thread will claim it, or a competing "pop_global".  In either case,
+  // the queue will be logically empty afterwards.  Create a new Age value
+  // that represents the empty queue for the given value of "_bottom".  (We
+  // must also increment "tag" because of the case where "bottom == 1",
+  // "top == 0".  A pop_global could read the queue element in that case,
+  // then have the owner thread do a pop followed by another push.  Without
+  // the incrementing of "tag", the pop_global's CAS could succeed,
+  // allowing it to believe it has claimed the stale element.)
+  Age newAge((idx_t)localBot, oldAge.tag() + 1);
+  // Perhaps a competing pop_global has already incremented "top", in which
+  // case it wins the element.
+  if (localBot == oldAge.top()) {
+    // No competing pop_global has yet incremented "top"; we'll try to
+    // install new_age, thus claiming the element.
+    Age tempAge = _age.cmpxchg(newAge, oldAge);
+    if (tempAge == oldAge) {
+      // We win.
+      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
+      TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
+      return true;
+    }
+  }
+  // We lose; a competing pop_global gets the element.  But the queue is empty
+  // and top is greater than bottom.  Fix this representation of the empty queue
+  // to become the canonical one.
+  _age.set(newAge);
+  assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
+  return false;
+}
+
+template<class E, MEMFLAGS F, unsigned int N> inline bool
+GenericTaskQueue<E, F, N>::pop_local(volatile E& t) {
+  uint localBot = _bottom;
+  // This value cannot be N-1.  That can only occur as a result of
+  // the assignment to bottom in this method.  If it does, this method
+  // resets the size to 0 before the next call (which is sequential,
+  // since this is pop_local.)
+  uint dirty_n_elems = dirty_size(localBot, _age.top());
+  assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
+  if (dirty_n_elems == 0) return false;
+  localBot = decrement_index(localBot);
+  _bottom = localBot;
+  // This is necessary to prevent any read below from being reordered
+  // before the store just above.
+  OrderAccess::fence();
+  // g++ complains if the volatile result of the assignment is
+  // unused, so we cast the volatile away.  We cannot cast directly
+  // to void, because gcc treats that as not using the result of the
+  // assignment.  However, casting to E& means that we trigger an
+  // unused-value warning.  So, we cast the E& to void.
+  (void) const_cast<E&>(t = _elems[localBot]);
+  // This is a second read of "age"; the "size()" above is the first.
+  // If there's still at least one element in the queue, based on the
+  // "_bottom" and "age" we've read, then there can be no interference with
+  // a "pop_global" operation, and we're done.
+  idx_t tp = _age.top();    // XXX
+  if (size(localBot, tp) > 0) {
+    assert(dirty_size(localBot, tp) != N - 1, "sanity");
+    TASKQUEUE_STATS_ONLY(stats.record_pop());
+    return true;
+  } else {
+    // Otherwise, the queue contained exactly one element; we take the slow
+    // path.
+    return pop_local_slow(localBot, _age.get());
+  }
+}
+
+template <class E, MEMFLAGS F, unsigned int N>
+bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t)
+{
+  if (overflow_empty()) return false;
+  t = overflow_stack()->pop();
+  return true;
+}
+
+template<class E, MEMFLAGS F, unsigned int N>
+bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
+  Age oldAge = _age.get();
+  // Architectures with a weak memory model require a barrier here
+  // to guarantee that bottom is not older than age,
+  // which is crucial for the correctness of the algorithm.
+#if !(defined SPARC || defined IA32 || defined AMD64)
+  OrderAccess::fence();
+#endif
+  uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom);
+  uint n_elems = size(localBot, oldAge.top());
+  if (n_elems == 0) {
+    return false;
+  }
+
+  // g++ complains if the volatile result of the assignment is
+  // unused, so we cast the volatile away.  We cannot cast directly
+  // to void, because gcc treats that as not using the result of the
+  // assignment.  However, casting to E& means that we trigger an
+  // unused-value warning.  So, we cast the E& to void.
+  (void) const_cast<E&>(t = _elems[oldAge.top()]);
+  Age newAge(oldAge);
+  newAge.increment();
+  Age resAge = _age.cmpxchg(newAge, oldAge);
+
+  // Note that using "_bottom" here might fail, since a pop_local might
+  // have decremented it.
+  assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
+  return resAge == oldAge;
+}
+
+template<class T, MEMFLAGS F> bool
+GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) {
+  if (_n > 2) {
+    uint k1 = queue_num;
+    while (k1 == queue_num) k1 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
+    uint k2 = queue_num;
+    while (k2 == queue_num || k2 == k1) k2 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
+    // Sample both and try the larger.
+    uint sz1 = _queues[k1]->size();
+    uint sz2 = _queues[k2]->size();
+    if (sz2 > sz1) return _queues[k2]->pop_global(t);
+    else return _queues[k1]->pop_global(t);
+  } else if (_n == 2) {
+    // Just try the other one.
+    uint k = (queue_num + 1) % 2;
+    return _queues[k]->pop_global(t);
+  } else {
+    assert(_n == 1, "can't be zero.");
+    return false;
+  }
+}
+
+template<class T, MEMFLAGS F> bool
+GenericTaskQueueSet<T, F>::steal(uint queue_num, int* seed, E& t) {
+  for (uint i = 0; i < 2 * _n; i++) {
+    if (steal_best_of_2(queue_num, seed, t)) {
+      TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true));
+      return true;
+    }
+  }
+  TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false));
+  return false;
+}
+
+template <unsigned int N, MEMFLAGS F>
+inline typename TaskQueueSuper<N, F>::Age TaskQueueSuper<N, F>::Age::cmpxchg(const Age new_age, const Age old_age) volatile {
+  return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data,
+                                      (volatile intptr_t *)&_data,
+                                      (intptr_t)old_age._data);
+}
+
+template<class E, MEMFLAGS F, unsigned int N>
+inline void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) {
+  // tty->print_cr("START OopTaskQueue::oops_do");
+  uint iters = size();
+  uint index = _bottom;
+  for (uint i = 0; i < iters; ++i) {
+    index = decrement_index(index);
+    // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
+    //            index, &_elems[index], _elems[index]);
+    E* t = (E*)&_elems[index];      // cast away volatility
+    oop* p = (oop*)t;
+    assert((*t)->is_oop_or_null(), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(*t)));
+    f->do_oop(p);
+  }
+  // tty->print_cr("END OopTaskQueue::oops_do");
+}
+
+
+#endif // SHARE_VM_UTILITIES_TASKQUEUE_INLINE_HPP
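
Illustration, not part of this changeset: steal() above retries steal_best_of_2() up to 2 * _n times, and steal_best_of_2() samples two random victims (never the caller's own queue) and attempts pop_global() on whichever looks larger. A simplified single-threaded sketch of just that victim-selection policy, with a plain std::deque standing in for the lock-free queue, so it shows the heuristic rather than the concurrent protocol:

#include <cstdlib>
#include <deque>
#include <vector>

// Toy stand-in for one worker's queue; HotSpot's GenericTaskQueue is lock-free.
typedef std::deque<int> ToyQueue;

// Mirrors steal_best_of_2(): pick two random victims (never ourselves) and
// take from the front ("top") of whichever currently looks larger.
static bool steal_best_of_2(std::vector<ToyQueue>& queues, size_t me, int& task) {
  size_t n = queues.size();
  if (n <= 1) return false;                       // nobody else to steal from
  size_t k1 = me, k2 = me;
  if (n == 2) {
    k1 = k2 = (me + 1) % 2;                       // just try the other queue
  } else {
    while (k1 == me)             k1 = std::rand() % n;
    while (k2 == me || k2 == k1) k2 = std::rand() % n;
  }
  ToyQueue& victim = (queues[k2].size() > queues[k1].size()) ? queues[k2] : queues[k1];
  if (victim.empty()) return false;
  task = victim.front();                          // pop_global() takes the oldest task
  victim.pop_front();
  return true;
}

// Mirrors steal(): give up after 2 * n unsuccessful attempts.
static bool steal(std::vector<ToyQueue>& queues, size_t me, int& task) {
  for (size_t i = 0; i < 2 * queues.size(); i++) {
    if (steal_best_of_2(queues, me, task)) return true;
  }
  return false;
}

In the real code both sampled sizes are racy snapshots and the chosen pop_global() can still lose its CAS, which is why steal() keeps retrying instead of trusting a single attempt.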
--- a/hotspot/test/Makefile	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/Makefile	Mon May 04 17:10:50 2015 +0200
@@ -28,8 +28,6 @@
 
 ALT_MAKE ?= closed
 
--include $(ALT_MAKE)/Makefile
-
 GETMIXEDPATH=echo
 
 # Utilities used
@@ -306,6 +304,8 @@
 endif
 JTREG_BASIC_OPTIONS += $(JTREG_KEY_OPTION)
 
+-include $(ALT_MAKE)/Makefile
+
 # Make sure jtreg exists
 $(JTREG): $(JT_HOME)
 
@@ -401,15 +401,6 @@
 
 ################################################################
 
-# internalvmtests (run internal unit tests inside the VM)
-
-hotspot_internalvmtests internalvmtests: prep $(PRODUCT_HOME)
-	$(PRODUCT_HOME)/bin/java $(JAVA_OPTIONS) -XX:+ExecuteInternalVMTests -version
-
-PHONY_LIST += hotspot_internalvmtests internalvmtests
-
-################################################################
-
 # Phony targets (e.g. these are not filenames)
 .PHONY: all clean prep $(PHONY_LIST)
 
--- a/hotspot/test/TEST.groups	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/TEST.groups	Mon May 04 17:10:50 2015 +0200
@@ -233,6 +233,7 @@
   gc/arguments/TestParallelGCThreads.java \
   gc/arguments/TestUseCompressedOopsErgo.java \
   gc/class_unloading/TestG1ClassUnloadingHWM.java \
+  gc/ergonomics/TestDynamicNumberOfGCThreads.java \
   gc/g1/ \
   gc/metaspace/G1AddMetaspaceDependency.java \
   gc/metaspace/TestMetaspacePerfCounters.java \
@@ -262,6 +263,7 @@
   gc/arguments/TestMinInitialErgonomics.java \
   gc/arguments/TestParallelGCThreads.java \
   gc/arguments/TestUseCompressedOopsErgo.java \
+  gc/ergonomics/TestDynamicNumberOfGCThreads.java \
   gc/metaspace/TestMetaspacePerfCounters.java \
   gc/parallelScavenge/ \
   gc/startup_warnings/TestParallelGC.java \
@@ -279,6 +281,7 @@
   gc/arguments/TestUseCompressedOopsErgo.java \
   gc/class_unloading/TestCMSClassUnloadingEnabledHWM.java \
   gc/concurrentMarkSweep/ \
+  gc/ergonomics/TestDynamicNumberOfGCThreads.java \
   gc/startup_warnings/TestCMS.java \
   gc/startup_warnings/TestDefNewCMS.java \
   gc/startup_warnings/TestParNewCMS.java
--- a/hotspot/test/gc/TestSoftReferencesBehaviorOnOOME.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/TestSoftReferencesBehaviorOnOOME.java	Mon May 04 17:10:50 2015 +0200
@@ -28,29 +28,30 @@
  * @library /testlibrary
  * @modules java.base/sun.misc
  *          java.management
- * @ignore 8073669
  * @build TestSoftReferencesBehaviorOnOOME
  * @run main/othervm -Xmx128m TestSoftReferencesBehaviorOnOOME 512 2k
  * @run main/othervm -Xmx128m TestSoftReferencesBehaviorOnOOME 128k 256k
- * @run main/othervm -Xmx128m TestSoftReferencesBehaviorOnOOME 2k 32k 10
+ * @run main/othervm -Xmx128m TestSoftReferencesBehaviorOnOOME 2k 32k
  */
 import com.oracle.java.testlibrary.Utils;
+import com.oracle.java.testlibrary.Asserts;
 import java.lang.ref.SoftReference;
 import java.util.LinkedList;
 import java.util.Random;
 
 public class TestSoftReferencesBehaviorOnOOME {
 
-    private static final Random rndGenerator = Utils.getRandomInstance();
-
+    /**
+     * The test generates a lot of soft references to objects with random payloads,
+     * then provokes an OOME and checks that all SoftReferences have been cleared.
+     * @param args - [minSize] [maxSize]
+     *  where
+     *  - minSize - min size of random objects
+     *  - maxSize - max size of random objects
+     */
     public static void main(String[] args) {
-        int semiRefAllocFrequency = DEFAULT_FREQUENCY;
-        long minSize = DEFAULT_MIN_SIZE,
-                maxSize = DEFAULT_MAX_SIZE;
-
-        if ( args.length >= 3 ) {
-            semiRefAllocFrequency = Integer.parseInt(args[2]);
-        }
+        long minSize = DEFAULT_MIN_SIZE;
+        long maxSize = DEFAULT_MAX_SIZE;
 
         if ( args.length >= 2) {
             maxSize = getBytesCount(args[1]);
@@ -60,46 +61,49 @@
             minSize = getBytesCount(args[0]);
         }
 
-        new TestSoftReferencesBehaviorOnOOME().softReferencesOom(minSize, maxSize, semiRefAllocFrequency);
+        new TestSoftReferencesBehaviorOnOOME().softReferencesOom(minSize, maxSize);
     }
 
     /**
      * Test that all SoftReferences has been cleared at time of OOM.
      */
-    void softReferencesOom(long minSize, long maxSize, int semiRefAllocFrequency) {
-        System.out.format( "minSize = %d, maxSize = %d, freq = %d%n", minSize, maxSize, semiRefAllocFrequency );
-        long counter = 0;
+    void softReferencesOom(long minSize, long maxSize) {
+        System.out.format( "minSize = %d, maxSize = %d%n", minSize, maxSize );
+
+        LinkedList<SoftReference> arrSoftRefs = new LinkedList();
+        staticRef = arrSoftRefs;
+        LinkedList arrObjects = new LinkedList();
+        staticRef = arrObjects;
 
         long multiplier = maxSize - minSize;
-        LinkedList<SoftReference> arrSoftRefs = new LinkedList();
-        LinkedList arrObjects = new LinkedList();
         long numberOfNotNulledObjects = 0;
-        long oomSoftArraySize = 0;
 
         try {
-            while (true) {
-                // Keep every Xth object to make sure we hit OOM pretty fast
-                if (counter % semiRefAllocFrequency != 0) {
-                    long allocationSize = ((int) (rndGenerator.nextDouble() * multiplier))
-                            + minSize;
-                    arrObjects.add(new byte[(int)allocationSize]);
-                } else {
-                    arrSoftRefs.add(new SoftReference(new Object()));
-                }
+
+            // Let's allocate as many SoftReferences as we can, assuming each payload
+            // has the minimum size. This may provoke some GC, but ensures we allocate enough.
+            long numSofts = (long) ((0.95 * Runtime.getRuntime().totalMemory()) / minSize);
+            System.out.println("num Soft: " + numSofts);
+
+            while (numSofts-- > 0) {
+                int allocationSize = ((int) (RND_GENERATOR.nextDouble() * multiplier))
+                            + (int)minSize;
+                arrSoftRefs.add(new SoftReference(new byte[allocationSize]));
+            }
 
-                counter++;
-                if (counter == Long.MAX_VALUE) {
-                    counter = 0;
-                }
+            System.out.println("free: " + Runtime.getRuntime().freeMemory());
+
+            // provoke OOME.
+            while (true) {
+                arrObjects.add(new byte[(int) Runtime.getRuntime().totalMemory()]);
             }
+
         } catch (OutOfMemoryError oome) {
+
             // Clear allocated ballast, so we don't get another OOM.
-
+            staticRef = null;
             arrObjects = null;
-
-            // Get the number of soft refs first, so we don't trigger
-            // another OOM.
-            oomSoftArraySize = arrSoftRefs.size();
+            long oomSoftArraySize = arrSoftRefs.size();
 
             for (SoftReference sr : arrSoftRefs) {
                 Object o = sr.get();
@@ -111,15 +115,14 @@
 
             // Make sure we clear all refs before we return failure
             arrSoftRefs = null;
-
-            if (numberOfNotNulledObjects > 0) {
-                throw new RuntimeException(numberOfNotNulledObjects + " out of "
-                        + oomSoftArraySize + " SoftReferences was not "
-                        + "null at time of OutOfMemoryError");
-            }
+            Asserts.assertFalse(numberOfNotNulledObjects > 0,
+                    "" + numberOfNotNulledObjects + " out of "
+                    + oomSoftArraySize + " SoftReferences were not "
+                    + "null at time of OutOfMemoryError"
+            );
         } finally {
-            arrSoftRefs = null;
-            arrObjects = null;
+            Asserts.assertTrue(arrObjects == null, "OOME hasn't been provoked");
+            Asserts.assertTrue(arrSoftRefs == null, "OOME hasn't been provoked");
         }
     }
 
@@ -128,9 +131,7 @@
         long mod = 1;
 
         if (arg.trim().length() >= 2) {
-            mod = postfixes.indexOf(
-                    arg.trim().charAt(arg.length() - 1)
-            );
+            mod = postfixes.indexOf(arg.trim().charAt(arg.length() - 1));
 
             if (mod != -1) {
                 mod = (long) Math.pow(1024, mod+1);
@@ -143,7 +144,8 @@
         return Long.parseLong(arg) * mod;
     }
 
+    private static final Random RND_GENERATOR = Utils.getRandomInstance();
     private static final long DEFAULT_MIN_SIZE = 512;
     private static final long DEFAULT_MAX_SIZE = 1024;
-    private static final int DEFAULT_FREQUENCY = 4;
+    private static Object staticRef; // to prevent compiler optimizations
 }
--- a/hotspot/test/gc/ergonomics/TestDynamicNumberOfGCThreads.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/ergonomics/TestDynamicNumberOfGCThreads.java	Mon May 04 17:10:50 2015 +0200
@@ -44,14 +44,24 @@
   }
 
   private static void verifyDynamicNumberOfGCThreads(OutputAnalyzer output) {
+    output.shouldHaveExitValue(0); // test should run successfully
     output.shouldContain("new_active_workers");
-    output.shouldHaveExitValue(0);
   }
 
   private static void testDynamicNumberOfGCThreads(String gcFlag) throws Exception {
     // UseDynamicNumberOfGCThreads and TraceDynamicGCThreads enabled
-    ProcessBuilder pb_enabled =
-      ProcessTools.createJavaProcessBuilder("-XX:+" + gcFlag, "-Xmx10M", "-XX:+PrintGCDetails",  "-XX:+UseDynamicNumberOfGCThreads", "-XX:+TraceDynamicGCThreads", GCTest.class.getName());
+    String[] baseArgs = {"-XX:+" + gcFlag, "-Xmx10M", "-XX:+PrintGCDetails",  "-XX:+UseDynamicNumberOfGCThreads", "-XX:+TraceDynamicGCThreads", GCTest.class.getName()};
+
+    // Base test with gc and +UseDynamicNumberOfGCThreads:
+    ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder(baseArgs);
+    verifyDynamicNumberOfGCThreads(new OutputAnalyzer(pb_enabled.start()));
+
+    // Ensure it also works on uniprocessors or if user specifies -XX:ParallelGCThreads=1:
+    String[] extraArgs = {"-XX:+UnlockDiagnosticVMOptions", "-XX:+ForceDynamicNumberOfGCThreads", "-XX:ParallelGCThreads=1"};
+    String[] finalArgs = new String[baseArgs.length + extraArgs.length];
+    System.arraycopy(extraArgs, 0, finalArgs, 0,                extraArgs.length);
+    System.arraycopy(baseArgs,  0, finalArgs, extraArgs.length, baseArgs.length);
+    pb_enabled = ProcessTools.createJavaProcessBuilder(finalArgs);
     verifyDynamicNumberOfGCThreads(new OutputAnalyzer(pb_enabled.start()));
   }
 
--- a/hotspot/test/gc/g1/TestShrinkAuxiliaryData.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/g1/TestShrinkAuxiliaryData.java	Mon May 04 17:10:50 2015 +0200
@@ -76,7 +76,6 @@
         printTestInfo(maxCacheSize);
 
         vmOpts.add("-XX:G1ConcRSLogCacheSize=" + hotCardTableSize);
-        vmOpts.addAll(Arrays.asList(Utils.getTestJavaOpts()));
 
         // for 32 bits ObjectAlignmentInBytes is not a option
         if (Platform.is32bit()) {
@@ -98,7 +97,7 @@
 
     private void performTest(List<String> opts) throws Exception {
         ProcessBuilder pb
-                = ProcessTools.createJavaProcessBuilder(
+                = ProcessTools.createJavaProcessBuilder(true,
                         opts.toArray(new String[opts.size()])
                 );
 
--- a/hotspot/test/gc/g1/TestShrinkAuxiliaryData05.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/g1/TestShrinkAuxiliaryData05.java	Mon May 04 17:10:50 2015 +0200
@@ -23,7 +23,7 @@
 
 /**
  * @test TestShrinkAuxiliaryData05
- * @bug 8038423 8061715
+ * @bug 8038423 8061715 8078405
  * @summary Checks that decommitment occurs for JVM with different
  * G1ConcRSLogCacheSize and ObjectAlignmentInBytes options values
  * @requires vm.gc=="G1" | vm.gc=="null"
--- a/hotspot/test/gc/g1/TestShrinkAuxiliaryData10.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/g1/TestShrinkAuxiliaryData10.java	Mon May 04 17:10:50 2015 +0200
@@ -23,7 +23,7 @@
 
 /**
  * @test TestShrinkAuxiliaryData10
- * @bug 8038423 8061715
+ * @bug 8038423 8061715 8078405
  * @summary Checks that decommitment occurs for JVM with different
  * G1ConcRSLogCacheSize and ObjectAlignmentInBytes options values
  * @requires vm.gc=="G1" | vm.gc=="null"
--- a/hotspot/test/gc/g1/TestShrinkAuxiliaryData15.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/g1/TestShrinkAuxiliaryData15.java	Mon May 04 17:10:50 2015 +0200
@@ -23,7 +23,7 @@
 
 /**
  * @test TestShrinkAuxiliaryData15
- * @bug 8038423 8061715
+ * @bug 8038423 8061715 8078405
  * @summary Checks that decommitment occurs for JVM with different
  * G1ConcRSLogCacheSize and ObjectAlignmentInBytes options values
  * @requires vm.gc=="G1" | vm.gc=="null"
--- a/hotspot/test/gc/g1/TestShrinkAuxiliaryData20.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/g1/TestShrinkAuxiliaryData20.java	Mon May 04 17:10:50 2015 +0200
@@ -23,7 +23,7 @@
 
 /**
  * @test TestShrinkAuxiliaryData20
- * @bug 8038423 8061715
+ * @bug 8038423 8061715 8078405
  * @summary Checks that decommitment occurs for JVM with different
  * G1ConcRSLogCacheSize and ObjectAlignmentInBytes options values
  * @requires vm.gc=="G1" | vm.gc=="null"
--- a/hotspot/test/gc/g1/TestShrinkAuxiliaryData25.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/g1/TestShrinkAuxiliaryData25.java	Mon May 04 17:10:50 2015 +0200
@@ -23,7 +23,7 @@
 
 /**
  * @test TestShrinkAuxiliaryData25
- * @bug 8038423 8061715
+ * @bug 8038423 8061715 8078405
  * @summary Checks that decommitment occurs for JVM with different
  * G1ConcRSLogCacheSize and ObjectAlignmentInBytes options values
  * @requires vm.gc=="G1" | vm.gc=="null"
--- a/hotspot/test/gc/g1/TestShrinkAuxiliaryData30.java	Sat May 02 00:16:29 2015 -0700
+++ b/hotspot/test/gc/g1/TestShrinkAuxiliaryData30.java	Mon May 04 17:10:50 2015 +0200
@@ -23,7 +23,7 @@
 
 /**
  * @test TestShrinkAuxiliaryData30
- * @bug 8038423 8061715
+ * @bug 8038423 8061715 8078405
  * @summary Checks that decommitment occurs for JVM with different
  * G1ConcRSLogCacheSize and ObjectAlignmentInBytes options values
  * @requires vm.gc=="G1" | vm.gc=="null"