8060025: Object copy time regressions after JDK-8031323 and JDK-8057536
authortschatzl
Fri, 19 Dec 2014 09:21:06 +0100
changeset 28213 b0bf57cd1e9d
parent 28212 647b7d0efb88
child 28214 0b2894b5e67c
child 28215 4920c0fcdb07
8060025: Object copy time regressions after JDK-8031323 and JDK-8057536 Summary: Evaluate and improve object copy time by micro-optimizations and splitting out slow and fast paths aggressively. Reviewed-by: kbarrett, mgerdin, jmasa Contributed-by: Tony Printezis <tprintezis@twitter.com>, Thomas Schatzl <thomas.schatzl@oracle.com>
hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp
hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp
hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
hotspot/src/share/vm/gc_implementation/g1/g1InCSetState.hpp
hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp
hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp
hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp
--- a/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Fri Dec 19 09:21:06 2014 +0100
@@ -254,25 +254,23 @@
 HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size,
                                                        bool force) {
   assert(!force, "not supported for GC alloc regions");
-  return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForSurvived);
+  return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young);
 }
 
 void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region,
                                           size_t allocated_bytes) {
-  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes,
-                               GCAllocForSurvived);
+  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Young);
 }
 
 HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size,
                                                   bool force) {
   assert(!force, "not supported for GC alloc regions");
-  return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForTenured);
+  return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old);
 }
 
 void OldGCAllocRegion::retire_region(HeapRegion* alloc_region,
                                      size_t allocated_bytes) {
-  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes,
-                               GCAllocForTenured);
+  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Old);
 }
 
 HeapRegion* OldGCAllocRegion::release() {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp	Fri Dec 19 09:21:06 2014 +0100
@@ -113,15 +113,16 @@
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
   ParGCAllocBuffer(gclab_word_size), _retired(true) { }
 
-HeapWord* G1ParGCAllocator::allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) {
-  HeapWord* obj = NULL;
-  size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
+HeapWord* G1ParGCAllocator::allocate_direct_or_new_plab(InCSetState dest,
+                                                        size_t word_sz,
+                                                        AllocationContext_t context) {
+  size_t gclab_word_size = _g1h->desired_plab_sz(dest);
   if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
-    G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose, context);
+    G1ParGCAllocBuffer* alloc_buf = alloc_buffer(dest, context);
     add_to_alloc_buffer_waste(alloc_buf->words_remaining());
     alloc_buf->retire(false /* end_of_gc */, false /* retain */);
 
-    HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size, context);
+    HeapWord* buf = _g1h->par_allocate_during_gc(dest, gclab_word_size, context);
     if (buf == NULL) {
       return NULL; // Let caller handle allocation failure.
     }
@@ -129,30 +130,33 @@
     alloc_buf->set_word_size(gclab_word_size);
     alloc_buf->set_buf(buf);
 
-    obj = alloc_buf->allocate(word_sz);
+    HeapWord* const obj = alloc_buf->allocate(word_sz);
     assert(obj != NULL, "buffer was definitely big enough...");
+    return obj;
   } else {
-    obj = _g1h->par_allocate_during_gc(purpose, word_sz, context);
+    return _g1h->par_allocate_during_gc(dest, word_sz, context);
   }
-  return obj;
 }
 
 G1DefaultParGCAllocator::G1DefaultParGCAllocator(G1CollectedHeap* g1h) :
-            G1ParGCAllocator(g1h),
-            _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
-            _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)) {
-
-  _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer;
-  _alloc_buffers[GCAllocForTenured]  = &_tenured_alloc_buffer;
-
+  G1ParGCAllocator(g1h),
+  _surviving_alloc_buffer(g1h->desired_plab_sz(InCSetState::Young)),
+  _tenured_alloc_buffer(g1h->desired_plab_sz(InCSetState::Old)) {
+  for (uint state = 0; state < InCSetState::Num; state++) {
+    _alloc_buffers[state] = NULL;
+  }
+  _alloc_buffers[InCSetState::Young] = &_surviving_alloc_buffer;
+  _alloc_buffers[InCSetState::Old]  = &_tenured_alloc_buffer;
 }
 
 void G1DefaultParGCAllocator::retire_alloc_buffers() {
-  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
-    size_t waste = _alloc_buffers[ap]->words_remaining();
-    add_to_alloc_buffer_waste(waste);
-    _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap),
-                                               true /* end_of_gc */,
-                                               false /* retain */);
+  for (uint state = 0; state < InCSetState::Num; state++) {
+    G1ParGCAllocBuffer* const buf = _alloc_buffers[state];
+    if (buf != NULL) {
+      add_to_alloc_buffer_waste(buf->words_remaining());
+      buf->flush_stats_and_retire(_g1h->alloc_buffer_stats(state),
+                                  true /* end_of_gc */,
+                                  false /* retain */);
+    }
   }
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -27,14 +27,9 @@
 
 #include "gc_implementation/g1/g1AllocationContext.hpp"
 #include "gc_implementation/g1/g1AllocRegion.hpp"
+#include "gc_implementation/g1/g1InCSetState.hpp"
 #include "gc_implementation/shared/parGCAllocBuffer.hpp"
 
-enum GCAllocPurpose {
-  GCAllocForTenured,
-  GCAllocForSurvived,
-  GCAllocPurposeCount
-};
-
 // Base class for G1 allocators.
 class G1Allocator : public CHeapObj<mtGC> {
   friend class VMStructs;
@@ -178,20 +173,40 @@
 protected:
   G1CollectedHeap* _g1h;
 
+  // The survivor alignment in effect in bytes.
+  // == 0 : don't align survivors
+  // != 0 : align survivors to that alignment
+  // These values were chosen to favor the non-alignment case since some
+  // architectures have a special compare against zero instructions.
+  const uint _survivor_alignment_bytes;
+
   size_t _alloc_buffer_waste;
   size_t _undo_waste;
 
   void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
   void add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
 
-  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context);
+  virtual void retire_alloc_buffers() = 0;
+  virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0;
 
-  virtual void retire_alloc_buffers() = 0;
-  virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) = 0;
+  // Calculate the survivor space object alignment in bytes. Returns that or 0 if
+  // there are no restrictions on survivor alignment.
+  static uint calc_survivor_alignment_bytes() {
+    assert(SurvivorAlignmentInBytes >= ObjectAlignmentInBytes, "sanity");
+    if (SurvivorAlignmentInBytes == ObjectAlignmentInBytes) {
+      // No need to align objects in the survivors differently, return 0
+      // which means "survivor alignment is not used".
+      return 0;
+    } else {
+      assert(SurvivorAlignmentInBytes > 0, "sanity");
+      return SurvivorAlignmentInBytes;
+    }
+  }
 
 public:
   G1ParGCAllocator(G1CollectedHeap* g1h) :
-    _g1h(g1h), _alloc_buffer_waste(0), _undo_waste(0) {
+    _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()),
+    _alloc_buffer_waste(0), _undo_waste(0) {
   }
 
   static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h);
@@ -199,24 +214,40 @@
   size_t alloc_buffer_waste() { return _alloc_buffer_waste; }
   size_t undo_waste() {return _undo_waste; }
 
-  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) {
-    HeapWord* obj = NULL;
-    if (purpose == GCAllocForSurvived) {
-      obj = alloc_buffer(purpose, context)->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
+  // Allocate word_sz words in dest, either directly into the regions or by
+  // allocating a new PLAB. Returns the address of the allocated memory, NULL if
+  // not successful.
+  HeapWord* allocate_direct_or_new_plab(InCSetState dest,
+                                        size_t word_sz,
+                                        AllocationContext_t context);
+
+  // Allocate word_sz words in the PLAB of dest.  Returns the address of the
+  // allocated memory, NULL if not successful.
+  HeapWord* plab_allocate(InCSetState dest,
+                          size_t word_sz,
+                          AllocationContext_t context) {
+    G1ParGCAllocBuffer* buffer = alloc_buffer(dest, context);
+    if (_survivor_alignment_bytes == 0) {
+      return buffer->allocate(word_sz);
     } else {
-      obj = alloc_buffer(purpose, context)->allocate(word_sz);
+      return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes);
     }
+  }
+
+  HeapWord* allocate(InCSetState dest, size_t word_sz,
+                     AllocationContext_t context) {
+    HeapWord* const obj = plab_allocate(dest, word_sz, context);
     if (obj != NULL) {
       return obj;
     }
-    return allocate_slow(purpose, word_sz, context);
+    return allocate_direct_or_new_plab(dest, word_sz, context);
   }
 
-  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz, AllocationContext_t context) {
-    if (alloc_buffer(purpose, context)->contains(obj)) {
-      assert(alloc_buffer(purpose, context)->contains(obj + word_sz - 1),
+  void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) {
+    if (alloc_buffer(dest, context)->contains(obj)) {
+      assert(alloc_buffer(dest, context)->contains(obj + word_sz - 1),
              "should contain whole object");
-      alloc_buffer(purpose, context)->undo_allocation(obj, word_sz);
+      alloc_buffer(dest, context)->undo_allocation(obj, word_sz);
     } else {
       CollectedHeap::fill_with_object(obj, word_sz);
       add_to_undo_waste(word_sz);
@@ -227,13 +258,17 @@
 class G1DefaultParGCAllocator : public G1ParGCAllocator {
   G1ParGCAllocBuffer  _surviving_alloc_buffer;
   G1ParGCAllocBuffer  _tenured_alloc_buffer;
-  G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount];
+  G1ParGCAllocBuffer* _alloc_buffers[InCSetState::Num];
 
 public:
   G1DefaultParGCAllocator(G1CollectedHeap* g1h);
 
-  virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) {
-    return _alloc_buffers[purpose];
+  virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) {
+    assert(dest.is_valid(),
+           err_msg("Allocation buffer index out-of-bounds: " CSETSTATE_FORMAT, dest.value()));
+    assert(_alloc_buffers[dest.value()] != NULL,
+           err_msg("Allocation buffer is NULL: " CSETSTATE_FORMAT, dest.value()));
+    return _alloc_buffers[dest.value()];
   }
 
   virtual void retire_alloc_buffers() ;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Dec 19 09:21:06 2014 +0100
@@ -3818,6 +3818,8 @@
 
         register_humongous_regions_with_in_cset_fast_test();
 
+        assert(check_cset_fast_test(), "Inconsistency in the InCSetState table.");
+
         _cm->note_start_of_gc();
         // We should not verify the per-thread SATB buffers given that
         // we have not filtered them yet (we'll do so during the
@@ -4047,29 +4049,6 @@
   return true;
 }
 
-size_t G1CollectedHeap::desired_plab_sz(GCAllocPurpose purpose)
-{
-  size_t gclab_word_size;
-  switch (purpose) {
-    case GCAllocForSurvived:
-      gclab_word_size = _survivor_plab_stats.desired_plab_sz();
-      break;
-    case GCAllocForTenured:
-      gclab_word_size = _old_plab_stats.desired_plab_sz();
-      break;
-    default:
-      assert(false, "unknown GCAllocPurpose");
-      gclab_word_size = _old_plab_stats.desired_plab_sz();
-      break;
-  }
-
-  // Prevent humongous PLAB sizes for two reasons:
-  // * PLABs are allocated using a similar paths as oops, but should
-  //   never be in a humongous region
-  // * Allowing humongous PLABs needlessly churns the region free lists
-  return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
-}
-
 void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
   _drain_in_progress = false;
   set_evac_failure_closure(cl);
@@ -4195,35 +4174,6 @@
   }
 }
 
-HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose,
-                                                  size_t word_size,
-                                                  AllocationContext_t context) {
-  if (purpose == GCAllocForSurvived) {
-    HeapWord* result = survivor_attempt_allocation(word_size, context);
-    if (result != NULL) {
-      return result;
-    } else {
-      // Let's try to allocate in the old gen in case we can fit the
-      // object there.
-      return old_attempt_allocation(word_size, context);
-    }
-  } else {
-    assert(purpose ==  GCAllocForTenured, "sanity");
-    HeapWord* result = old_attempt_allocation(word_size, context);
-    if (result != NULL) {
-      return result;
-    } else {
-      // Let's try to allocate in the survivors in case we can fit the
-      // object there.
-      return survivor_attempt_allocation(word_size, context);
-    }
-  }
-
-  ShouldNotReachHere();
-  // Trying to keep some compilers happy.
-  return NULL;
-}
-
 void G1ParCopyHelper::mark_object(oop obj) {
   assert(!_g1->heap_region_containing(obj)->in_collection_set(), "should not mark objects in the CSet");
 
@@ -4266,15 +4216,14 @@
 
   assert(_worker_id == _par_scan_state->queue_num(), "sanity");
 
-  G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
-
-  if (state == G1CollectedHeap::InCSet) {
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
     oop forwardee;
     markOop m = obj->mark();
     if (m->is_marked()) {
       forwardee = (oop) m->decode_pointer();
     } else {
-      forwardee = _par_scan_state->copy_to_survivor_space(obj, m);
+      forwardee = _par_scan_state->copy_to_survivor_space(state, obj, m);
     }
     assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
@@ -4288,7 +4237,7 @@
       do_klass_barrier(p, forwardee);
     }
   } else {
-    if (state == G1CollectedHeap::IsHumongous) {
+    if (state.is_humongous()) {
       _g1->set_humongous_is_live(obj);
     }
     // The object is not in collection set. If we're a root scanning
@@ -5144,17 +5093,17 @@
     oop obj = *p;
     assert(obj != NULL, "the caller should have filtered out NULL values");
 
-    G1CollectedHeap::in_cset_state_t cset_state = _g1->in_cset_state(obj);
-    if (cset_state == G1CollectedHeap::InNeither) {
+    const InCSetState cset_state = _g1->in_cset_state(obj);
+    if (!cset_state.is_in_cset_or_humongous()) {
       return;
     }
-    if (cset_state == G1CollectedHeap::InCSet) {
+    if (cset_state.is_in_cset()) {
       assert( obj->is_forwarded(), "invariant" );
       *p = obj->forwardee();
     } else {
       assert(!obj->is_forwarded(), "invariant" );
-      assert(cset_state == G1CollectedHeap::IsHumongous,
-             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state));
+      assert(cset_state.is_humongous(),
+             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state.value()));
       _g1->set_humongous_is_live(obj);
     }
   }
@@ -5950,6 +5899,58 @@
   heap_region_iterate(&cl);
   guarantee(!cl.failures(), "bitmap verification");
 }
+
+bool G1CollectedHeap::check_cset_fast_test() {
+  bool failures = false;
+  for (uint i = 0; i < _hrm.length(); i += 1) {
+    HeapRegion* hr = _hrm.at(i);
+    InCSetState cset_state = (InCSetState) _in_cset_fast_test.get_by_index((uint) i);
+    if (hr->is_humongous()) {
+      if (hr->in_collection_set()) {
+        gclog_or_tty->print_cr("\n## humongous region %u in CSet", i);
+        failures = true;
+        break;
+      }
+      if (cset_state.is_in_cset()) {
+        gclog_or_tty->print_cr("\n## inconsistent cset state %d for humongous region %u", cset_state.value(), i);
+        failures = true;
+        break;
+      }
+      if (hr->is_continues_humongous() && cset_state.is_humongous()) {
+        gclog_or_tty->print_cr("\n## inconsistent cset state %d for continues humongous region %u", cset_state.value(), i);
+        failures = true;
+        break;
+      }
+    } else {
+      if (cset_state.is_humongous()) {
+        gclog_or_tty->print_cr("\n## inconsistent cset state %d for non-humongous region %u", cset_state.value(), i);
+        failures = true;
+        break;
+      }
+      if (hr->in_collection_set() != cset_state.is_in_cset()) {
+        gclog_or_tty->print_cr("\n## in CSet %d / cset state %d inconsistency for region %u",
+                               hr->in_collection_set(), cset_state.value(), i);
+        failures = true;
+        break;
+      }
+      if (cset_state.is_in_cset()) {
+        if (hr->is_young() != (cset_state.is_young())) {
+          gclog_or_tty->print_cr("\n## is_young %d / cset state %d inconsistency for region %u",
+                                 hr->is_young(), cset_state.value(), i);
+          failures = true;
+          break;
+        }
+        if (hr->is_old() != (cset_state.is_old())) {
+          gclog_or_tty->print_cr("\n## is_old %d / cset state %d inconsistency for region %u",
+                                 hr->is_old(), cset_state.value(), i);
+          failures = true;
+          break;
+        }
+      }
+    }
+  }
+  return !failures;
+}
 #endif // PRODUCT
 
 void G1CollectedHeap::cleanUpCardTable() {
@@ -6518,20 +6519,20 @@
 
 HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size,
                                                  uint count,
-                                                 GCAllocPurpose ap) {
+                                                 InCSetState dest) {
   assert(FreeList_lock->owned_by_self(), "pre-condition");
 
-  if (count < g1_policy()->max_regions(ap)) {
-    bool survivor = (ap == GCAllocForSurvived);
+  if (count < g1_policy()->max_regions(dest)) {
+    const bool is_survivor = (dest.is_young());
     HeapRegion* new_alloc_region = new_region(word_size,
-                                              !survivor,
+                                              !is_survivor,
                                               true /* do_expand */);
     if (new_alloc_region != NULL) {
       // We really only need to do this for old regions given that we
       // should never scan survivors. But it doesn't hurt to do it
       // for survivors too.
       new_alloc_region->record_timestamp();
-      if (survivor) {
+      if (is_survivor) {
         new_alloc_region->set_survivor();
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor);
         check_bitmaps("Survivor Region Allocation", new_alloc_region);
@@ -6543,8 +6544,6 @@
       bool during_im = g1_policy()->during_initial_mark_pause();
       new_alloc_region->note_start_of_copying(during_im);
       return new_alloc_region;
-    } else {
-      g1_policy()->note_alloc_region_limit_reached(ap);
     }
   }
   return NULL;
@@ -6552,11 +6551,11 @@
 
 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
-                                             GCAllocPurpose ap) {
+                                             InCSetState dest) {
   bool during_im = g1_policy()->during_initial_mark_pause();
   alloc_region->note_end_of_copying(during_im);
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
-  if (ap == GCAllocForSurvived) {
+  if (dest.is_young()) {
     young_list()->add_survivor_region(alloc_region);
   } else {
     _old_set.add(alloc_region);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -32,6 +32,7 @@
 #include "gc_implementation/g1/g1AllocRegion.hpp"
 #include "gc_implementation/g1/g1BiasedArray.hpp"
 #include "gc_implementation/g1/g1HRPrinter.hpp"
+#include "gc_implementation/g1/g1InCSetState.hpp"
 #include "gc_implementation/g1/g1MonitoringSupport.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
@@ -547,15 +548,9 @@
   // allocation region, either by picking one or expanding the
   // heap, and then allocate a block of the given size. The block
   // may not be a humongous - it must fit into a single heap region.
-  HeapWord* par_allocate_during_gc(GCAllocPurpose purpose,
-                                   size_t word_size,
-                                   AllocationContext_t context);
-
-  HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose,
-                                    HeapRegion*    alloc_region,
-                                    bool           par,
-                                    size_t         word_size);
-
+  inline HeapWord* par_allocate_during_gc(InCSetState dest,
+                                          size_t word_size,
+                                          AllocationContext_t context);
   // Ensure that no further allocations can happen in "r", bearing in mind
   // that parallel threads might be attempting allocations.
   void par_allocate_remaining_space(HeapRegion* r);
@@ -577,9 +572,9 @@
 
   // For GC alloc regions.
   HeapRegion* new_gc_alloc_region(size_t word_size, uint count,
-                                  GCAllocPurpose ap);
+                                  InCSetState dest);
   void retire_gc_alloc_region(HeapRegion* alloc_region,
-                              size_t allocated_bytes, GCAllocPurpose ap);
+                              size_t allocated_bytes, InCSetState dest);
 
   // - if explicit_gc is true, the GC is for a System.gc() or a heap
   //   inspection request and should collect the entire heap
@@ -640,26 +635,11 @@
   // (Rounds up to a HeapRegion boundary.)
   bool expand(size_t expand_bytes);
 
-  // Returns the PLAB statistics given a purpose.
-  PLABStats* stats_for_purpose(GCAllocPurpose purpose) {
-    PLABStats* stats = NULL;
+  // Returns the PLAB statistics for a given destination.
+  inline PLABStats* alloc_buffer_stats(InCSetState dest);
 
-    switch (purpose) {
-    case GCAllocForSurvived:
-      stats = &_survivor_plab_stats;
-      break;
-    case GCAllocForTenured:
-      stats = &_old_plab_stats;
-      break;
-    default:
-      assert(false, "unrecognized GCAllocPurpose");
-    }
-
-    return stats;
-  }
-
-  // Determines PLAB size for a particular allocation purpose.
-  size_t desired_plab_sz(GCAllocPurpose purpose);
+  // Determines PLAB size for a given destination.
+  inline size_t desired_plab_sz(InCSetState dest);
 
   inline AllocationContextStats& allocation_context_stats();
 
@@ -683,8 +663,11 @@
   void register_humongous_regions_with_in_cset_fast_test();
   // We register a region with the fast "in collection set" test. We
   // simply set to true the array slot corresponding to this region.
-  void register_region_with_in_cset_fast_test(HeapRegion* r) {
-    _in_cset_fast_test.set_in_cset(r->hrm_index());
+  void register_young_region_with_in_cset_fast_test(HeapRegion* r) {
+    _in_cset_fast_test.set_in_young(r->hrm_index());
+  }
+  void register_old_region_with_in_cset_fast_test(HeapRegion* r) {
+    _in_cset_fast_test.set_in_old(r->hrm_index());
   }
 
   // This is a fast test on whether a reference points into the
@@ -1181,6 +1164,9 @@
   // appropriate error messages and crash.
   void check_bitmaps(const char* caller) PRODUCT_RETURN;
 
+  // Do sanity check on the contents of the in-cset fast test table.
+  bool check_cset_fast_test() PRODUCT_RETURN_( return true; );
+
   // verify_region_sets() performs verification over the region
   // lists. It will be compiled in the product code to be used when
   // necessary (i.e., during heap verification).
@@ -1276,53 +1262,15 @@
 
   inline bool is_in_cset_or_humongous(const oop obj);
 
-  enum in_cset_state_t {
-   InNeither,           // neither in collection set nor humongous
-   InCSet,              // region is in collection set only
-   IsHumongous          // region is a humongous start region
-  };
  private:
-  // Instances of this class are used for quick tests on whether a reference points
-  // into the collection set or is a humongous object (points into a humongous
-  // object).
-  // Each of the array's elements denotes whether the corresponding region is in
-  // the collection set or a humongous region.
-  // We use this to quickly reclaim humongous objects: by making a humongous region
-  // succeed this test, we sort-of add it to the collection set. During the reference
-  // iteration closures, when we see a humongous region, we simply mark it as
-  // referenced, i.e. live.
-  class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> {
-   protected:
-    char default_value() const { return G1CollectedHeap::InNeither; }
-   public:
-    void set_humongous(uintptr_t index) {
-      assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values");
-      set_by_index(index, G1CollectedHeap::IsHumongous);
-    }
-
-    void clear_humongous(uintptr_t index) {
-      set_by_index(index, G1CollectedHeap::InNeither);
-    }
-
-    void set_in_cset(uintptr_t index) {
-      assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value");
-      set_by_index(index, G1CollectedHeap::InCSet);
-    }
-
-    bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; }
-    bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; }
-    G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); }
-    void clear() { G1BiasedMappedArray<char>::clear(); }
-  };
-
   // This array is used for a quick test on whether a reference points into
   // the collection set or not. Each of the array's elements denotes whether the
   // corresponding region is in the collection set or not.
-  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+  G1InCSetStateFastTestBiasedMappedArray _in_cset_fast_test;
 
  public:
 
-  inline in_cset_state_t in_cset_state(const oop obj);
+  inline InCSetState in_cset_state(const oop obj);
 
   // Return "TRUE" iff the given object address is in the reserved
   // region of g1.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -35,6 +35,41 @@
 #include "runtime/orderAccess.inline.hpp"
 #include "utilities/taskqueue.hpp"
 
+PLABStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) {
+  switch (dest.value()) {
+    case InCSetState::Young:
+      return &_survivor_plab_stats;
+    case InCSetState::Old:
+      return &_old_plab_stats;
+    default:
+      ShouldNotReachHere();
+      return NULL; // Keep some compilers happy
+  }
+}
+
+size_t G1CollectedHeap::desired_plab_sz(InCSetState dest) {
+  size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_sz();
+  // Prevent humongous PLAB sizes for two reasons:
+  // * PLABs are allocated using a similar paths as oops, but should
+  //   never be in a humongous region
+  // * Allowing humongous PLABs needlessly churns the region free lists
+  return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
+}
+
+HeapWord* G1CollectedHeap::par_allocate_during_gc(InCSetState dest,
+                                                  size_t word_size,
+                                                  AllocationContext_t context) {
+  switch (dest.value()) {
+    case InCSetState::Young:
+      return survivor_attempt_allocation(word_size, context);
+    case InCSetState::Old:
+      return old_attempt_allocation(word_size, context);
+    default:
+      ShouldNotReachHere();
+      return NULL; // Keep some compilers happy
+  }
+}
+
 // Inline functions for G1CollectedHeap
 
 inline AllocationContextStats& G1CollectedHeap::allocation_context_stats() {
@@ -203,7 +238,7 @@
   return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj);
 }
 
-G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) {
+InCSetState G1CollectedHeap::in_cset_state(const oop obj) {
   return _in_cset_fast_test.at((HeapWord*)obj);
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Dec 19 09:21:06 2014 +0100
@@ -1437,18 +1437,6 @@
   return young_list_length < young_list_max_length;
 }
 
-uint G1CollectorPolicy::max_regions(int purpose) {
-  switch (purpose) {
-    case GCAllocForSurvived:
-      return _max_survivor_regions;
-    case GCAllocForTenured:
-      return REGIONS_UNLIMITED;
-    default:
-      ShouldNotReachHere();
-      return REGIONS_UNLIMITED;
-  };
-}
-
 void G1CollectorPolicy::update_max_gc_locker_expansion() {
   uint expansion_region_num = 0;
   if (GCLockerEdenExpansionPercent > 0) {
@@ -1634,7 +1622,7 @@
   hr->set_next_in_collection_set(_collection_set);
   _collection_set = hr;
   _collection_set_bytes_used_before += hr->used();
-  _g1->register_region_with_in_cset_fast_test(hr);
+  _g1->register_old_region_with_in_cset_fast_test(hr);
   size_t rs_length = hr->rem_set()->occupied();
   _recorded_rs_lengths += rs_length;
   _old_cset_region_length += 1;
@@ -1767,7 +1755,7 @@
   hr->set_in_collection_set(true);
   assert( hr->next_in_collection_set() == NULL, "invariant");
 
-  _g1->register_region_with_in_cset_fast_test(hr);
+  _g1->register_young_region_with_in_cset_fast_test(hr);
 }
 
 // Add the region at the RHS of the incremental cset
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -881,28 +881,20 @@
 public:
   uint tenuring_threshold() const { return _tenuring_threshold; }
 
-  inline GCAllocPurpose
-    evacuation_destination(HeapRegion* src_region, uint age, size_t word_sz) {
-      if (age < _tenuring_threshold && src_region->is_young()) {
-        return GCAllocForSurvived;
-      } else {
-        return GCAllocForTenured;
-      }
-  }
-
-  inline bool track_object_age(GCAllocPurpose purpose) {
-    return purpose == GCAllocForSurvived;
-  }
-
   static const uint REGIONS_UNLIMITED = (uint) -1;
 
-  uint max_regions(int purpose);
-
-  // The limit on regions for a particular purpose is reached.
-  void note_alloc_region_limit_reached(int purpose) {
-    if (purpose == GCAllocForSurvived) {
-      _tenuring_threshold = 0;
+  uint max_regions(InCSetState dest) {
+    switch (dest.value()) {
+      case InCSetState::Young:
+        return _max_survivor_regions;
+      case InCSetState::Old:
+        return REGIONS_UNLIMITED;
+      default:
+        assert(false, err_msg("Unknown dest state: " CSETSTATE_FORMAT, dest.value()));
+        break;
     }
+    // keep some compilers happy
+    return 0;
   }
 
   void note_start_adding_survivor_regions() {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1InCSetState.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
+
+#include "gc_implementation/g1/g1BiasedArray.hpp"
+#include "memory/allocation.hpp"
+
+// Per-region state during garbage collection.
+struct InCSetState {
+ public:
+  // We use different types to represent the state value. Particularly SPARC puts
+  // values in structs from "left to right", i.e. MSB to LSB. This results in many
+  // unnecessary shift operations when loading and storing values of this type.
+  // This degrades performance significantly (>10%) on that platform.
+  // Other tested ABIs do not seem to have this problem, and actually tend to
+  // favor smaller types, so we use the smallest usable type there.
+#ifdef SPARC
+  #define CSETSTATE_FORMAT INTPTR_FORMAT
+  typedef intptr_t in_cset_state_t;
+#else
+  #define CSETSTATE_FORMAT "%d"
+  typedef int8_t in_cset_state_t;
+#endif
+ private:
+  in_cset_state_t _value;
+ public:
+  enum {
+    // Selection of the values were driven to micro-optimize the encoding and
+    // frequency of the checks.
+    // The most common check is whether the region is in the collection set or not.
+    // This encoding allows us to use an != 0 check which in some architectures
+    // (x86*) can be encoded slightly more efficently than a normal comparison
+    // against zero.
+    // The same situation occurs when checking whether the region is humongous
+    // or not, which is encoded by values < 0.
+    // The other values are simply encoded in increasing generation order, which
+    // makes getting the next generation fast by a simple increment.
+    Humongous    = -1,    // The region is humongous - note that actually any value < 0 would be possible here.
+    NotInCSet    =  0,    // The region is not in the collection set.
+    Young        =  1,    // The region is in the collection set and a young region.
+    Old          =  2,    // The region is in the collection set and an old region.
+    Num
+  };
+
+  InCSetState(in_cset_state_t value = NotInCSet) : _value(value) {
+    assert(is_valid(), err_msg("Invalid state %d", _value));
+  }
+
+  in_cset_state_t value() const        { return _value; }
+
+  void set_old()                       { _value = Old; }
+
+  bool is_in_cset_or_humongous() const { return _value != NotInCSet; }
+  bool is_in_cset() const              { return _value > NotInCSet; }
+  bool is_humongous() const            { return _value < NotInCSet; }
+  bool is_young() const                { return _value == Young; }
+  bool is_old() const                  { return _value == Old; }
+
+#ifdef ASSERT
+  bool is_default() const              { return !is_in_cset_or_humongous(); }
+  bool is_valid() const                { return (_value >= Humongous) && (_value < Num); }
+  bool is_valid_gen() const            { return (_value >= Young && _value <= Old); }
+#endif
+};
+
+// Instances of this class are used for quick tests on whether a reference points
+// into the collection set and into which generation or is a humongous object
+//
+// Each of the array's elements indicates whether the corresponding region is in
+// the collection set and if so in which generation, or a humongous region.
+//
+// We use this to speed up reference processing during young collection and
+// quickly reclaim humongous objects. For the latter, by making a humongous region
+// succeed this test, we sort-of add it to the collection set. During the reference
+// iteration closures, when we see a humongous region, we then simply mark it as
+// referenced, i.e. live.
+class G1InCSetStateFastTestBiasedMappedArray : public G1BiasedMappedArray<InCSetState> {
+ protected:
+  InCSetState default_value() const { return InCSetState::NotInCSet; }
+ public:
+  void set_humongous(uintptr_t index) {
+    assert(get_by_index(index).is_default(),
+           err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
+    set_by_index(index, InCSetState::Humongous);
+  }
+
+  void clear_humongous(uintptr_t index) {
+    set_by_index(index, InCSetState::NotInCSet);
+  }
+
+  void set_in_young(uintptr_t index) {
+    assert(get_by_index(index).is_default(),
+           err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
+    set_by_index(index, InCSetState::Young);
+  }
+
+  void set_in_old(uintptr_t index) {
+    assert(get_by_index(index).is_default(),
+           err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
+    set_by_index(index, InCSetState::Old);
+  }
+
+  bool is_in_cset_or_humongous(HeapWord* addr) const { return at(addr).is_in_cset_or_humongous(); }
+  bool is_in_cset(HeapWord* addr) const { return at(addr).is_in_cset(); }
+  InCSetState at(HeapWord* addr) const { return get_by_address(addr); }
+  void clear() { G1BiasedMappedArray<InCSetState>::clear(); }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
 
 #include "memory/iterator.hpp"
+#include "oops/markOop.hpp"
 
 class HeapRegion;
 class G1CollectedHeap;
@@ -256,7 +257,8 @@
   }
 
   bool self_forwarded(oop obj) {
-    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
+    markOop m = obj->mark();
+    bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj));
     return result;
   }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -67,8 +67,8 @@
 
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
-    if (state == G1CollectedHeap::InCSet) {
+    const InCSetState state = _g1->in_cset_state(obj);
+    if (state.is_in_cset()) {
       // We're not going to even bother checking whether the object is
       // already forwarded or not, as this usually causes an immediate
       // stall. We'll try to prefetch the object (for write, given that
@@ -87,7 +87,7 @@
 
       _par_scan_state->push_on_queue(p);
     } else {
-      if (state == G1CollectedHeap::IsHumongous) {
+      if (state.is_humongous()) {
         _g1->set_humongous_is_live(obj);
       }
       _par_scan_state->update_rs(_from, p, _worker_id);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Fri Dec 19 09:21:06 2014 +0100
@@ -38,6 +38,7 @@
     _g1_rem(g1h->g1_rem_set()),
     _hash_seed(17), _queue_num(queue_num),
     _term_attempts(0),
+    _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()),
     _age_table(false), _scanner(g1h, rp),
     _strong_roots_time(0), _term_time(0) {
   _scanner.set_par_scan_thread_state(this);
@@ -59,6 +60,12 @@
 
   _g1_par_allocator = G1ParGCAllocator::create_allocator(_g1h);
 
+  _dest[InCSetState::NotInCSet]    = InCSetState::NotInCSet;
+  // The dest for Young is used when the objects are aged enough to
+  // need to be moved to the next space.
+  _dest[InCSetState::Young]        = InCSetState::Old;
+  _dest[InCSetState::Old]          = InCSetState::Old;
+
   _start = os::elapsedTime();
 }
 
@@ -150,52 +157,94 @@
   } while (!_refs->is_empty());
 }
 
-oop G1ParScanThreadState::copy_to_survivor_space(oop const old,
+HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state,
+                                                      InCSetState* dest,
+                                                      size_t word_sz,
+                                                      AllocationContext_t const context) {
+  assert(state.is_in_cset_or_humongous(), err_msg("Unexpected state: " CSETSTATE_FORMAT, state.value()));
+  assert(dest->is_in_cset_or_humongous(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
+
+  // Right now we only have two types of regions (young / old) so
+  // let's keep the logic here simple. We can generalize it when necessary.
+  if (dest->is_young()) {
+    HeapWord* const obj_ptr = _g1_par_allocator->allocate(InCSetState::Old,
+                                                          word_sz, context);
+    if (obj_ptr == NULL) {
+      return NULL;
+    }
+    // Make sure that we won't attempt to copy any other objects out
+    // of a survivor region (given that apparently we cannot allocate
+    // any new ones) to avoid coming into this slow path.
+    _tenuring_threshold = 0;
+    dest->set_old();
+    return obj_ptr;
+  } else {
+    assert(dest->is_old(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
+    // no other space to try.
+    return NULL;
+  }
+}
+
+InCSetState G1ParScanThreadState::next_state(InCSetState const state, markOop const m, uint& age) {
+  if (state.is_young()) {
+    age = !m->has_displaced_mark_helper() ? m->age()
+                                          : m->displaced_mark_helper()->age();
+    if (age < _tenuring_threshold) {
+      return state;
+    }
+  }
+  return dest(state);
+}
+
+oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state,
+                                                 oop const old,
                                                  markOop const old_mark) {
-  size_t word_sz = old->size();
-  HeapRegion* from_region = _g1h->heap_region_containing_raw(old);
+  const size_t word_sz = old->size();
+  HeapRegion* const from_region = _g1h->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
-  int       young_index = from_region->young_index_in_cset()+1;
+  const int young_index = from_region->young_index_in_cset()+1;
   assert( (from_region->is_young() && young_index >  0) ||
          (!from_region->is_young() && young_index == 0), "invariant" );
-  G1CollectorPolicy* g1p = _g1h->g1_policy();
-  uint age = old_mark->has_displaced_mark_helper() ? old_mark->displaced_mark_helper()->age()
-                                                   : old_mark->age();
-  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
-                                                             word_sz);
-  AllocationContext_t context = from_region->allocation_context();
-  HeapWord* obj_ptr = _g1_par_allocator->allocate(alloc_purpose, word_sz, context);
+  const AllocationContext_t context = from_region->allocation_context();
+
+  uint age = 0;
+  InCSetState dest_state = next_state(state, old_mark, age);
+  HeapWord* obj_ptr = _g1_par_allocator->plab_allocate(dest_state, word_sz, context);
+
+  // PLAB allocations should succeed most of the time, so we'll
+  // normally check against NULL once and that's it.
+  if (obj_ptr == NULL) {
+    obj_ptr = _g1_par_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context);
+    if (obj_ptr == NULL) {
+      obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context);
+      if (obj_ptr == NULL) {
+        // This will either forward-to-self, or detect that someone else has
+        // installed a forwarding pointer.
+        return _g1h->handle_evacuation_failure_par(this, old);
+      }
+    }
+  }
+
+  assert(obj_ptr != NULL, "when we get here, allocation should have succeeded");
 #ifndef PRODUCT
   // Should this evacuation fail?
   if (_g1h->evacuation_should_fail()) {
-    if (obj_ptr != NULL) {
-      _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context);
-      obj_ptr = NULL;
-    }
+    // Doing this after all the allocation attempts also tests the
+    // undo_allocation() method too.
+    _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context);
+    return _g1h->handle_evacuation_failure_par(this, old);
   }
 #endif // !PRODUCT
 
-  if (obj_ptr == NULL) {
-    // This will either forward-to-self, or detect that someone else has
-    // installed a forwarding pointer.
-    return _g1h->handle_evacuation_failure_par(this, old);
-  }
-
-  oop obj = oop(obj_ptr);
-
   // We're going to allocate linearly, so might as well prefetch ahead.
   Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
 
-  oop forward_ptr = old->forward_to_atomic(obj);
+  const oop obj = oop(obj_ptr);
+  const oop forward_ptr = old->forward_to_atomic(obj);
   if (forward_ptr == NULL) {
     Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
 
-    // alloc_purpose is just a hint to allocate() above, recheck the type of region
-    // we actually allocated from and update alloc_purpose accordingly
-    HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr);
-    alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured;
-
-    if (g1p->track_object_age(alloc_purpose)) {
+    if (dest_state.is_young()) {
       if (age < markOopDesc::max_age) {
         age++;
       }
@@ -215,13 +264,19 @@
     }
 
     if (G1StringDedup::is_enabled()) {
-      G1StringDedup::enqueue_from_evacuation(from_region->is_young(),
-                                             to_region->is_young(),
+      const bool is_from_young = state.is_young();
+      const bool is_to_young = dest_state.is_young();
+      assert(is_from_young == _g1h->heap_region_containing_raw(old)->is_young(),
+             "sanity");
+      assert(is_to_young == _g1h->heap_region_containing_raw(obj)->is_young(),
+             "sanity");
+      G1StringDedup::enqueue_from_evacuation(is_from_young,
+                                             is_to_young,
                                              queue_num(),
                                              obj);
     }
 
-    size_t* surv_young_words = surviving_young_words();
+    size_t* const surv_young_words = surviving_young_words();
     surv_young_words[young_index] += word_sz;
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
@@ -232,14 +287,13 @@
       oop* old_p = set_partial_array_mask(old);
       push_on_queue(old_p);
     } else {
-      // No point in using the slower heap_region_containing() method,
-      // given that we know obj is in the heap.
-      _scanner.set_region(_g1h->heap_region_containing_raw(obj));
+      HeapRegion* const to_region = _g1h->heap_region_containing_raw(obj_ptr);
+      _scanner.set_region(to_region);
       obj->oop_iterate_backwards(&_scanner);
     }
+    return obj;
   } else {
-    _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context);
-    obj = forward_ptr;
+    _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context);
+    return forward_ptr;
   }
-  return obj;
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -46,14 +46,16 @@
   G1SATBCardTableModRefBS* _ct_bs;
   G1RemSet* _g1_rem;
 
-  G1ParGCAllocator*   _g1_par_allocator;
-
-  ageTable            _age_table;
+  G1ParGCAllocator* _g1_par_allocator;
 
-  G1ParScanClosure    _scanner;
+  ageTable          _age_table;
+  InCSetState       _dest[InCSetState::Num];
+  // Local tenuring threshold.
+  uint              _tenuring_threshold;
+  G1ParScanClosure  _scanner;
 
-  size_t           _alloc_buffer_waste;
-  size_t           _undo_waste;
+  size_t            _alloc_buffer_waste;
+  size_t            _undo_waste;
 
   OopsInHeapRegionClosure*      _evac_failure_cl;
 
@@ -82,6 +84,14 @@
   DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
   G1SATBCardTableModRefBS* ctbs()                { return _ct_bs; }
 
+  InCSetState dest(InCSetState original) const {
+    assert(original.is_valid(),
+           err_msg("Original state invalid: " CSETSTATE_FORMAT, original.value()));
+    assert(_dest[original.value()].is_valid_gen(),
+           err_msg("Dest state is invalid: " CSETSTATE_FORMAT, _dest[original.value()].value()));
+    return _dest[original.value()];
+  }
+
  public:
   G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp);
   ~G1ParScanThreadState();
@@ -112,7 +122,6 @@
       }
     }
   }
- public:
 
   void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
     _evac_failure_cl = evac_failure_cl;
@@ -193,9 +202,20 @@
   template <class T> inline void deal_with_reference(T* ref_to_scan);
 
   inline void dispatch_reference(StarTask ref);
+
+  // Tries to allocate word_sz in the PLAB of the next "generation" after trying to
+  // allocate into dest. State is the original (source) cset state for the object
+  // that is allocated for.
+  // Returns a non-NULL pointer if successful, and updates dest if required.
+  HeapWord* allocate_in_next_plab(InCSetState const state,
+                                  InCSetState* dest,
+                                  size_t word_sz,
+                                  AllocationContext_t const context);
+
+  inline InCSetState next_state(InCSetState const state, markOop const m, uint& age);
  public:
 
-  oop copy_to_survivor_space(oop const obj, markOop const old_mark);
+  oop copy_to_survivor_space(InCSetState const state, oop const obj, markOop const old_mark);
 
   void trim_queue();
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Wed Dec 17 22:32:44 2014 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Fri Dec 19 09:21:06 2014 +0100
@@ -38,21 +38,21 @@
   // set, due to (benign) races in the claim mechanism during RSet scanning more
   // than one thread might claim the same card. So the same card may be
   // processed multiple times. So redo this check.
-  G1CollectedHeap::in_cset_state_t in_cset_state = _g1h->in_cset_state(obj);
-  if (in_cset_state == G1CollectedHeap::InCSet) {
+  const InCSetState in_cset_state = _g1h->in_cset_state(obj);
+  if (in_cset_state.is_in_cset()) {
     oop forwardee;
     markOop m = obj->mark();
     if (m->is_marked()) {
       forwardee = (oop) m->decode_pointer();
     } else {
-      forwardee = copy_to_survivor_space(obj, m);
+      forwardee = copy_to_survivor_space(in_cset_state, obj, m);
     }
     oopDesc::encode_store_heap_oop(p, forwardee);
-  } else if (in_cset_state == G1CollectedHeap::IsHumongous) {
+  } else if (in_cset_state.is_humongous()) {
     _g1h->set_humongous_is_live(obj);
   } else {
-    assert(in_cset_state == G1CollectedHeap::InNeither,
-           err_msg("In_cset_state must be InNeither here, but is %d", in_cset_state));
+    assert(!in_cset_state.is_in_cset_or_humongous(),
+           err_msg("In_cset_state must be NotInCSet here, but is " CSETSTATE_FORMAT, in_cset_state.value()));
   }
 
   assert(obj != NULL, "Must be");