Merge
author coleenp
Tue, 25 Aug 2015 17:30:14 +0200
changeset 32584 2e0377d76c5a
parent 32583 e95e2cfc9cf5 (current diff)
parent 32390 b0f6868d46ee (diff)
child 32586 6ea0bf4c4e28
child 32587 397d087f7204
--- a/hotspot/src/share/vm/gc/g1/g1AllocRegion.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1AllocRegion.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -205,11 +205,11 @@
 }
 
 #if G1_ALLOC_REGION_TRACING
-void G1AllocRegion::trace(const char* str, size_t word_size, HeapWord* result) {
+void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_word_size, size_t actual_word_size, HeapWord* result) {
   // All the calls to trace that set either just the size or the size
   // and the result are considered part of level 2 tracing and are
   // skipped during level 1 tracing.
-  if ((word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) {
+  if ((actual_word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) {
     const size_t buffer_length = 128;
     char hr_buffer[buffer_length];
     char rest_buffer[buffer_length];
@@ -226,10 +226,10 @@
 
     if (G1_ALLOC_REGION_TRACING > 1) {
       if (result != NULL) {
-        jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT " " PTR_FORMAT,
-                     word_size, result);
-      } else if (word_size != 0) {
-        jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT, word_size);
+        jio_snprintf(rest_buffer, buffer_length, "min " SIZE_FORMAT " desired " SIZE_FORMAT " actual " SIZE_FORMAT " " PTR_FORMAT,
+                     min_word_size, desired_word_size, actual_word_size, result);
+      } else if (min_word_size != 0) {
+        jio_snprintf(rest_buffer, buffer_length, "min " SIZE_FORMAT " desired " SIZE_FORMAT, min_word_size, desired_word_size);
       } else {
         jio_snprintf(rest_buffer, buffer_length, "");
       }
--- a/hotspot/src/share/vm/gc/g1/g1AllocRegion.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1AllocRegion.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -104,6 +104,15 @@
   static inline HeapWord* par_allocate(HeapRegion* alloc_region,
                                        size_t word_size,
                                        bool bot_updates);
+  // Perform an MT-safe allocation out of the given region, with the given
+  // minimum and desired size. Returns the actual size allocated (between
+  // minimum and desired size) in actual_word_size if the allocation has been
+  // successful.
+  static inline HeapWord* par_allocate(HeapRegion* alloc_region,
+                                       size_t min_word_size,
+                                       size_t desired_word_size,
+                                       size_t* actual_word_size,
+                                       bool bot_updates);
 
   // Ensure that the region passed as a parameter has been filled up
  // so that no one else can allocate out of it any more.
@@ -159,7 +168,18 @@
   // First-level allocation: Should be called without holding a
   // lock. It will try to allocate lock-free out of the active region,
   // or return NULL if it was unable to.
-  inline HeapWord* attempt_allocation(size_t word_size, bool bot_updates);
+  inline HeapWord* attempt_allocation(size_t word_size,
+                                      bool bot_updates);
+  // Perform an allocation out of the current allocation region, with the given
+  // minimum and desired size. Returns the actual size allocated (between
+  // minimum and desired size) in actual_word_size if the allocation has been
+  // successful.
+  // Should be called without holding a lock. It will try to allocate lock-free
+  // out of the active region, or return NULL if it was unable to.
+  inline HeapWord* attempt_allocation(size_t min_word_size,
+                                      size_t desired_word_size,
+                                      size_t* actual_word_size,
+                                      bool bot_updates);
 
   // Second-level allocation: Should be called while holding a
   // lock. It will try to first allocate lock-free out of the active
@@ -169,6 +189,14 @@
   // it conform to its locking protocol.
   inline HeapWord* attempt_allocation_locked(size_t word_size,
                                              bool bot_updates);
+  // Same as attempt_allocation_locked(size_t, bool), but allowing specification
+  // of the minimum word size of the block in min_word_size, and the maximum word
+  // size of the allocation in desired_word_size. The actual size of the block is
+  // returned in actual_word_size.
+  inline HeapWord* attempt_allocation_locked(size_t min_word_size,
+                                             size_t desired_word_size,
+                                             size_t* actual_word_size,
+                                             bool bot_updates);
 
   // Should be called to allocate a new region even if the max of this
   // type of regions has been reached. Should only be called if other
@@ -191,9 +219,17 @@
   virtual HeapRegion* release();
 
 #if G1_ALLOC_REGION_TRACING
-  void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL);
+  void trace(const char* str,
+             size_t min_word_size = 0,
+             size_t desired_word_size = 0,
+             size_t actual_word_size = 0,
+             HeapWord* result = NULL);
 #else // G1_ALLOC_REGION_TRACING
-  void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL) { }
+  void trace(const char* str,
+             size_t min_word_size = 0,
+             size_t desired_word_size = 0,
+             size_t actual_word_size = 0,
+             HeapWord* result = NULL) { }
 #endif // G1_ALLOC_REGION_TRACING
 };
 
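
The declarations above define a two-level protocol: a lock-free first attempt on the active region, then a locked second attempt that may retire the region and install a fresh one, with both levels now taking a [min_word_size, desired_word_size] range and reporting the granted size through actual_word_size. A minimal sketch of the intended caller pattern against the patched API (the wrapper name is hypothetical; the real callers are in g1Allocator.cpp below):

    // Hypothetical wrapper driving both levels; accepts any block between
    // min_word_size and desired_word_size words.
    HeapWord* allocate_ranged(G1AllocRegion* region,
                              size_t min_word_size,
                              size_t desired_word_size,
                              size_t* actual_word_size,
                              bool bot_updates) {
      // First level: lock-free attempt on the active region.
      HeapWord* result = region->attempt_allocation(min_word_size,
                                                    desired_word_size,
                                                    actual_word_size,
                                                    bot_updates);
      if (result == NULL) {
        // Second level: retry under the lock; this may retire the active
        // region and allocate out of a new one.
        MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
        result = region->attempt_allocation_locked(min_word_size,
                                                   desired_word_size,
                                                   actual_word_size,
                                                   bot_updates);
      }
      return result;
    }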
--- a/hotspot/src/share/vm/gc/g1/g1AllocRegion.inline.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1AllocRegion.inline.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -40,52 +40,74 @@
   }
 }
 
+inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region, size_t word_size, bool bot_updates) {
+  size_t temp;
+  return par_allocate(alloc_region, word_size, word_size, &temp, bot_updates);
+}
+
 inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region,
-                                             size_t word_size,
+                                             size_t min_word_size,
+                                             size_t desired_word_size,
+                                             size_t* actual_word_size,
                                              bool bot_updates) {
   assert(alloc_region != NULL, err_msg("pre-condition"));
   assert(!alloc_region->is_empty(), err_msg("pre-condition"));
 
   if (!bot_updates) {
-    return alloc_region->par_allocate_no_bot_updates(word_size);
+    return alloc_region->par_allocate_no_bot_updates(min_word_size, desired_word_size, actual_word_size);
   } else {
-    return alloc_region->par_allocate(word_size);
+    return alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
   }
 }
 
-inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size,
+inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size, bool bot_updates) {
+  size_t temp;
+  return attempt_allocation(word_size, word_size, &temp, bot_updates);
+}
+
+inline HeapWord* G1AllocRegion::attempt_allocation(size_t min_word_size,
+                                                   size_t desired_word_size,
+                                                   size_t* actual_word_size,
                                                    bool bot_updates) {
   assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
 
   HeapRegion* alloc_region = _alloc_region;
   assert(alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
 
-  HeapWord* result = par_allocate(alloc_region, word_size, bot_updates);
+  HeapWord* result = par_allocate(alloc_region, min_word_size, desired_word_size, actual_word_size, bot_updates);
   if (result != NULL) {
-    trace("alloc", word_size, result);
+    trace("alloc", min_word_size, desired_word_size, *actual_word_size, result);
     return result;
   }
-  trace("alloc failed", word_size);
+  trace("alloc failed", min_word_size, desired_word_size);
   return NULL;
 }
 
-inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size,
+inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size, bool bot_updates) {
+  size_t temp;
+  return attempt_allocation_locked(word_size, word_size, &temp, bot_updates);
+}
+
+inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t min_word_size,
+                                                          size_t desired_word_size,
+                                                          size_t* actual_word_size,
                                                           bool bot_updates) {
   // First we have to redo the allocation, assuming we're holding the
   // appropriate lock, in case another thread changed the region while
   // we were waiting to get the lock.
-  HeapWord* result = attempt_allocation(word_size, bot_updates);
+  HeapWord* result = attempt_allocation(min_word_size, desired_word_size, actual_word_size, bot_updates);
   if (result != NULL) {
     return result;
   }
 
   retire(true /* fill_up */);
-  result = new_alloc_region_and_allocate(word_size, false /* force */);
+  result = new_alloc_region_and_allocate(desired_word_size, false /* force */);
   if (result != NULL) {
-    trace("alloc locked (second attempt)", word_size, result);
+    *actual_word_size = desired_word_size;
+    trace("alloc locked (second attempt)", min_word_size, desired_word_size, *actual_word_size, result);
     return result;
   }
-  trace("alloc locked failed", word_size);
+  trace("alloc locked failed", min_word_size, desired_word_size);
   return NULL;
 }
 
@@ -94,13 +116,13 @@
   assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
   assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
 
-  trace("forcing alloc");
+  trace("forcing alloc", word_size, word_size);
   HeapWord* result = new_alloc_region_and_allocate(word_size, true /* force */);
   if (result != NULL) {
-    trace("alloc forced", word_size, result);
+    trace("alloc forced", word_size, word_size, word_size, result);
     return result;
   }
-  trace("alloc forced failed", word_size);
+  trace("alloc forced failed", word_size, word_size);
   return NULL;
 }
 
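
Note the shape of attempt_allocation_locked() above: it first re-runs the lock-free attempt, because another thread may have retired the old region and installed a fresh one while this thread waited for the lock; only then does it retire and replace the region itself, in which case the new region is empty and the full desired_word_size is granted. A standalone sketch of that double-check, with hypothetical names and std::mutex standing in for the caller-held HotSpot lock:

    #include <mutex>

    // alloc_fast: the lock-free attempt; alloc_refill: retire the current
    // region, install a new one, and allocate from it.
    template <typename FastAlloc, typename SlowRefill>
    void* allocate_double_checked(std::mutex& lock,
                                  FastAlloc alloc_fast,
                                  SlowRefill alloc_refill) {
      std::lock_guard<std::mutex> guard(lock);
      void* p = alloc_fast();  // re-check: someone may have refilled already
      if (p != nullptr) {
        return p;
      }
      return alloc_refill();   // pay for the region replacement ourselves
    }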
--- a/hotspot/src/share/vm/gc/g1/g1Allocator.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1Allocator.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc/g1/g1Allocator.inline.hpp"
+#include "gc/g1/g1AllocRegion.inline.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectorPolicy.hpp"
 #include "gc/g1/g1MarkSweep.hpp"
@@ -143,11 +144,24 @@
 HeapWord* G1Allocator::par_allocate_during_gc(InCSetState dest,
                                               size_t word_size,
                                               AllocationContext_t context) {
+  size_t temp = 0;
+  HeapWord* result = par_allocate_during_gc(dest, word_size, word_size, &temp, context);
+  assert(result == NULL || temp == word_size,
+         err_msg("Requested " SIZE_FORMAT " words, but got " SIZE_FORMAT " at " PTR_FORMAT,
+                 word_size, temp, p2i(result)));
+  return result;
+}
+
+HeapWord* G1Allocator::par_allocate_during_gc(InCSetState dest,
+                                              size_t min_word_size,
+                                              size_t desired_word_size,
+                                              size_t* actual_word_size,
+                                              AllocationContext_t context) {
   switch (dest.value()) {
     case InCSetState::Young:
-      return survivor_attempt_allocation(word_size, context);
+      return survivor_attempt_allocation(min_word_size, desired_word_size, actual_word_size, context);
     case InCSetState::Old:
-      return old_attempt_allocation(word_size, context);
+      return old_attempt_allocation(min_word_size, desired_word_size, actual_word_size, context);
     default:
       ShouldNotReachHere();
       return NULL; // Keep some compilers happy
@@ -170,37 +184,49 @@
   _old_is_full = true;
 }
 
-HeapWord* G1Allocator::survivor_attempt_allocation(size_t word_size,
+HeapWord* G1Allocator::survivor_attempt_allocation(size_t min_word_size,
+                                                   size_t desired_word_size,
+                                                   size_t* actual_word_size,
                                                    AllocationContext_t context) {
-  assert(!_g1h->is_humongous(word_size),
+  assert(!_g1h->is_humongous(desired_word_size),
          "we should not be seeing humongous-size allocations in this path");
 
-  HeapWord* result = survivor_gc_alloc_region(context)->attempt_allocation(word_size,
+  HeapWord* result = survivor_gc_alloc_region(context)->attempt_allocation(min_word_size,
+                                                                           desired_word_size,
+                                                                           actual_word_size,
                                                                            false /* bot_updates */);
   if (result == NULL && !survivor_is_full(context)) {
     MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
-    result = survivor_gc_alloc_region(context)->attempt_allocation_locked(word_size,
+    result = survivor_gc_alloc_region(context)->attempt_allocation_locked(min_word_size,
+                                                                          desired_word_size,
+                                                                          actual_word_size,
                                                                           false /* bot_updates */);
     if (result == NULL) {
       set_survivor_full(context);
     }
   }
   if (result != NULL) {
-    _g1h->dirty_young_block(result, word_size);
+    _g1h->dirty_young_block(result, *actual_word_size);
   }
   return result;
 }
 
-HeapWord* G1Allocator::old_attempt_allocation(size_t word_size,
+HeapWord* G1Allocator::old_attempt_allocation(size_t min_word_size,
+                                              size_t desired_word_size,
+                                              size_t* actual_word_size,
                                               AllocationContext_t context) {
-  assert(!_g1h->is_humongous(word_size),
+  assert(!_g1h->is_humongous(desired_word_size),
          "we should not be seeing humongous-size allocations in this path");
 
-  HeapWord* result = old_gc_alloc_region(context)->attempt_allocation(word_size,
+  HeapWord* result = old_gc_alloc_region(context)->attempt_allocation(min_word_size,
+                                                                      desired_word_size,
+                                                                      actual_word_size,
                                                                       true /* bot_updates */);
   if (result == NULL && !old_is_full(context)) {
     MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
-    result = old_gc_alloc_region(context)->attempt_allocation_locked(word_size,
+    result = old_gc_alloc_region(context)->attempt_allocation_locked(min_word_size,
+                                                                     desired_word_size,
+                                                                     actual_word_size,
                                                                      true /* bot_updates */);
     if (result == NULL) {
       set_old_full(context);
@@ -242,10 +268,19 @@
     G1PLAB* alloc_buf = alloc_buffer(dest, context);
     alloc_buf->retire();
 
-    HeapWord* buf = _allocator->par_allocate_during_gc(dest, plab_word_size, context);
+    size_t actual_plab_size = 0;
+    HeapWord* buf = _allocator->par_allocate_during_gc(dest,
+                                                       required_in_plab,
+                                                       plab_word_size,
+                                                       &actual_plab_size,
+                                                       context);
+
+    assert(buf == NULL || ((actual_plab_size >= required_in_plab) && (actual_plab_size <= plab_word_size)),
+           err_msg("Requested at minimum " SIZE_FORMAT ", desired " SIZE_FORMAT " words, but got " SIZE_FORMAT " at " PTR_FORMAT,
+                   required_in_plab, plab_word_size, actual_plab_size, p2i(buf)));
+
     if (buf != NULL) {
-      // Otherwise.
-      alloc_buf->set_buf(buf, plab_word_size);
+      alloc_buf->set_buf(buf, actual_plab_size);
 
       HeapWord* const obj = alloc_buf->allocate(word_sz);
       assert(obj != NULL, err_msg("PLAB should have been big enough, tried to allocate "
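
The PLAB refill above is the main client of the new ranged interface: it asks for the preferred PLAB size but accepts anything that still fits the object being copied, so a nearly-full region no longer fails the refill outright. A condensed sketch of that flow (parameter names follow the patch; the wrapper itself is hypothetical):

    HeapWord* refill_plab_and_allocate(G1Allocator* allocator,
                                       InCSetState dest,
                                       G1PLAB* plab,
                                       size_t word_sz,          // object to copy
                                       size_t required_in_plab, // word_sz plus PLAB reserve
                                       size_t plab_word_size,   // preferred PLAB size
                                       AllocationContext_t context) {
      size_t actual_plab_size = 0;
      HeapWord* buf = allocator->par_allocate_during_gc(dest,
                                                        required_in_plab,
                                                        plab_word_size,
                                                        &actual_plab_size,
                                                        context);
      if (buf == NULL) {
        return NULL; // caller falls back to a direct, non-PLAB allocation
      }
      plab->set_buf(buf, actual_plab_size); // size the PLAB to what we got
      return plab->allocate(word_sz);
    }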
--- a/hotspot/src/share/vm/gc/g1/g1Allocator.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1Allocator.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -57,10 +57,14 @@
   virtual OldGCAllocRegion* old_gc_alloc_region(AllocationContext_t context) = 0;
 
   // Allocation attempt during GC for a survivor object / PLAB.
-  inline HeapWord* survivor_attempt_allocation(size_t word_size,
+  inline HeapWord* survivor_attempt_allocation(size_t min_word_size,
+                                               size_t desired_word_size,
+                                               size_t* actual_word_size,
                                                AllocationContext_t context);
   // Allocation attempt during GC for an old object / PLAB.
-  inline HeapWord* old_attempt_allocation(size_t word_size,
+  inline HeapWord* old_attempt_allocation(size_t min_word_size,
+                                          size_t desired_word_size,
+                                          size_t* actual_word_size,
                                           AllocationContext_t context);
 public:
   G1Allocator(G1CollectedHeap* heap) : _g1h(heap), _survivor_is_full(false), _old_is_full(false) { }
@@ -102,6 +106,12 @@
                                    size_t word_size,
                                    AllocationContext_t context);
 
+  HeapWord* par_allocate_during_gc(InCSetState dest,
+                                   size_t min_word_size,
+                                   size_t desired_word_size,
+                                   size_t* actual_word_size,
+                                   AllocationContext_t context);
+
   virtual size_t used_in_alloc_regions() = 0;
 };
 
--- a/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -26,7 +26,7 @@
 #define SHARE_VM_GC_G1_G1BLOCKOFFSETTABLE_INLINE_HPP
 
 #include "gc/g1/g1BlockOffsetTable.hpp"
-#include "gc/g1/heapRegion.inline.hpp"
+#include "gc/g1/heapRegion.hpp"
 #include "gc/shared/space.hpp"
 
 inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
--- a/hotspot/src/share/vm/gc/g1/heapRegion.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -109,7 +109,7 @@
 // evacuation pauses between two cleanups, which is _highly_ unlikely.
 class G1OffsetTableContigSpace: public CompactibleSpace {
   friend class VMStructs;
-  HeapWord* _top;
+  HeapWord* volatile _top;
   HeapWord* volatile _scan_top;
  protected:
   G1BlockOffsetArrayContigSpace _offsets;
@@ -134,10 +134,18 @@
   // Reset the G1OffsetTableContigSpace.
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
 
-  HeapWord** top_addr() { return &_top; }
-  // Allocation helpers (return NULL if full).
-  inline HeapWord* allocate_impl(size_t word_size, HeapWord* end_value);
-  inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value);
+  HeapWord* volatile* top_addr() { return &_top; }
+  // Try to allocate at least min_word_size and up to desired_word_size from this Space.
+  // Returns NULL if not possible, otherwise sets actual_word_size to the amount of
+  // space allocated.
+  // This version assumes that all allocation requests to this Space are properly
+  // synchronized.
+  inline HeapWord* allocate_impl(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
+  // Try to allocate at least min_word_size and up to desired_word_size from this Space.
+  // Returns NULL if not possible, otherwise sets actual_word_size to the amount of
+  // space allocated.
+  // This version synchronizes with other calls to par_allocate_impl().
+  inline HeapWord* par_allocate_impl(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
 
  public:
   void reset_after_compaction() { set_top(compaction_top()); }
@@ -179,9 +187,14 @@
   HeapWord* block_start(const void* p);
   HeapWord* block_start_const(const void* p) const;
 
-  // Add offset table update.
+  // Allocation (return NULL if full).  Assumes the caller has established
+  // mutually exclusive access to the space.
+  HeapWord* allocate(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
+  // Allocation (return NULL if full).  Enforces mutual exclusion internally.
+  HeapWord* par_allocate(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
+
   virtual HeapWord* allocate(size_t word_size);
-  HeapWord* par_allocate(size_t word_size);
+  virtual HeapWord* par_allocate(size_t word_size);
 
   HeapWord* saved_mark_word() const { ShouldNotReachHere(); return NULL; }
 
@@ -351,8 +364,9 @@
   // Override for scan_and_forward support.
   void prepare_for_compaction(CompactPoint* cp);
 
-  inline HeapWord* par_allocate_no_bot_updates(size_t word_size);
+  inline HeapWord* par_allocate_no_bot_updates(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
   inline HeapWord* allocate_no_bot_updates(size_t word_size);
+  inline HeapWord* allocate_no_bot_updates(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
 
   // If this region is a member of a HeapRegionManager, the index in that
   // sequence, otherwise -1.
--- a/hotspot/src/share/vm/gc/g1/heapRegion.inline.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.inline.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -32,33 +32,39 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.inline.hpp"
 
-// This version requires locking.
-inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t size,
-                                                HeapWord* const end_value) {
+inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t min_word_size,
+                                                         size_t desired_word_size,
+                                                         size_t* actual_size) {
   HeapWord* obj = top();
-  if (pointer_delta(end_value, obj) >= size) {
-    HeapWord* new_top = obj + size;
+  size_t available = pointer_delta(end(), obj);
+  size_t want_to_allocate = MIN2(available, desired_word_size);
+  if (want_to_allocate >= min_word_size) {
+    HeapWord* new_top = obj + want_to_allocate;
     set_top(new_top);
     assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+    *actual_size = want_to_allocate;
     return obj;
   } else {
     return NULL;
   }
 }
 
-// This version is lock-free.
-inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t size,
-                                                    HeapWord* const end_value) {
+inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t min_word_size,
+                                                             size_t desired_word_size,
+                                                             size_t* actual_size) {
   do {
     HeapWord* obj = top();
-    if (pointer_delta(end_value, obj) >= size) {
-      HeapWord* new_top = obj + size;
+    size_t available = pointer_delta(end(), obj);
+    size_t want_to_allocate = MIN2(available, desired_word_size);
+    if (want_to_allocate >= min_word_size) {
+      HeapWord* new_top = obj + want_to_allocate;
       HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj);
       // result can be one of two:
       //  the old top value: the exchange succeeded
       //  otherwise: the new value of the top is returned.
       if (result == obj) {
         assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+        *actual_size = want_to_allocate;
         return obj;
       }
     } else {
@@ -67,20 +73,34 @@
   } while (true);
 }
 
-inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
-  HeapWord* res = allocate_impl(size, end());
+inline HeapWord* G1OffsetTableContigSpace::allocate(size_t min_word_size,
+                                                    size_t desired_word_size,
+                                                    size_t* actual_size) {
+  HeapWord* res = allocate_impl(min_word_size, desired_word_size, actual_size);
   if (res != NULL) {
-    _offsets.alloc_block(res, size);
+    _offsets.alloc_block(res, *actual_size);
   }
   return res;
 }
 
+inline HeapWord* G1OffsetTableContigSpace::allocate(size_t word_size) {
+  size_t temp;
+  return allocate(word_size, word_size, &temp);
+}
+
+inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t word_size) {
+  size_t temp;
+  return par_allocate(word_size, word_size, &temp);
+}
+
 // Because of the requirement of keeping "_offsets" up to date with the
 // allocations, we sequentialize these with a lock.  Therefore, best if
 // this is used for larger LAB allocations only.
-inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
+inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t min_word_size,
+                                                        size_t desired_word_size,
+                                                        size_t* actual_size) {
   MutexLocker x(&_par_alloc_lock);
-  return allocate(size);
+  return allocate(min_word_size, desired_word_size, actual_size);
 }
 
 inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
@@ -128,14 +148,23 @@
   return pointer_delta(next, addr);
 }
 
-inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) {
+inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t min_word_size,
+                                                         size_t desired_word_size,
+                                                         size_t* actual_word_size) {
   assert(is_young(), "we can only skip BOT updates on young regions");
-  return par_allocate_impl(word_size, end());
+  return par_allocate_impl(min_word_size, desired_word_size, actual_word_size);
 }
 
 inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t word_size) {
+  size_t temp;
+  return allocate_no_bot_updates(word_size, word_size, &temp);
+}
+
+inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t min_word_size,
+                                                     size_t desired_word_size,
+                                                     size_t* actual_word_size) {
   assert(is_young(), "we can only skip BOT updates on young regions");
-  return allocate_impl(word_size, end());
+  return allocate_impl(min_word_size, desired_word_size, actual_word_size);
 }
 
 inline void HeapRegion::note_start_of_marking() {
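
The core of the patch is the ranged bump-pointer allocation in par_allocate_impl() above: grant min(available, desired) words, fail only when even the minimum does not fit, and retry on CAS contention. A self-contained sketch of the same loop, using std::atomic in place of Atomic::cmpxchg_ptr (byte-based; names are hypothetical):

    #include <atomic>
    #include <cstddef>

    static char* par_bump_allocate(std::atomic<char*>& top, char* const end,
                                   size_t min_bytes, size_t desired_bytes,
                                   size_t* actual_bytes) {
      for (;;) {
        char* obj = top.load();
        size_t available = static_cast<size_t>(end - obj);
        size_t want = desired_bytes < available ? desired_bytes : available;
        if (want < min_bytes) {
          return nullptr; // not even the minimum fits
        }
        char* new_top = obj + want;
        // On success this thread owns [obj, new_top); on failure another
        // thread advanced top and the loop recomputes from the new value.
        if (top.compare_exchange_weak(obj, new_top)) {
          *actual_bytes = want;
          return obj;
        }
      }
    }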
--- a/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -428,7 +428,7 @@
 
     uncommit_regions(idx_last_found + num_last_found - to_remove, to_remove);
 
-    cur -= num_last_found;
+    cur = idx_last_found;
     removed += to_remove;
   }
 
--- a/hotspot/src/share/vm/gc/g1/vmStructs_g1.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/gc/g1/vmStructs_g1.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -34,7 +34,7 @@
   static_field(HeapRegion, GrainBytes,        size_t)                         \
   static_field(HeapRegion, LogOfHRGrainBytes, int)                            \
                                                                               \
-  nonstatic_field(G1OffsetTableContigSpace, _top,       HeapWord*)            \
+  nonstatic_field(G1OffsetTableContigSpace, _top,       HeapWord* volatile)   \
                                                                               \
   nonstatic_field(G1HeapRegionTable, _base,             address)              \
   nonstatic_field(G1HeapRegionTable, _length,           size_t)               \
--- a/hotspot/src/share/vm/opto/arraycopynode.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/arraycopynode.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -626,3 +626,75 @@
 
   return CallNode::may_modify_arraycopy_helper(dest_t, t_oop, phase);
 }
+
+bool ArrayCopyNode::may_modify_helper(const TypeOopPtr *t_oop, Node* n, PhaseTransform *phase) {
+  if (n->is_Proj()) {
+    n = n->in(0);
+    if (n->is_Call() && n->as_Call()->may_modify(t_oop, phase)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase) {
+  Node* mem = mb->in(TypeFunc::Memory);
+
+  if (mem->is_MergeMem()) {
+    Node* n = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
+    if (may_modify_helper(t_oop, n, phase)) {
+      return true;
+    } else if (n->is_Phi()) {
+      for (uint i = 1; i < n->req(); i++) {
+        if (n->in(i) != NULL) {
+          if (may_modify_helper(t_oop, n->in(i), phase)) {
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+// Does this array copy modify offsets between offset_lo and offset_hi
+// in the destination array?
+// If must_modify is false, return true if the copy could write
+// between offset_lo and offset_hi.
+// If must_modify is true, return true if the copy is guaranteed to
+// write between offset_lo and offset_hi.
+bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransform* phase, bool must_modify) {
+  assert(_kind == ArrayCopy || _kind == CopyOf || _kind == CopyOfRange, "only for real array copies");
+
+  Node* dest = in(ArrayCopyNode::Dest);
+  Node* src_pos = in(ArrayCopyNode::SrcPos);
+  Node* dest_pos = in(ArrayCopyNode::DestPos);
+  Node* len = in(ArrayCopyNode::Length);
+
+  const TypeInt *dest_pos_t = phase->type(dest_pos)->isa_int();
+  const TypeInt *len_t = phase->type(len)->isa_int();
+  const TypeAryPtr* ary_t = phase->type(dest)->isa_aryptr();
+
+  if (dest_pos_t != NULL && len_t != NULL && ary_t != NULL) {
+    BasicType ary_elem = ary_t->klass()->as_array_klass()->element_type()->basic_type();
+    uint header = arrayOopDesc::base_offset_in_bytes(ary_elem);
+    uint elemsize = type2aelembytes(ary_elem);
+
+    intptr_t dest_pos_plus_len_lo = (((intptr_t)dest_pos_t->_lo) + len_t->_lo) * elemsize + header;
+    intptr_t dest_pos_plus_len_hi = (((intptr_t)dest_pos_t->_hi) + len_t->_hi) * elemsize + header;
+    intptr_t dest_pos_lo = ((intptr_t)dest_pos_t->_lo) * elemsize + header;
+    intptr_t dest_pos_hi = ((intptr_t)dest_pos_t->_hi) * elemsize + header;
+
+    if (must_modify) {
+      if (offset_lo >= dest_pos_hi && offset_hi < dest_pos_plus_len_lo) {
+        return true;
+      }
+    } else {
+      if (offset_hi >= dest_pos_lo && offset_lo < dest_pos_plus_len_hi) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
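
A worked instance of the interval arithmetic in modifies(), under assumed values: an int[] destination with a 16-byte array header and 4-byte elements, dest_pos ranging over [2,4] and len over [3,5]:

    // dest_pos_lo          = 2*4 + 16     = 24
    // dest_pos_hi          = 4*4 + 16     = 32
    // dest_pos_plus_len_lo = (2+3)*4 + 16 = 36
    // dest_pos_plus_len_hi = (4+5)*4 + 16 = 52
    //
    // must_modify == true : only offsets in [32, 36) are written by every
    //                       possible copy, so modifies() returns true just
    //                       for query ranges contained in that interval.
    // must_modify == false: any query range overlapping [24, 52) could be
    //                       written by some copy, so modifies() returns
    //                       true for it.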
--- a/hotspot/src/share/vm/opto/arraycopynode.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/arraycopynode.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -108,6 +108,7 @@
                             BasicType copy_type, const Type* value_type, int count);
   bool finish_transform(PhaseGVN *phase, bool can_reshape,
                         Node* ctl, Node *mem);
+  static bool may_modify_helper(const TypeOopPtr *t_oop, Node* n, PhaseTransform *phase);
 
 public:
 
@@ -162,6 +163,9 @@
 
   bool is_alloc_tightly_coupled() const { return _alloc_tightly_coupled; }
 
+  static bool may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase);
+  bool modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransform* phase, bool must_modify);
+
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
   virtual void dump_compact_spec(outputStream* st) const;
--- a/hotspot/src/share/vm/opto/c2compiler.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/c2compiler.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -161,7 +161,7 @@
   vmIntrinsics::ID id = method->intrinsic_id();
   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
 
-  if (id < vmIntrinsics::FIRST_ID || id >= vmIntrinsics::LAST_COMPILER_INLINE) {
+  if (id < vmIntrinsics::FIRST_ID || id > vmIntrinsics::LAST_COMPILER_INLINE) {
     return false;
   }
 
--- a/hotspot/src/share/vm/opto/callnode.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -742,7 +742,7 @@
 //
 bool CallNode::may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase) {
   assert((t_oop != NULL), "sanity");
-  if (is_call_to_arraycopystub()) {
+  if (is_call_to_arraycopystub() && strcmp(_name, "unsafe_arraycopy") != 0) {
     const TypeTuple* args = _tf->domain();
     Node* dest = NULL;
     // Stubs that can be called once an ArrayCopyNode is expanded have
--- a/hotspot/src/share/vm/opto/library_call.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -2730,7 +2730,22 @@
         load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval));
       }
     }
-    post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
+    if (kind == LS_cmpxchg) {
+      // Emit the post barrier only when the store actually happened.
+      // This check only makes sense for compareAndSet, which can fail to set the value.
+      // The CAS success path is marked more likely since we anticipate it is a
+      // performance-critical path, while the CAS failure path can absorb the penalty
+      // of taking the unlikely path as backoff, which is still better than doing a
+      // store barrier there.
+      IdealKit ideal(this);
+      ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); {
+        sync_kit(ideal);
+        post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
+        ideal.sync_kit(this);
+      } ideal.end_if();
+      final_sync(ideal);
+    } else {
+      post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
+    }
     break;
   default:
     fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
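
The intrinsic change above wraps the G1 post barrier of a reference compareAndSet in a branch on the CAS result, so the card is only dirtied when the store actually happened. A standalone analogue of the emitted shape (post_barrier_for() is a hypothetical stand-in for the JIT-emitted barrier):

    #include <atomic>

    static void post_barrier_for(void** slot, void* new_ref) {
      // stand-in for the G1 post barrier (card dirtying)
      (void)slot; (void)new_ref;
    }

    static bool cas_oop_with_barrier(std::atomic<void*>& field,
                                     void* expected, void* new_ref) {
      bool swapped = field.compare_exchange_strong(expected, new_ref);
      if (swapped) {
        // Success path: the only case where the heap was actually written.
        post_barrier_for(reinterpret_cast<void**>(&field), new_ref);
      }
      return swapped;
    }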
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -1175,7 +1175,7 @@
 //=============================================================================
 //------------------------------is_member--------------------------------------
 // Is 'l' a member of 'this'?
-int IdealLoopTree::is_member( const IdealLoopTree *l ) const {
+bool IdealLoopTree::is_member(const IdealLoopTree *l) const {
   while( l->_nest > _nest ) l = l->_parent;
   return l == this;
 }
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -384,7 +384,7 @@
   { }
 
   // Is 'l' a member of 'this'?
-  int is_member( const IdealLoopTree *l ) const; // Test for nested membership
+  bool is_member(const IdealLoopTree *l) const; // Test for nested membership
 
   // Set loop nesting depth.  Accumulate has_call bits.
   int set_nest( uint depth );
@@ -1086,6 +1086,8 @@
   bool split_up( Node *n, Node *blk1, Node *blk2 );
   void sink_use( Node *use, Node *post_loop );
   Node *place_near_use( Node *useblock ) const;
+  Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
+  void try_move_store_after_loop(Node* n);
 
   bool _created_loop_node;
 public:
--- a/hotspot/src/share/vm/opto/loopopts.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -653,6 +653,209 @@
   return iff->in(1);
 }
 
+#ifdef ASSERT
+static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
+  for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
+    Node* u = m->fast_out(i);
+    if (u->is_CFG()) {
+      if (u->Opcode() == Op_NeverBranch) {
+        u = ((NeverBranchNode*)u)->proj_out(0);
+        enqueue_cfg_uses(u, wq);
+      } else {
+        wq.push(u);
+      }
+    }
+  }
+}
+#endif
+
+// Try moving a store out of a loop, right before the loop
+Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
+  // Store has to be first in the loop body
+  IdealLoopTree *n_loop = get_loop(n_ctrl);
+  if (n->is_Store() && n_loop != _ltree_root && n_loop->is_loop()) {
+    assert(n->in(0), "store should have control set");
+    Node* address = n->in(MemNode::Address);
+    Node* value = n->in(MemNode::ValueIn);
+    Node* mem = n->in(MemNode::Memory);
+    IdealLoopTree* address_loop = get_loop(get_ctrl(address));
+    IdealLoopTree* value_loop = get_loop(get_ctrl(value));
+
+    // - address and value must be loop invariant
+    // - memory must be a memory Phi for the loop
+    // - Store must be the only store on this memory slice in the
+    // loop: if there's another store following this one then the value
+    // written at iteration i by the second store could be overwritten
+    // at iteration i+n by the first store: it's not safe to move the
+    // first store out of the loop
+    // - nothing must observe the Phi memory: it guarantees no read
+    // before the store and no early exit out of the loop
+    // With those conditions, we are also guaranteed the store
+    // post-dominates the loop head. Otherwise there would be an extra
+    // Phi involved between the loop's Phi and the store.
+
+    if (!n_loop->is_member(address_loop) &&
+        !n_loop->is_member(value_loop) &&
+        mem->is_Phi() && mem->in(0) == n_loop->_head &&
+        mem->outcnt() == 1 &&
+        mem->in(LoopNode::LoopBackControl) == n) {
+
+#ifdef ASSERT
+      // Verify that store's control does post dominate loop entry and
+      // that there's no early exit of the loop before the store.
+      bool ctrl_ok = false;
+      {
+        // Follow control from the loop head until we reach n, exit the
+        // loop, or reach the tail
+        ResourceMark rm;
+        Unique_Node_List wq;
+        wq.push(n_loop->_head);
+        assert(n_loop->_tail != NULL, "need a tail");
+        for (uint next = 0; next < wq.size(); ++next) {
+          Node *m = wq.at(next);
+          if (m == n->in(0)) {
+            ctrl_ok = true;
+            continue;
+          }
+          assert(!has_ctrl(m), "should be CFG");
+          if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
+            ctrl_ok = false;
+            break;
+          }
+          enqueue_cfg_uses(m, wq);
+        }
+      }
+      assert(ctrl_ok, "bad control");
+#endif
+
+      // move the Store
+      _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
+      _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
+      _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
+      // Disconnect the phi now. An empty phi can confuse other
+      // optimizations in this pass of loop opts.
+      _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
+      n_loop->_body.yank(mem);
+
+      set_ctrl_and_loop(n, n->in(0));
+
+      return n;
+    }
+  }
+  return NULL;
+}
+
+// Try moving a store out of a loop, right after the loop
+void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
+  if (n->is_Store()) {
+    assert(n->in(0), "store should have control set");
+    Node *n_ctrl = get_ctrl(n);
+    IdealLoopTree *n_loop = get_loop(n_ctrl);
+    // Store must be in a loop
+    if (n_loop != _ltree_root && !n_loop->_irreducible) {
+      Node* address = n->in(MemNode::Address);
+      Node* value = n->in(MemNode::ValueIn);
+      IdealLoopTree* address_loop = get_loop(get_ctrl(address));
+      // address must be loop invariant
+      if (!n_loop->is_member(address_loop)) {
+        // Store must be last on this memory slice in the loop and
+        // nothing in the loop must observe it
+        Node* phi = NULL;
+        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+          Node* u = n->fast_out(i);
+          if (has_ctrl(u)) { // control use?
+            IdealLoopTree *u_loop = get_loop(get_ctrl(u));
+            if (!n_loop->is_member(u_loop)) {
+              continue;
+            }
+            if (u->is_Phi() && u->in(0) == n_loop->_head) {
+              assert(_igvn.type(u) == Type::MEMORY, "bad phi");
+              assert(phi == NULL, "already found");
+              phi = u;
+              continue;
+            }
+          }
+          phi = NULL;
+          break;
+        }
+        if (phi != NULL) {
+          // Nothing in the loop before the store (next iteration)
+          // must observe the stored value
+          bool mem_ok = true;
+          {
+            ResourceMark rm;
+            Unique_Node_List wq;
+            wq.push(phi);
+            for (uint next = 0; next < wq.size() && mem_ok; ++next) {
+              Node *m = wq.at(next);
+              for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
+                Node* u = m->fast_out(i);
+                if (u->is_Store() || u->is_Phi()) {
+                  if (u != n) {
+                    wq.push(u);
+                    mem_ok = (wq.size() <= 10);
+                  }
+                } else {
+                  mem_ok = false;
+                  break;
+                }
+              }
+            }
+          }
+          if (mem_ok) {
+            // Move the Store out of the loop creating clones along
+            // all paths out of the loop that observe the stored value
+            _igvn.rehash_node_delayed(phi);
+            int count = phi->replace_edge(n, n->in(MemNode::Memory));
+            assert(count > 0, "inconsistent phi");
+            for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+              Node* u = n->fast_out(i);
+              Node* c = get_ctrl(u);
+
+              if (u->is_Phi()) {
+                c = u->in(0)->in(u->find_edge(n));
+              }
+              IdealLoopTree *u_loop = get_loop(c);
+              assert(!n_loop->is_member(u_loop), "only the phi should have been a use in the loop");
+              while (true) {
+                Node* next_c = find_non_split_ctrl(idom(c));
+                if (n_loop->is_member(get_loop(next_c))) {
+                  break;
+                }
+                c = next_c;
+              }
+
+              Node* st = n->clone();
+              st->set_req(0, c);
+              _igvn.register_new_node_with_optimizer(st);
+
+              set_ctrl(st, c);
+              IdealLoopTree* new_loop = get_loop(c);
+              assert(new_loop != n_loop, "should be moved out of loop");
+              if (new_loop->_child == NULL) new_loop->_body.push(st);
+
+              _igvn.replace_input_of(u, u->find_edge(n), st);
+              --imax;
+              --i;
+            }
+
+            assert(n->outcnt() == 0, "all uses should be gone");
+            _igvn.replace_input_of(n, MemNode::Memory, C->top());
+            // Disconnect the phi now. An empty phi can confuse other
+            // optimizations in this pass of loop opts.
+            if (phi->in(LoopNode::LoopBackControl) == phi) {
+              _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
+              n_loop->_body.yank(phi);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
 //------------------------------split_if_with_blocks_pre-----------------------
 // Do the real work in a non-recursive function.  Data nodes want to be
 // cloned in the pre-order so they can feed each other nicely.
@@ -683,6 +886,11 @@
   Node *n_ctrl = get_ctrl(n);
   if( !n_ctrl ) return n;       // Dead node
 
+  Node* res = try_move_store_before_loop(n, n_ctrl);
+  if (res != NULL) {
+    return n;
+  }
+
   // Attempt to remix address expressions for loop invariants
   Node *m = remix_address_expressions( n );
   if( m ) return m;
@@ -691,16 +899,18 @@
   // Returns the block to clone thru.
   Node *n_blk = has_local_phi_input( n );
   if( !n_blk ) return n;
+
   // Do not clone the trip counter through on a CountedLoop
   // (messes up the canonical shape).
   if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
 
   // Check for having no control input; not pinned.  Allow
   // dominating control.
-  if( n->in(0) ) {
+  if (n->in(0)) {
     Node *dom = idom(n_blk);
-    if( dom_lca( n->in(0), dom ) != n->in(0) )
+    if (dom_lca(n->in(0), dom) != n->in(0)) {
       return n;
+    }
   }
   // Policy: when is it profitable.  You must get more wins than
   // policy before it is considered profitable.  Policy is usually 0,
@@ -1029,6 +1239,8 @@
     }
   }
 
+  try_move_store_after_loop(n);
+
   // Check for Opaque2's who's loop has disappeared - who's input is in the
   // same loop nest as their output.  Remove 'em, they are no longer useful.
   if( n_op == Op_Opaque2 &&
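
At the source level, the two new transformations amount to the following (hypothetical example: p and v are loop invariant, nothing in the loop reads *p, and the store is the only write on its memory slice; at this point in the optimizer the loop head is only reached when the body executes at least once, which is what makes the hoisting form legal):

    // try_move_store_before_loop: every iteration rewrites the same slot,
    // so the store is hoisted in front of the loop:
    //   for (int i = 0; i < n; i++) { *p = v; f(i); }
    //   =>
    //   *p = v;
    //   for (int i = 0; i < n; i++) { f(i); }
    //
    // try_move_store_after_loop: the store is the last write on its slice
    // and only the loop's memory Phi observes it, so a clone of the store
    // is sunk to each loop exit:
    //   for (int i = 0; i < n; i++) { f(i); *p = v; }
    //   =>
    //   for (int i = 0; i < n; i++) { f(i); }
    //   *p = v;  // one clone per exit path that observes the stored value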
--- a/hotspot/src/share/vm/opto/macro.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/macro.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -324,18 +324,28 @@
         return in;
       } else if (in->is_Call()) {
         CallNode *call = in->as_Call();
-        if (!call->may_modify(tinst, phase)) {
-          mem = call->in(TypeFunc::Memory);
+        if (call->may_modify(tinst, phase)) {
+          assert(call->is_ArrayCopy(), "ArrayCopy is the only call node that doesn't make allocation escape");
+
+          if (call->as_ArrayCopy()->modifies(offset, offset, phase, false)) {
+            return in;
+          }
         }
         mem = in->in(TypeFunc::Memory);
       } else if (in->is_MemBar()) {
+        if (ArrayCopyNode::may_modify(tinst, in->as_MemBar(), phase)) {
+          assert(in->in(0)->is_Proj() && in->in(0)->in(0)->is_ArrayCopy(), "should be arraycopy");
+          ArrayCopyNode* ac = in->in(0)->in(0)->as_ArrayCopy();
+          assert(ac->is_clonebasic(), "Only basic clone is a non-escaping clone");
+          return ac;
+        }
         mem = in->in(TypeFunc::Memory);
       } else {
         assert(false, "unexpected projection");
       }
     } else if (mem->is_Store()) {
       const TypePtr* atype = mem->as_Store()->adr_type();
-      int adr_idx = Compile::current()->get_alias_index(atype);
+      int adr_idx = phase->C->get_alias_index(atype);
       if (adr_idx == alias_idx) {
         assert(atype->isa_oopptr(), "address type must be oopptr");
         int adr_offset = atype->offset();
@@ -373,7 +383,7 @@
         adr = mem->in(3); // Destination array
       }
       const TypePtr* atype = adr->bottom_type()->is_ptr();
-      int adr_idx = Compile::current()->get_alias_index(atype);
+      int adr_idx = phase->C->get_alias_index(atype);
       if (adr_idx == alias_idx) {
         assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
         return NULL;
@@ -386,12 +396,63 @@
   }
 }
 
+// Generate loads from the source of the arraycopy for fields of the
+// destination needed at a deoptimization point
+Node* PhaseMacroExpand::make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset, Node* ctl, BasicType ft, const Type *ftype, AllocateNode *alloc) {
+  BasicType bt = ft;
+  const Type *type = ftype;
+  if (ft == T_NARROWOOP) {
+    bt = T_OBJECT;
+    type = ftype->make_oopptr();
+  }
+  Node* res = NULL;
+  if (ac->is_clonebasic()) {
+    Node* base = ac->in(ArrayCopyNode::Src)->in(AddPNode::Base);
+    Node* adr = _igvn.transform(new AddPNode(base, base, MakeConX(offset)));
+    const TypePtr* adr_type = _igvn.type(base)->is_ptr()->add_offset(offset);
+    Node* m = ac->in(TypeFunc::Memory);
+    while (m->is_MergeMem()) {
+      m = m->as_MergeMem()->memory_at(C->get_alias_index(adr_type));
+      if (m->is_Proj() && m->in(0)->is_MemBar()) {
+        m = m->in(0)->in(TypeFunc::Memory);
+      }
+    }
+    res = LoadNode::make(_igvn, ctl, m, adr, adr_type, type, bt, MemNode::unordered, LoadNode::Pinned);
+  } else {
+    if (ac->modifies(offset, offset, &_igvn, true)) {
+      assert(ac->in(ArrayCopyNode::Dest) == alloc->result_cast(), "arraycopy destination should be allocation's result");
+      uint shift  = exact_log2(type2aelembytes(bt));
+      Node* diff = _igvn.transform(new SubINode(ac->in(ArrayCopyNode::SrcPos), ac->in(ArrayCopyNode::DestPos)));
+#ifdef _LP64
+      diff = _igvn.transform(new ConvI2LNode(diff));
+#endif
+      diff = _igvn.transform(new LShiftXNode(diff, intcon(shift)));
+
+      Node* off = _igvn.transform(new AddXNode(MakeConX(offset), diff));
+      Node* base = ac->in(ArrayCopyNode::Src);
+      Node* adr = _igvn.transform(new AddPNode(base, base, off));
+      const TypePtr* adr_type = _igvn.type(base)->is_ptr()->add_offset(offset);
+      Node* m = ac->in(TypeFunc::Memory);
+      res = LoadNode::make(_igvn, ctl, m, adr, adr_type, type, bt, MemNode::unordered, LoadNode::Pinned);
+    }
+  }
+  if (res != NULL) {
+    res = _igvn.transform(res);
+    if (ftype->isa_narrowoop()) {
+      // PhaseMacroExpand::scalar_replacement adds DecodeN nodes
+      res = _igvn.transform(new EncodePNode(res, ftype));
+    }
+    return res;
+  }
+  return NULL;
+}
+
 //
 // Given a Memory Phi, compute a value Phi containing the values from stores
 // on the input paths.
-// Note: this function is recursive, its depth is limied by the "level" argument
+// Note: this function is recursive, its depth is limited by the "level" argument
 // Returns the computed Phi, or NULL if it cannot compute it.
-Node *PhaseMacroExpand::value_from_mem_phi(Node *mem, BasicType ft, const Type *phi_type, const TypeOopPtr *adr_t, Node *alloc, Node_Stack *value_phis, int level) {
+Node *PhaseMacroExpand::value_from_mem_phi(Node *mem, BasicType ft, const Type *phi_type, const TypeOopPtr *adr_t, AllocateNode *alloc, Node_Stack *value_phis, int level) {
   assert(mem->is_Phi(), "sanity");
   int alias_idx = C->get_alias_index(adr_t);
   int offset = adr_t->offset();
@@ -458,6 +519,9 @@
         assert(val->in(0)->is_LoadStore() || val->in(0)->Opcode() == Op_EncodeISOArray, "sanity");
         assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
         return NULL;
+      } else if (val->is_ArrayCopy()) {
+        Node* res = make_arraycopy_load(val->as_ArrayCopy(), offset, val->in(0), ft, phi_type, alloc);
+        values.at_put(j, res);
       } else {
 #ifdef ASSERT
         val->dump();
@@ -479,7 +543,7 @@
 }
 
 // Search the last value stored into the object's field.
-Node *PhaseMacroExpand::value_from_mem(Node *sfpt_mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc) {
+Node *PhaseMacroExpand::value_from_mem(Node *sfpt_mem, Node *sfpt_ctl, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc) {
   assert(adr_t->is_known_instance_field(), "instance required");
   int instance_id = adr_t->instance_id();
   assert((uint)instance_id == alloc->_idx, "wrong allocation");
@@ -538,6 +602,8 @@
       } else {
         done = true;
       }
+    } else if (mem->is_ArrayCopy()) {
+      done = true;
     } else {
       assert(false, "unexpected node");
     }
@@ -562,6 +628,13 @@
           value_phis.pop();
         }
       }
+    } else if (mem->is_ArrayCopy()) {
+      Node* ctl = mem->in(0);
+      if (sfpt_ctl->is_Proj() && sfpt_ctl->as_Proj()->is_uncommon_trap_proj(Deoptimization::Reason_none)) {
+        // pin the loads in the uncommon trap path
+        ctl = sfpt_ctl;
+      }
+      return make_arraycopy_load(mem->as_ArrayCopy(), offset, ctl, ft, ftype, alloc);
     }
   }
   // Something went wrong.
@@ -738,6 +811,7 @@
   while (safepoints.length() > 0) {
     SafePointNode* sfpt = safepoints.pop();
     Node* mem = sfpt->memory();
+    Node* ctl = sfpt->control();
     assert(sfpt->jvms() != NULL, "missed JVMS");
     // Fields of scalar objs are referenced only at the end
     // of regular debuginfo at the last (youngest) JVMS.
@@ -789,7 +863,7 @@
 
       const TypeOopPtr *field_addr_type = res_type->add_offset(offset)->isa_oopptr();
 
-      Node *field_val = value_from_mem(mem, basic_elem_type, field_type, field_addr_type, alloc);
+      Node *field_val = value_from_mem(mem, ctl, basic_elem_type, field_type, field_addr_type, alloc);
       if (field_val == NULL) {
         // We weren't able to find a value for this field,
         // give up on eliminating this allocation.
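
The macro.cpp changes let scalar replacement look through an ArrayCopy: when an eliminated allocation got its contents from a basic clone or an arraycopy, a safepoint that needs one of its field values is fed a load from the copy's source at the corresponding offset (adjusted by src_pos/dest_pos for real arraycopies). A standalone sketch of the recovered-value idea, with hypothetical names:

    #include <cstddef>
    #include <cstring>

    struct Pair { int a; int b; };

    // What make_arraycopy_load() emits as IR, expressed directly: read the
    // field bits from the copy's source at the same offset.
    static int recover_field_through_copy(const Pair* src, size_t offset) {
      int v;
      std::memcpy(&v, reinterpret_cast<const char*>(src) + offset, sizeof(v));
      return v;
    }

    // Usage: given "Pair dst = *src;" where dst never escapes and has been
    // scalar-replaced away, a deoptimization point needing dst.b can use:
    //   int b = recover_field_through_copy(src, offsetof(Pair, b));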
--- a/hotspot/src/share/vm/opto/macro.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/macro.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -85,8 +85,8 @@
                               Node* length,
                               const TypeFunc* slow_call_type,
                               address slow_call_address);
-  Node *value_from_mem(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc);
-  Node *value_from_mem_phi(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc, Node_Stack *value_phis, int level);
+  Node *value_from_mem(Node *mem, Node *ctl, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc);
+  Node *value_from_mem_phi(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc, Node_Stack *value_phis, int level);
 
   bool eliminate_boxing_node(CallStaticJavaNode *boxing);
   bool eliminate_allocate_node(AllocateNode *alloc);
@@ -200,6 +200,8 @@
                             Node* old_eden_top, Node* new_eden_top,
                             Node* length);
 
+  Node* make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset, Node* ctl, BasicType ft, const Type *ftype, AllocateNode *alloc);
+
 public:
   PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn), _has_locks(false) {
     _igvn.set_delay_transform(true);
--- a/hotspot/src/share/vm/opto/memnode.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -108,37 +108,6 @@
 
 #endif
 
-static bool membar_for_arraycopy_helper(const TypeOopPtr *t_oop, Node* n, PhaseTransform *phase) {
-  if (n->is_Proj()) {
-    n = n->in(0);
-    if (n->is_Call() && n->as_Call()->may_modify(t_oop, phase)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-static bool membar_for_arraycopy(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase) {
-  Node* mem = mb->in(TypeFunc::Memory);
-
-  if (mem->is_MergeMem()) {
-    Node* n = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
-    if (membar_for_arraycopy_helper(t_oop, n, phase)) {
-      return true;
-    } else if (n->is_Phi()) {
-      for (uint i = 1; i < n->req(); i++) {
-        if (n->in(i) != NULL) {
-          if (membar_for_arraycopy_helper(t_oop, n->in(i), phase)) {
-            return true;
-          }
-        }
-      }
-    }
-  }
-
-  return false;
-}
-
 Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase) {
   assert((t_oop != NULL), "sanity");
   bool is_instance = t_oop->is_known_instance_field();
@@ -183,7 +152,7 @@
           }
         }
       } else if (proj_in->is_MemBar()) {
-        if (membar_for_arraycopy(t_oop, proj_in->as_MemBar(), phase)) {
+        if (ArrayCopyNode::may_modify(t_oop, proj_in->as_MemBar(), phase)) {
           break;
         }
         result = proj_in->in(TypeFunc::Memory);
@@ -545,35 +514,12 @@
         Node* dest = ac->in(ArrayCopyNode::Dest);
 
         if (dest == ld_base) {
-          Node* src_pos = ac->in(ArrayCopyNode::SrcPos);
-          Node* dest_pos = ac->in(ArrayCopyNode::DestPos);
-          Node* len = ac->in(ArrayCopyNode::Length);
-
-          const TypeInt *dest_pos_t = phase->type(dest_pos)->isa_int();
           const TypeX *ld_offs_t = phase->type(ld_offs)->isa_intptr_t();
-          const TypeInt *len_t = phase->type(len)->isa_int();
-          const TypeAryPtr* ary_t = phase->type(dest)->isa_aryptr();
-
-          if (dest_pos_t != NULL && ld_offs_t != NULL && len_t != NULL && ary_t != NULL) {
-            BasicType ary_elem  = ary_t->klass()->as_array_klass()->element_type()->basic_type();
-            uint header = arrayOopDesc::base_offset_in_bytes(ary_elem);
-            uint elemsize = type2aelembytes(ary_elem);
-
-            intptr_t dest_pos_plus_len_lo = (((intptr_t)dest_pos_t->_lo) + len_t->_lo) * elemsize + header;
-            intptr_t dest_pos_plus_len_hi = (((intptr_t)dest_pos_t->_hi) + len_t->_hi) * elemsize + header;
-            intptr_t dest_pos_lo = ((intptr_t)dest_pos_t->_lo) * elemsize + header;
-            intptr_t dest_pos_hi = ((intptr_t)dest_pos_t->_hi) * elemsize + header;
-
-            if (can_see_stored_value) {
-              if (ld_offs_t->_lo >= dest_pos_hi && ld_offs_t->_hi < dest_pos_plus_len_lo) {
-                return ac;
-              }
-            } else {
-              if (ld_offs_t->_hi < dest_pos_lo || ld_offs_t->_lo >= dest_pos_plus_len_hi) {
-                mem = ac->in(TypeFunc::Memory);
-              }
-              return ac;
-            }
+          if (ac->modifies(ld_offs_t->_lo, ld_offs_t->_hi, phase, can_see_stored_value)) {
+            return ac;
+          }
+          if (!can_see_stored_value) {
+            mem = ac->in(TypeFunc::Memory);
           }
         }
       }
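
The open-coded bounds check deleted above now lives behind ArrayCopyNode::modifies. As a reference for the interval logic it encapsulates, here is a minimal Java sketch; the names are illustrative only, and the lo/hi pairs stand in for C2's integer type-lattice bounds rather than runtime values:

    class CopyRangeCheck {
        // Byte interval written by the arraycopy: elements [destPos, destPos + len),
        // scaled by the element size and offset by the array header.
        static boolean mustModify(long ldOffLo, long ldOffHi,
                                  long destPosLo, long destPosHi,
                                  long lenLo, long lenHi,
                                  long elemSize, long header) {
            long writeStartMax = destPosHi * elemSize + header;          // latest possible start
            long writeEndMin = (destPosLo + lenLo) * elemSize + header;  // earliest possible end
            // The load certainly reads copied data only if its whole offset
            // range falls inside the region written on every execution.
            return ldOffLo >= writeStartMax && ldOffHi < writeEndMin;
        }

        static boolean cannotModify(long ldOffLo, long ldOffHi,
                                    long destPosLo, long destPosHi,
                                    long lenLo, long lenHi,
                                    long elemSize, long header) {
            long writeStartMin = destPosLo * elemSize + header;          // earliest possible start
            long writeEndMax = (destPosHi + lenHi) * elemSize + header;  // latest possible end
            // The load certainly misses the copy if its whole offset range
            // lies entirely before or entirely after any possible write.
            return ldOffHi < writeStartMin || ldOffLo >= writeEndMax;
        }
    }

The boolean argument to modifies selects between the two tests: a caller that wants to read the stored value needs the must-overlap test before returning the arraycopy as the defining store, while a caller that merely wants to step past the copy needs to prove no overlap before advancing mem.
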
@@ -703,7 +649,7 @@
           continue;         // (a) advance through independent call memory
         }
       } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) {
-        if (membar_for_arraycopy(addr_t, mem->in(0)->as_MemBar(), phase)) {
+        if (ArrayCopyNode::may_modify(addr_t, mem->in(0)->as_MemBar(), phase)) {
           break;
         }
         mem = mem->in(0)->in(TypeFunc::Memory);
@@ -883,18 +829,17 @@
 // Is the value loaded previously stored by an arraycopy? If so return
 // a load node that reads from the source array so we may be able to
 // optimize out the ArrayCopy node later.
-Node* MemNode::can_see_arraycopy_value(Node* st, PhaseTransform* phase) const {
+Node* LoadNode::can_see_arraycopy_value(Node* st, PhaseTransform* phase) const {
   Node* ld_adr = in(MemNode::Address);
   intptr_t ld_off = 0;
   AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
   Node* ac = find_previous_arraycopy(phase, ld_alloc, st, true);
   if (ac != NULL) {
     assert(ac->is_ArrayCopy(), "what kind of node can this be?");
-    assert(is_Load(), "only for loads");
-
+
+    Node* ld = clone();
     if (ac->as_ArrayCopy()->is_clonebasic()) {
       assert(ld_alloc != NULL, "need an alloc");
-      Node* ld = clone();
       Node* addp = in(MemNode::Address)->clone();
       assert(addp->is_AddP(), "address must be addp");
       assert(addp->in(AddPNode::Base) == ac->in(ArrayCopyNode::Dest)->in(AddPNode::Base), "strange pattern");
@@ -906,9 +851,7 @@
         assert(ld_alloc->in(0) != NULL, "alloc must have control");
         ld->set_req(0, ld_alloc->in(0));
       }
-      return ld;
     } else {
-      Node* ld = clone();
       Node* addp = in(MemNode::Address)->clone();
       assert(addp->in(AddPNode::Base) == addp->in(AddPNode::Address), "should be");
       addp->set_req(AddPNode::Base, ac->in(ArrayCopyNode::Src));
@@ -933,8 +876,10 @@
         assert(ac->in(0) != NULL, "alloc must have control");
         ld->set_req(0, ac->in(0));
       }
-      return ld;
     }
+    // load depends on the tests that validate the arraycopy
+    ld->as_Load()->_depends_only_on_test = Pinned;
+    return ld;
   }
   return NULL;
 }
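
At the source level, the rewrite performed by can_see_arraycopy_value looks roughly like the following; this is an intuition sketch only, not the IR transformation itself:

    class ArrayCopyLoadPeek {
        static int firstAfterCopy(int[] src) {
            int[] dst = new int[src.length];
            System.arraycopy(src, 0, dst, 0, src.length);
            // C2 can answer this read with a load from src[0] instead, and if
            // nothing else observes dst, the copy and allocation can go away.
            // The replacement load is pinned (Pinned above): it must not float
            // above the guards that validate the arraycopy.
            return dst[0];
        }
    }
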
@@ -2426,40 +2371,47 @@
 
   Node* mem     = in(MemNode::Memory);
   Node* address = in(MemNode::Address);
-
   // Back-to-back stores to same address?  Fold em up.  Generally
   // unsafe if I have intervening uses...  Also disallowed for StoreCM
   // since they must follow each StoreP operation.  Redundant StoreCMs
   // are eliminated just before matching in final_graph_reshape.
-  if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) &&
-      mem->Opcode() != Op_StoreCM) {
-    // Looking at a dead closed cycle of memory?
-    assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
-
-    assert(Opcode() == mem->Opcode() ||
-           phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw,
-           "no mismatched stores, except on raw memory");
-
-    if (mem->outcnt() == 1 &&           // check for intervening uses
-        mem->as_Store()->memory_size() <= this->memory_size()) {
-      // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away.
-      // For example, 'mem' might be the final state at a conditional return.
-      // Or, 'mem' might be used by some node which is live at the same time
-      // 'this' is live, which might be unschedulable.  So, require exactly
-      // ONE user, the 'this' store, until such time as we clone 'mem' for
-      // each of 'mem's uses (thus making the exactly-1-user-rule hold true).
-      if (can_reshape) {  // (%%% is this an anachronism?)
-        set_req_X(MemNode::Memory, mem->in(MemNode::Memory),
-                  phase->is_IterGVN());
-      } else {
-        // It's OK to do this in the parser, since DU info is always accurate,
-        // and the parser always refers to nodes via SafePointNode maps.
-        set_req(MemNode::Memory, mem->in(MemNode::Memory));
+  {
+    Node* st = mem;
+    // If Store 'st' has more than one use, we cannot fold 'st' away.
+    // For example, 'st' might be the final state at a conditional
+    // return.  Or, 'st' might be used by some node which is live at
+    // the same time 'st' is live, which might be unschedulable.  So,
+    // require exactly ONE user until such time as we clone 'st' for
+    // each of 'st's uses (thus making the exactly-1-user-rule hold
+    // true).
+    while (st->is_Store() && st->outcnt() == 1 && st->Opcode() != Op_StoreCM) {
+      // Looking at a dead closed cycle of memory?
+      assert(st != st->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
+      assert(Opcode() == st->Opcode() ||
+             st->Opcode() == Op_StoreVector ||
+             Opcode() == Op_StoreVector ||
+             phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
+             (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI), // expanded ClearArrayNode
+             err_msg_res("no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()]));
+
+      if (st->in(MemNode::Address)->eqv_uncast(address) &&
+          st->as_Store()->memory_size() <= this->memory_size()) {
+        Node* use = st->raw_out(0);
+        phase->igvn_rehash_node_delayed(use);
+        if (can_reshape) {
+          use->set_req_X(MemNode::Memory, st->in(MemNode::Memory), phase->is_IterGVN());
+        } else {
+          // It's OK to do this in the parser, since DU info is always accurate,
+          // and the parser always refers to nodes via SafePointNode maps.
+          use->set_req(MemNode::Memory, st->in(MemNode::Memory));
+        }
+        return this;
       }
-      return this;
+      st = st->in(MemNode::Memory);
     }
   }
 
+
   // Capture an unaliased, unconditional, simple store into an initializer.
   // Or, if it is independent of the allocation, hoist it above the allocation.
   if (ReduceFieldZeroing && /*can_reshape &&*/
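
The loop above generalizes the old code: instead of only folding a store whose direct memory input is a store to the same address, Ideal now walks back through a chain of single-user stores. A Java-level sketch of a pattern the chain walk catches and the old check missed (compare test_stores_2 in the new test below); names here are illustrative:

    class BackToBackStores {
        static void lastWriteWins(long[] a, int idx) {
            a[idx] = 1;      // now foldable: dead once the chain walk reaches it
            a[idx + 1] = 2;  // unrelated store sitting between the two a[idx] writes
            a[idx] = 3;      // walks past a[idx + 1] to find and kill the first store
        }
    }
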
--- a/hotspot/src/share/vm/opto/memnode.hpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Tue Aug 25 17:30:14 2015 +0200
@@ -126,7 +126,6 @@
   // Can this node (load or store) accurately see a stored value in
   // the given memory state?  (The state may or may not be in(Memory).)
   Node* can_see_stored_value(Node* st, PhaseTransform* phase) const;
-  Node* can_see_arraycopy_value(Node* st, PhaseTransform* phase) const;
 
 #ifndef PRODUCT
   static void dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st);
@@ -252,6 +251,9 @@
 protected:
   const Type* load_array_final_field(const TypeKlassPtr *tkls,
                                      ciKlass* klass) const;
+
+  Node* can_see_arraycopy_value(Node* st, PhaseTransform* phase) const;
+
   // depends_only_on_test is almost always true, and needs to be almost always
   // true to enable key hoisting & commoning optimizations.  However, for the
   // special case of RawPtr loads from TLS top & end, and other loads performed by
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Mon Aug 24 15:41:56 2015 -0400
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Tue Aug 25 17:30:14 2015 +0200
@@ -1565,6 +1565,7 @@
   declare_toplevel_type(Generation*)                                      \
   declare_toplevel_type(GenerationSpec**)                                 \
   declare_toplevel_type(HeapWord*)                                        \
+  declare_toplevel_type(HeapWord* volatile)                               \
   declare_toplevel_type(MemRegion*)                                       \
   declare_toplevel_type(OffsetTableContigSpace*)                          \
   declare_toplevel_type(Space*)                                           \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/arraycopy/TestEliminatedArrayCopyDeopt.java	Tue Aug 25 17:30:14 2015 +0200
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8130847
+ * @summary Eliminated instance/array written to by an array copy variant must be correctly initialized when reallocated at a deopt
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestEliminatedArrayCopyDeopt
+ *
+ */
+
+// Test that if an ArrayCopy node is eliminated because it doesn't
+// escape, then the correct field/array element values are captured so
+// that on a deoptimization, when the object/array is reallocated, it
+// is correctly initialized.
+
+public class TestEliminatedArrayCopyDeopt {
+
+    static class A implements Cloneable {
+        int f0;
+        int f1;
+        int f2;
+        int f3;
+        int f4;
+        int f5;
+        int f6;
+        int f7;
+        int f8;
+        int f9;
+        int f10;
+        int f11;
+        int f12;
+        int f13;
+        int f14;
+        int f15;
+
+        public Object clone() throws CloneNotSupportedException {
+            return super.clone();
+        }
+    }
+
+    // Clone
+    static boolean m1(A a, boolean flag) throws CloneNotSupportedException {
+        A c = (A)a.clone();
+        if (flag) {
+            // never-taken branch that causes the deoptimization
+            if (c.f0 != 0x42) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Array clone
+    static int[] m2_src = null;
+    static boolean m2(boolean flag) throws CloneNotSupportedException {
+        int[] src  = new int[10];
+        m2_src = src;
+        for (int i = 0; i < src.length; i++) {
+            src[i] = 0x42+i;
+        }
+        int[] c = (int[])src.clone();
+        if (flag) {
+            for (int i = 0; i < c.length; i++) {
+                if (c[i] != src[i]) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    // Array copy
+    static boolean m3(int[] src, boolean flag) {
+        int[] dst = new int[10];
+        System.arraycopy(src, 0, dst, 0, 10);
+        if (flag) {
+            for (int i = 0; i < dst.length; i++) {
+                if (dst[i] != src[i]) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    // Array copy of subrange
+    static boolean m4(int[] src, boolean flag) {
+        int[] dst = new int[10];
+        dst[0] = 0x42;
+        dst[1] = 0x42 - 1;
+        dst[2] = 0x42 - 2;
+        dst[8] = 0x42 - 8;
+        dst[9] = 0x42 - 9;
+        int src_off = 2;
+        int dst_off = 3;
+        int len = 5;
+        System.arraycopy(src, src_off, dst, dst_off, len);
+        if (flag) {
+            for (int i = 0; i < dst.length; i++) {
+                if (i >= dst_off && i < dst_off + len) {
+                    if (dst[i] != src[i - dst_off + src_off]) {
+                        return false;
+                    }
+                } else {
+                    if (dst[i] != 0x42-i) {
+                        return false;
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+    // Array copy with Phi
+    static boolean m5(int[] src, boolean flag1, boolean flag2) {
+        int[] dst = new int[10];
+        if (flag1) {
+            System.arraycopy(src, 0, dst, 0, 10);
+        }
+        if (flag2) {
+            for (int i = 0; i < dst.length; i++) {
+                if (dst[i] != src[i]) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    static public void main(String[] args) throws Exception {
+        boolean success = true;
+        A a = new A();
+        a.f0 = 0x42;
+        for (int i = 0; i < 20000; i++) {
+            m1(a, false);
+        }
+        if (!m1(a, true)) {
+            System.out.println("m1 failed");
+            success = false;
+        }
+
+        for (int i = 0; i < 20000; i++) {
+            m2(false);
+        }
+        if (!m2(true)) {
+            System.out.println("m2 failed");
+            success = false;
+        }
+
+        int[] src = new int[10];
+        for (int i = 0; i < src.length; i++) {
+            src[i] = 0x42+i;
+        }
+
+        for (int i = 0; i < 20000; i++) {
+            m3(src, false);
+        }
+        if (!m3(src, true)) {
+            System.out.println("m3 failed");
+            success = false;
+        }
+
+        for (int i = 0; i < 20000; i++) {
+            m4(src, false);
+        }
+        if (!m4(src, true)) {
+            System.out.println("m4 failed");
+            success = false;
+        }
+
+        for (int i = 0; i < 20000; i++) {
+            m5(src, i%2 == 0, false);
+        }
+        if (!m5(src, true, true)) {
+            System.out.println("m4 failed");
+            success = false;
+        }
+
+        if (!success) {
+            throw new RuntimeException("Test failed");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/TestMoveStoresOutOfLoops.java	Tue Aug 25 17:30:14 2015 +0200
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8080289
+ * @summary Sink stores out of loops if possible
+ * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+PrintCompilation -XX:CompileCommand=dontinline,TestMoveStoresOutOfLoops::test*  TestMoveStoresOutOfLoops
+ *
+ */
+
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.function.*;
+
+public class TestMoveStoresOutOfLoops {
+
+    private static long[] array = new long[10];
+    private static long[] array2 = new long[10];
+    private static boolean[] array3 = new boolean[1000];
+    private static byte[] byte_array = new byte[10];
+
+    // Array store should be moved out of the loop, the value stored
+    // should be 999, and the loop should be eliminated
+    static void test_after_1(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            array[idx] = i;
+        }
+    }
+
+    // Array store can't be moved out of the loop because of a
+    // following non-loop-invariant array access
+    static void test_after_2(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            array[idx] = i;
+            array2[i%10] = i;
+        }
+    }
+
+    // Array store can't be moved out of the loop because of a
+    // following use
+    static void test_after_3(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            array[idx] = i;
+            if (array[0] == -1) {
+                break;
+            }
+        }
+    }
+
+    // Array store can't be moved out of the loop because of a
+    // preceding use
+    static void test_after_4(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            if (array[0] == -2) {
+                break;
+            }
+            array[idx] = i;
+        }
+    }
+
+    // All array stores should be moved out of the loop, one after
+    // the other
+    static void test_after_5(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            array[idx] = i;
+            array[idx+1] = i;
+            array[idx+2] = i;
+            array[idx+3] = i;
+            array[idx+4] = i;
+            array[idx+5] = i;
+        }
+    }
+
+    // Array store can be moved after the loop but needs to be
+    // cloned on both exit paths
+    static void test_after_6(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            array[idx] = i;
+            if (array3[i]) {
+                return;
+            }
+        }
+    }
+
+    // Optimize out redundant stores
+    static void test_stores_1(int ignored) {
+        array[0] = 0;
+        array[1] = 1;
+        array[2] = 2;
+        array[0] = 0;
+        array[1] = 1;
+        array[2] = 2;
+    }
+
+    static void test_stores_2(int idx) {
+        array[idx+0] = 0;
+        array[idx+1] = 1;
+        array[idx+2] = 2;
+        array[idx+0] = 0;
+        array[idx+1] = 1;
+        array[idx+2] = 2;
+    }
+
+    static void test_stores_3(int idx) {
+        byte_array[idx+0] = 0;
+        byte_array[idx+1] = 1;
+        byte_array[idx+2] = 2;
+        byte_array[idx+0] = 0;
+        byte_array[idx+1] = 1;
+        byte_array[idx+2] = 2;
+    }
+
+    // Array store can be moved out of the loop before the loop header
+    static void test_before_1(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            array[idx] = 999;
+        }
+    }
+
+    // Array store can't be moved out of the loop before the loop
+    // header because there's more than one store on this slice
+    static void test_before_2(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            array[idx] = 999;
+            array[i%2] = 0;
+        }
+    }
+
+    // Array store can't be moved out of the loop before the loop
+    // header because of a use before the store
+    static int test_before_3(int idx) {
+        int res = 0;
+        for (int i = 0; i < 1000; i++) {
+            res += array[i%10];
+            array[idx] = 999;
+        }
+        return res;
+    }
+
+    // Array store can't be moved out of the loop before the loop
+    // header because of a possible early exit
+    static void test_before_4(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            if (idx / (i+1) > 0) {
+                return;
+            }
+            array[idx] = 999;
+        }
+    }
+
+    // Array store can't be moved out of the loop before the loop
+    // header because it doesn't postdominate the loop head
+    static void test_before_5(int idx) {
+        for (int i = 0; i < 1000; i++) {
+            if (i % 2 == 0) {
+                array[idx] = 999;
+            }
+        }
+    }
+
+    // Array store can be moved out of the loop before the loop header
+    static int test_before_6(int idx) {
+        int res = 0;
+        for (int i = 0; i < 1000; i++) {
+            if (i%2 == 1) {
+                res *= 2;
+            } else {
+                res++;
+            }
+            array[idx] = 999;
+        }
+        return res;
+    }
+
+    final HashMap<String,Method> tests = new HashMap<>();
+    {
+        for (Method m : this.getClass().getDeclaredMethods()) {
+            if (m.getName().matches("test_(before|after|stores)_[0-9]+")) {
+                assert(Modifier.isStatic(m.getModifiers())) : m;
+                tests.put(m.getName(), m);
+            }
+        }
+    }
+
+    boolean success = true;
+    void doTest(String name, Runnable init, Function<String, Boolean> check) throws Exception {
+        Method m = tests.get(name);
+        for (int i = 0; i < 20000; i++) {
+            init.run();
+            m.invoke(null, 0);
+            success = success && check.apply(name);
+            if (!success) {
+                break;
+            }
+        }
+    }
+
+    static void array_init() {
+        array[0] = -1;
+    }
+
+    static boolean array_check(String name) {
+        boolean success = true;
+        if (array[0] != 999) {
+            success = false;
+            System.out.println(name + " failed: array[0] = " + array[0]);
+        }
+        return success;
+    }
+
+    static void array_init2() {
+        for (int i = 0; i < 6; i++) {
+            array[i] = -1;
+        }
+    }
+
+    static boolean array_check2(String name) {
+        boolean success = true;
+        for (int i = 0; i < 6; i++) {
+            if (array[i] != 999) {
+                success = false;
+                System.out.println(name + " failed: array[" + i + "] = " + array[i]);
+            }
+        }
+        return success;
+    }
+
+    static void array_init3() {
+        for (int i = 0; i < 3; i++) {
+            array[i] = -1;
+        }
+    }
+
+    static boolean array_check3(String name) {
+        boolean success = true;
+        for (int i = 0; i < 3; i++) {
+            if (array[i] != i) {
+                success = false;
+                System.out.println(name + " failed: array[" + i + "] = " + array[i]);
+            }
+        }
+        return success;
+    }
+
+    static void array_init4() {
+        for (int i = 0; i < 3; i++) {
+            byte_array[i] = -1;
+        }
+    }
+
+    static boolean array_check4(String name) {
+        boolean success = true;
+        for (int i = 0; i < 3; i++) {
+            if (byte_array[i] != i) {
+                success = false;
+                System.out.println(name + " failed: byte_array[" + i + "] = " + byte_array[i]);
+            }
+        }
+        return success;
+    }
+
+    static public void main(String[] args) throws Exception {
+        TestMoveStoresOutOfLoops test = new TestMoveStoresOutOfLoops();
+        test.doTest("test_after_1", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_after_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_after_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_after_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_after_5", TestMoveStoresOutOfLoops::array_init2, TestMoveStoresOutOfLoops::array_check2);
+        test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        array3[999] = true;
+        test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+
+        test.doTest("test_stores_1", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3);
+        test.doTest("test_stores_2", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3);
+        test.doTest("test_stores_3", TestMoveStoresOutOfLoops::array_init4, TestMoveStoresOutOfLoops::array_check4);
+
+        test.doTest("test_before_1", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_before_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_before_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_before_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_before_5", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+        test.doTest("test_before_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
+
+        if (!test.success) {
+            throw new RuntimeException("Some tests failed");
+        }
+    }
+}
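
For test_after_1 above, the transformation the test expects amounts to the following hand-rewritten form: only the last iteration's store is observable, so the store is sunk below the loop and the now-empty counted loop can be removed. The helper name is illustrative only:

    class SunkStore {
        // Original shape (see test_after_1):
        //   for (int i = 0; i < 1000; i++) { array[idx] = i; }
        // After sinking the store out of the loop:
        static void test_after_1_sunk(long[] array, int idx) {
            array[idx] = 999;  // only the final iteration's value is observable
        }
    }
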