6912521: System.arraycopy works slower than the simple loop for little lengths
author roland
Wed, 18 Feb 2015 18:14:07 +0100
changeset 29337 ef2be52deeaf
parent 29336 b287769dcff1
child 29338 92297a8bd48e
6912521: System.arraycopy works slower than the simple loop for little lengths Summary: convert small array copies to series of loads and stores Reviewed-by: kvn, vlivanov
hotspot/src/share/vm/opto/arraycopynode.cpp
hotspot/src/share/vm/opto/arraycopynode.hpp
hotspot/src/share/vm/opto/callnode.cpp
hotspot/src/share/vm/opto/callnode.hpp
hotspot/src/share/vm/opto/classes.cpp
hotspot/src/share/vm/opto/compile.cpp
hotspot/src/share/vm/opto/compile.hpp
hotspot/src/share/vm/opto/graphKit.cpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/macroArrayCopy.cpp
hotspot/test/compiler/arraycopy/TestArrayCopyAsLoadsStores.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/opto/arraycopynode.cpp	Wed Feb 18 18:14:07 2015 +0100
@@ -0,0 +1,584 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/arraycopynode.hpp"
+#include "opto/graphKit.hpp"
+
+ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled) // Creates a copy node whose variant is still None; a set_*() call selects it later
+  : CallNode(arraycopy_type(), NULL, TypeRawPtr::BOTTOM),
+    _alloc_tightly_coupled(alloc_tightly_coupled), // cached answer from the caller, see is_alloc_tightly_coupled()
+    _kind(None),                                   // variant not chosen yet
+    _arguments_validated(false) {                  // stays false unless a set_*(validated) call says otherwise
+  init_class_id(Class_ArrayCopy);
+  init_flags(Flag_is_macro);
+  C->add_macro_node(this);                         // hand the new node to the Compile object's macro node bookkeeping
+}
+
+uint ArrayCopyNode::size_of() const { return sizeof(ArrayCopyNode); } // this node adds fields to CallNode, so it reports its own size
+
+ArrayCopyNode* ArrayCopyNode::make(GraphKit* kit, bool may_throw,
+                                   Node* src, Node* src_offset,
+                                   Node* dest, Node* dest_offset,
+                                   Node* length,
+                                   bool alloc_tightly_coupled,
+                                   Node* src_klass, Node* dest_klass,
+                                   Node* src_length, Node* dest_length) {
+  // Creates an ArrayCopyNode and fills in its inputs. No projections are created here and the node is not passed to gvn().transform().
+  ArrayCopyNode* ac = new ArrayCopyNode(kit->C, alloc_tightly_coupled);
+  Node* prev_mem = kit->set_predefined_input_for_runtime_call(ac); // NOTE(review): prev_mem is not read again in this function
+  // One required input per ArrayCopyNode::* slot, taken unchanged from the arguments.
+  ac->init_req(ArrayCopyNode::Src, src);
+  ac->init_req(ArrayCopyNode::SrcPos, src_offset);
+  ac->init_req(ArrayCopyNode::Dest, dest);
+  ac->init_req(ArrayCopyNode::DestPos, dest_offset);
+  ac->init_req(ArrayCopyNode::Length, length);
+  ac->init_req(ArrayCopyNode::SrcLen, src_length);
+  ac->init_req(ArrayCopyNode::DestLen, dest_length);
+  ac->init_req(ArrayCopyNode::SrcKlass, src_klass);
+  ac->init_req(ArrayCopyNode::DestKlass, dest_klass);
+  // Only a copy that may throw gets the kit's current i_o as input and safepoint edges.
+  if (may_throw) {
+    ac->set_req(TypeFunc::I_O , kit->i_o());
+    kit->add_safepoint_edges(ac, false);
+  }
+  // The variant (set_arraycopy(), set_clonebasic(), ...) is not selected here.
+  return ac;
+}
+
+void ArrayCopyNode::connect_outputs(GraphKit* kit) { // Makes projections of this node the kit's current control and i_o, and updates the kit's memory
+  kit->set_all_memory_call(this, true);
+  kit->set_control(kit->gvn().transform(new ProjNode(this,TypeFunc::Control)));
+  kit->set_i_o(kit->gvn().transform(new ProjNode(this, TypeFunc::I_O)));
+  kit->make_slow_call_ex(this, kit->env()->Throwable_klass(), true); // presumably builds the exception path for Throwable - TODO confirm
+  kit->set_all_memory_call(this); // NOTE(review): second call, this time without the extra 'true' argument
+}
+
+#ifndef PRODUCT
+const char* ArrayCopyNode::_kind_names[] = {"none", "arraycopy", "clone", "oop array clone", "CopyOf", "CopyOfRange"}; // one name per _kind value, in enum order
+void ArrayCopyNode::dump_spec(outputStream *st) const { // non-product builds: appends " (kind[, validated arguments][, tightly coupled allocation])"
+  CallNode::dump_spec(st); // "validated" is a flag separate from _kind, so it is printed from _arguments_validated below
+  st->print(" (%s%s%s)", _kind_names[_kind], _arguments_validated ? ", validated arguments" : "", _alloc_tightly_coupled ? ", tightly coupled allocation" : "");
+}
+#endif
+
+intptr_t ArrayCopyNode::get_length_if_constant(PhaseGVN *phase) const {
+  // Constant value of the Length input; -1 when its type is TOP, and -1 is also the fallback passed to find_*_con().
+  Node* len_node = in(ArrayCopyNode::Length);
+  if (phase->type(len_node) == Type::TOP) {
+    return -1;
+  }
+  assert(is_clonebasic() || is_arraycopy() || is_copyof() || is_copyofrange(), "unexpected array copy type");
+  // A basic clone is queried for an intptr_t constant, every other variant for an int constant.
+  if (is_clonebasic()) {
+    return len_node->find_intptr_t_con(-1);
+  }
+  return len_node->find_int_con(-1);
+}
+
+int ArrayCopyNode::get_count(PhaseGVN *phase) const {
+  // Number of array elements (copy or array clone) or of non-static fields
+  // (instance clone) to copy. -1 means the number is not known or the
+  // instance clone is rejected by the checks below.
+  const Type* t_src = phase->type(in(ArrayCopyNode::Src));
+
+  if (!is_clonebasic()) {
+    // arraycopy/copyOf/copyOfRange: the count is the Length input itself
+    return get_length_if_constant(phase);
+  }
+
+  const TypeInstPtr* inst = t_src->isa_instptr();
+  if (inst != NULL) {
+    ciInstanceKlass* holder = inst->klass()->as_instance_klass();
+    // Give up when the klass is not exact and is an interface or has a
+    // subclass.
+    bool klass_not_pinned = !inst->klass_is_exact() &&
+                            (holder->is_interface() || holder->has_subklass());
+    // ciInstanceKlass::nof_nonstatic_fields() doesn't take injected
+    // fields into account. They are rare anyway so easier to simply
+    // skip instances with injected fields.
+    if (klass_not_pinned || holder->has_injected_fields()) {
+      return -1;
+    }
+    return holder->nof_nonstatic_fields();
+  }
+
+  // clone passes a length as a rounded number of longs. An array is cloned
+  // element by element, so the count comes from the array type's size.
+  const TypeAryPtr* ary = t_src->isa_aryptr();
+  assert (ary != NULL, "not an array or instance?");
+  assert((get_length_if_constant(phase) == -1) == !ary->size()->is_con(), "inconsistent");
+
+  return ary->size()->is_con() ? ary->size()->get_con() : -1;
+}
+
+Node* ArrayCopyNode::try_clone_instance(PhaseGVN *phase, bool can_reshape, int count) {
+  // Expands a basic clone of an instance into one load/store pair per
+  // non-static field. Returns the new memory state, or NULL when this node is
+  // not a basic clone of an instance or finish_transform() rejects the change.
+  if (!is_clonebasic()) {
+    return NULL;
+  }
+
+  Node* src = in(ArrayCopyNode::Src);
+  Node* dest = in(ArrayCopyNode::Dest);
+  Node* ctl = in(TypeFunc::Control);
+  Node* in_mem = in(TypeFunc::Memory);
+
+  assert(src->is_AddP(), "should be base + off");
+  assert(dest->is_AddP(), "should be base + off");
+  Node* base_src = src->in(AddPNode::Base);
+  Node* base_dest = dest->in(AddPNode::Base);
+
+  const TypeInstPtr* inst_src = phase->type(src)->isa_instptr();
+  if (inst_src == NULL) {
+    return NULL;  // source is not an instance (e.g. an array clone)
+  }
+
+  ciInstanceKlass* ik = inst_src->klass()->as_instance_klass();
+  if (!inst_src->klass_is_exact()) {
+    assert(!ik->is_interface() && !ik->has_subklass(), "inconsistent klass hierarchy");
+    phase->C->dependencies()->assert_leaf_type(ik);
+  }
+  assert(ik->nof_nonstatic_fields() <= ArrayCopyLoadStoreMaxElem, "too many fields");
+
+  // Allocate the merged memory state only now that it is certain to be used.
+  MergeMemNode* mem = MergeMemNode::make(in_mem);
+
+  for (int i = 0; i < count; i++) {
+    ciField* field = ik->nonstatic_field_at(i);
+    int fieldidx = phase->C->alias_type(field)->index();
+    const TypePtr* adr_type = phase->C->alias_type(field)->adr_type();
+    Node* off = phase->MakeConX(field->offset());
+    Node* next_src = phase->transform(new AddPNode(base_src,base_src,off));
+    Node* next_dest = phase->transform(new AddPNode(base_dest,base_dest,off));
+    BasicType bt = field->layout_type();
+
+    // Value type of the load: a klass-based oop type when the field's type
+    // is loaded, TypeInstPtr::BOTTOM when it is not, the basic type otherwise.
+    const Type *type;
+    if (bt == T_OBJECT) {
+      if (!field->type()->is_loaded()) {
+        type = TypeInstPtr::BOTTOM;
+      } else {
+        ciType* field_klass = field->type();
+        type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+      }
+    } else {
+      type = Type::get_const_basic_type(bt);
+    }
+
+    // Load and store both go through the memory slice of this field's alias
+    // index, and the store becomes the new state of that slice.
+    Node* v = LoadNode::make(*phase, ctl, mem->memory_at(fieldidx), next_src, adr_type, type, bt, MemNode::unordered);
+    v = phase->transform(v);
+    Node* s = StoreNode::make(*phase, ctl, mem->memory_at(fieldidx), next_dest, adr_type, v, bt, MemNode::unordered);
+    s = phase->transform(s);
+    mem->set_memory_at(fieldidx, s);
+  }
+
+  if (!finish_transform(phase, can_reshape, ctl, mem)) {
+    return NULL;
+  }
+
+  return mem;
+}
+
+bool ArrayCopyNode::prepare_array_copy(PhaseGVN *phase, bool can_reshape, // Computes the addresses and element type for an array copy; false = cannot expand
+                                       Node*& adr_src,        // out: address of the first source element to copy
+                                       Node*& base_src,       // out: base used for the source AddPNodes
+                                       Node*& adr_dest,       // out: address of the first destination element
+                                       Node*& base_dest,      // out: base used for the destination AddPNodes
+                                       BasicType& copy_type,  // out: element BasicType (T_ARRAY is mapped to T_OBJECT)
+                                       const Type*& value_type, // out: ary_src->elem()
+                                       bool& disjoint_bases) {  // out: true when source and destination are known not to overlap
+  Node* src = in(ArrayCopyNode::Src);
+  Node* dest = in(ArrayCopyNode::Dest);
+  const Type* src_type = phase->type(src);
+  const TypeAryPtr* ary_src = src_type->isa_aryptr();
+  // Two cases: arraycopy/copyOf/copyOfRange (src/dest are the arrays), or a basic clone (src/dest are AddPs).
+  if (is_arraycopy() || is_copyofrange() || is_copyof()) {
+    const Type* dest_type = phase->type(dest);
+    const TypeAryPtr* ary_dest = dest_type->isa_aryptr();
+    Node* src_offset = in(ArrayCopyNode::SrcPos);
+    Node* dest_offset = in(ArrayCopyNode::DestPos);
+
+    // newly allocated object is guaranteed to not overlap with source object
+    disjoint_bases = is_alloc_tightly_coupled();
+
+    if (ary_src  == NULL || ary_src->klass()  == NULL ||
+        ary_dest == NULL || ary_dest->klass() == NULL) {
+      // We don't know if arguments are arrays
+      return false;
+    }
+
+    BasicType src_elem  = ary_src->klass()->as_array_klass()->element_type()->basic_type();
+    BasicType dest_elem = ary_dest->klass()->as_array_klass()->element_type()->basic_type();
+    if (src_elem  == T_ARRAY)  src_elem  = T_OBJECT; // arrays of arrays are copied as object arrays
+    if (dest_elem == T_ARRAY)  dest_elem = T_OBJECT;
+
+    if (src_elem != dest_elem || dest_elem == T_VOID) {
+      // We don't know if arguments are arrays of the same type
+      return false;
+    }
+
+    if (dest_elem == T_OBJECT && (!is_alloc_tightly_coupled() || !GraphKit::use_ReduceInitialCardMarks())) {
+      // It's an object array copy but we can't emit the card marking
+      // that is needed
+      return false;
+    }
+
+    value_type = ary_src->elem();
+
+    base_src = src;
+    base_dest = dest;
+
+    uint shift  = exact_log2(type2aelembytes(dest_elem));       // log2 of the element size in bytes
+    uint header = arrayOopDesc::base_offset_in_bytes(dest_elem); // byte offset of element 0 in the array object
+    // The addresses are built in two steps below: array + (offset << shift), then + header.
+    adr_src = src;
+    adr_dest = dest;
+
+    src_offset = Compile::conv_I2X_index(phase, src_offset, ary_src->size());
+    dest_offset = Compile::conv_I2X_index(phase, dest_offset, ary_dest->size());
+
+    Node* src_scale = phase->transform(new LShiftXNode(src_offset, phase->intcon(shift)));
+    Node* dest_scale = phase->transform(new LShiftXNode(dest_offset, phase->intcon(shift)));
+
+    adr_src = phase->transform(new AddPNode(base_src, adr_src, src_scale));
+    adr_dest = phase->transform(new AddPNode(base_dest, adr_dest, dest_scale));
+
+    adr_src = new AddPNode(base_src, adr_src, phase->MakeConX(header));
+    adr_dest = new AddPNode(base_dest, adr_dest, phase->MakeConX(header));
+
+    adr_src = phase->transform(adr_src);
+    adr_dest = phase->transform(adr_dest);
+
+    copy_type = dest_elem;
+  } else {
+    assert (is_clonebasic(), "should be");
+
+    disjoint_bases = true; // source and destination of a clone are treated as never overlapping
+    assert(src->is_AddP(), "should be base + off");
+    assert(dest->is_AddP(), "should be base + off");
+    adr_src = src;
+    base_src = src->in(AddPNode::Base);
+    adr_dest = dest;
+    base_dest = dest->in(AddPNode::Base);
+    // NOTE(review): ary_src is dereferenced below without a NULL check; this branch relies on src being an array - confirm.
+    assert(phase->type(src->in(AddPNode::Offset))->is_intptr_t()->get_con() == phase->type(dest->in(AddPNode::Offset))->is_intptr_t()->get_con(), "same start offset?");
+    BasicType elem = ary_src->klass()->as_array_klass()->element_type()->basic_type();
+    if (elem == T_ARRAY)  elem = T_OBJECT;
+    // diff = distance in bytes from the clone's start offset to element 0
+    int diff = arrayOopDesc::base_offset_in_bytes(elem) - phase->type(src->in(AddPNode::Offset))->is_intptr_t()->get_con();
+    assert(diff >= 0, "clone should not start after 1st array element");
+    if (diff > 0) {
+      adr_src = phase->transform(new AddPNode(base_src, adr_src, phase->MakeConX(diff)));
+      adr_dest = phase->transform(new AddPNode(base_dest, adr_dest, phase->MakeConX(diff)));
+    }
+
+    copy_type = elem;
+    value_type = ary_src->elem();
+  }
+  return true;
+}
+
+const TypePtr* ArrayCopyNode::get_address_type(PhaseGVN *phase, Node* n) {
+  // Pointer type of n with its offset replaced by OffsetBot; used as the address type of the element loads/stores.
+  const Type* node_type = phase->type(n);
+  assert(node_type != Type::TOP, "unexpected type");
+  const TypePtr* ptr_type = node_type->isa_ptr();
+  // adjust the pointer type to be the correct array element address type
+  return ptr_type->add_offset(Type::OffsetBot);
+}
+
+void ArrayCopyNode::array_copy_test_overlap(PhaseGVN *phase, bool can_reshape, bool disjoint_bases, int count, Node*& forward_ctl, Node*& backward_ctl) {
+  // Sets the control for a forward copy and, when both directions may be needed, for a backward copy too.
+  Node* ctl = in(TypeFunc::Control);
+  if (disjoint_bases || count <= 1) {
+    // No test needed: only forward_ctl is assigned, backward_ctl keeps the value the caller passed in.
+    forward_ctl = ctl;
+    return;
+  }
+  Node* src_pos = in(ArrayCopyNode::SrcPos);
+  Node* dest_pos = in(ArrayCopyNode::DestPos);
+  assert(src_pos != NULL && dest_pos != NULL, "should be");
+  Node* pos_cmp = phase->transform(new CmpINode(src_pos, dest_pos));
+  Node* src_below_dest = phase->transform(new BoolNode(pos_cmp, BoolTest::lt)); // true selects the backward copy
+  IfNode* branch = new IfNode(ctl, src_below_dest, PROB_FAIR, COUNT_UNKNOWN);
+  phase->transform(branch);
+  forward_ctl = phase->transform(new IfFalseNode(branch));
+  backward_ctl = phase->transform(new IfTrueNode(branch));
+}
+
+Node* ArrayCopyNode::array_copy_forward(PhaseGVN *phase,     // Emits count load/store pairs, lowest element first; returns the resulting memory
+                                        bool can_reshape,
+                                        Node* forward_ctl,    // control of the copy; top() means this path is not needed
+                                        Node* start_mem_src,  // memory state read by the first load
+                                        Node* start_mem_dest, // memory state the first store is chained on
+                                        const TypePtr* atp_src,
+                                        const TypePtr* atp_dest,
+                                        Node* adr_src,        // address of the first source element
+                                        Node* base_src,
+                                        Node* adr_dest,       // address of the first destination element
+                                        Node* base_dest,
+                                        BasicType copy_type,
+                                        const Type* value_type,
+                                        int count) {
+  Node* mem = phase->C->top();  // returned unchanged when forward_ctl is top()
+  if (!forward_ctl->is_top()) {
+    // copy forward
+    mem = start_mem_dest;
+    // Element 0 is copied first; its load is the only one that reads start_mem_src.
+    if (count > 0) {
+      Node* v = LoadNode::make(*phase, forward_ctl, start_mem_src, adr_src, atp_src, value_type, copy_type, MemNode::unordered);
+      v = phase->transform(v);
+      mem = StoreNode::make(*phase, forward_ctl, mem, adr_dest, atp_dest, v, copy_type, MemNode::unordered);
+      mem = phase->transform(mem);
+      for (int i = 1; i < count; i++) {
+        Node* off  = phase->MakeConX(type2aelembytes(copy_type) * i); // byte offset of element i from element 0
+        Node* next_src = phase->transform(new AddPNode(base_src,adr_src,off));
+        Node* next_dest = phase->transform(new AddPNode(base_dest,adr_dest,off));
+        v = LoadNode::make(*phase, forward_ctl, mem, next_src, atp_src, value_type, copy_type, MemNode::unordered); // reads the memory produced by the previous store
+        v = phase->transform(v);
+        mem = StoreNode::make(*phase, forward_ctl,mem,next_dest,atp_dest,v, copy_type, MemNode::unordered);
+        mem = phase->transform(mem);
+      }
+    } else if(can_reshape) {
+      PhaseIterGVN* igvn = phase->is_IterGVN(); // count == 0: nothing emitted, the two addresses are put on the IGVN worklist
+      igvn->_worklist.push(adr_src);
+      igvn->_worklist.push(adr_dest);
+    }
+  }
+  return mem;
+}
+
+Node* ArrayCopyNode::array_copy_backward(PhaseGVN *phase,     // Emits count load/store pairs, highest element first; returns the resulting memory
+                                         bool can_reshape,
+                                         Node* backward_ctl,   // control of the copy; top() means this path is not needed
+                                         Node* start_mem_src,  // NOTE(review): never read in this function, unlike in array_copy_forward
+                                         Node* start_mem_dest, // memory state both the loads and the stores are chained on
+                                         const TypePtr* atp_src,
+                                         const TypePtr* atp_dest,
+                                         Node* adr_src,        // address of the first source element
+                                         Node* base_src,
+                                         Node* adr_dest,       // address of the first destination element
+                                         Node* base_dest,
+                                         BasicType copy_type,
+                                         const Type* value_type,
+                                         int count) {
+  Node* mem = phase->C->top();  // returned unchanged when backward_ctl is top()
+  if (!backward_ctl->is_top()) {
+    // copy backward
+    mem = start_mem_dest;
+    // Elements count-1 down to 1 are copied in the loop, element 0 after it.
+    if (count > 0) {
+      for (int i = count-1; i >= 1; i--) {
+        Node* off  = phase->MakeConX(type2aelembytes(copy_type) * i); // byte offset of element i from element 0
+        Node* next_src = phase->transform(new AddPNode(base_src,adr_src,off));
+        Node* next_dest = phase->transform(new AddPNode(base_dest,adr_dest,off));
+        Node* v = LoadNode::make(*phase, backward_ctl, mem, next_src, atp_src, value_type, copy_type, MemNode::unordered);
+        v = phase->transform(v);
+        mem = StoreNode::make(*phase, backward_ctl,mem,next_dest,atp_dest,v, copy_type, MemNode::unordered);
+        mem = phase->transform(mem);
+      }
+      Node* v = LoadNode::make(*phase, backward_ctl, mem, adr_src, atp_src, value_type, copy_type, MemNode::unordered);
+      v = phase->transform(v);
+      mem = StoreNode::make(*phase, backward_ctl, mem, adr_dest, atp_dest, v, copy_type, MemNode::unordered);
+      mem = phase->transform(mem);
+    } else if(can_reshape) {
+      PhaseIterGVN* igvn = phase->is_IterGVN(); // count == 0: nothing emitted, the two addresses are put on the IGVN worklist
+      igvn->_worklist.push(adr_src);
+      igvn->_worklist.push(adr_dest);
+    }
+  }
+  return mem;
+}
+
+bool ArrayCopyNode::finish_transform(PhaseGVN *phase, bool can_reshape, // Installs ctl/mem in place of this node's outputs; false = the expansion must be abandoned
+                                     Node* ctl, Node *mem) {
+  if (can_reshape) {
+    PhaseIterGVN* igvn = phase->is_IterGVN();
+    igvn->set_delay_transform(false);
+    if (is_clonebasic()) {
+      Node* out_mem = proj_out(TypeFunc::Memory);
+      // The only shape handled: memory projection -> a single MergeMem -> a single MemBar.
+      if (out_mem->outcnt() != 1 || !out_mem->raw_out(0)->is_MergeMem() ||
+          out_mem->raw_out(0)->outcnt() != 1 || !out_mem->raw_out(0)->raw_out(0)->is_MemBar()) {
+        assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking");
+        return false;
+      }
+      // The MergeMem that follows the memory projection is replaced by the new memory state.
+      igvn->replace_node(out_mem->raw_out(0), mem);
+
+      Node* out_ctl = proj_out(TypeFunc::Control);
+      igvn->replace_node(out_ctl, ctl);
+    } else {
+      // replace fallthrough projections of the ArrayCopyNode by the
+      // new memory, control and the input IO.
+      CallProjections callprojs;
+      extract_projections(&callprojs, true);
+
+      igvn->replace_node(callprojs.fallthrough_ioproj, in(TypeFunc::I_O));
+      igvn->replace_node(callprojs.fallthrough_memproj, mem);
+      igvn->replace_node(callprojs.fallthrough_catchproj, ctl);
+
+      // The ArrayCopyNode is not disconnected. It still has the
+      // projections for the exception case. Set the control input of
+      // this ArrayCopyNode to top() so that this part of the graph
+      // stays consistent but is
+      // eventually removed.
+
+      set_req(0, phase->C->top());
+      remove_dead_region(phase, can_reshape);
+    }
+  } else {
+    if (in(TypeFunc::Control) != ctl) {
+      // we can't return new memory and control from Ideal at parse time
+      assert(!is_clonebasic(), "added control for clone?");
+      return false;
+    }
+  }
+  return true;
+}
+
+
+Node *ArrayCopyNode::Ideal(PhaseGVN *phase, bool can_reshape) { // Tries to replace a small constant-length copy/clone by loads and stores; NULL = no change
+  if (remove_dead_region(phase, can_reshape))  return this;
+
+  if (StressArrayCopyMacroNode && !can_reshape) {
+    phase->record_for_igvn(this); // stress flag: skip the parse-time attempt and let IGVN revisit this node
+    return NULL;
+  }
+
+  // See if it's a small array copy and we can inline it as
+  // loads/stores
+  // Here we can only do:
+  // - arraycopy if all arguments were validated before and we don't
+  // need card marking
+  // - clone for which we don't need to do card marking
+
+  if (!is_clonebasic() && !is_arraycopy_validated() &&
+      !is_copyofrange_validated() && !is_copyof_validated()) {
+    return NULL;
+  }
+
+  if (in(TypeFunc::Control)->is_top() || in(TypeFunc::Memory)->is_top()) {
+    return NULL;
+  }
+
+  int count = get_count(phase);
+  // Only a known count in [0, ArrayCopyLoadStoreMaxElem] is expanded.
+  if (count < 0 || count > ArrayCopyLoadStoreMaxElem) {
+    return NULL;
+  }
+  // Instance clones are handled on their own; NULL here means "continue with the array code below".
+  Node* mem = try_clone_instance(phase, can_reshape, count);
+  if (mem != NULL) {
+    return mem;
+  }
+
+  Node* adr_src = NULL;
+  Node* base_src = NULL;
+  Node* adr_dest = NULL;
+  Node* base_dest = NULL;
+  BasicType copy_type = T_ILLEGAL;
+  const Type* value_type = NULL;
+  bool disjoint_bases = false;
+
+  if (!prepare_array_copy(phase, can_reshape,
+                          adr_src, base_src, adr_dest, base_dest,
+                          copy_type, value_type, disjoint_bases)) {
+    return NULL;
+  }
+
+  Node* src = in(ArrayCopyNode::Src);
+  Node* dest = in(ArrayCopyNode::Dest);
+  const TypePtr* atp_src = get_address_type(phase, src);
+  const TypePtr* atp_dest = get_address_type(phase, dest);
+  uint alias_idx_src = phase->C->get_alias_index(atp_src);
+  uint alias_idx_dest = phase->C->get_alias_index(atp_dest);
+  // Start from the input memory, narrowed to the source/destination slices when it is a MergeMem.
+  Node *in_mem = in(TypeFunc::Memory);
+  Node *start_mem_src = in_mem;
+  Node *start_mem_dest = in_mem;
+  if (in_mem->is_MergeMem()) {
+    start_mem_src = in_mem->as_MergeMem()->memory_at(alias_idx_src);
+    start_mem_dest = in_mem->as_MergeMem()->memory_at(alias_idx_dest);
+  }
+
+  // During IGVN, transforms are delayed while the new subgraph is built and re-enabled further down.
+  if (can_reshape) {
+    assert(!phase->is_IterGVN()->delay_transform(), "cannot delay transforms");
+    phase->is_IterGVN()->set_delay_transform(true);
+  }
+
+  Node* backward_ctl = phase->C->top();
+  Node* forward_ctl = phase->C->top();
+  array_copy_test_overlap(phase, can_reshape, disjoint_bases, count, forward_ctl, backward_ctl);
+  // Each helper returns top() as memory when its control is top(), i.e. when that direction is not used.
+  Node* forward_mem = array_copy_forward(phase, can_reshape, forward_ctl,
+                                         start_mem_src, start_mem_dest,
+                                         atp_src, atp_dest,
+                                         adr_src, base_src, adr_dest, base_dest,
+                                         copy_type, value_type, count);
+
+  Node* backward_mem = array_copy_backward(phase, can_reshape, backward_ctl,
+                                           start_mem_src, start_mem_dest,
+                                           atp_src, atp_dest,
+                                           adr_src, base_src, adr_dest, base_dest,
+                                           copy_type, value_type, count);
+  // Both directions live: merge control with a Region and memory with a Phi; otherwise use the live one.
+  Node* ctl = NULL;
+  if (!forward_ctl->is_top() && !backward_ctl->is_top()) {
+    ctl = new RegionNode(3);
+    mem = new PhiNode(ctl, Type::MEMORY, atp_dest);
+    ctl->init_req(1, forward_ctl);
+    mem->init_req(1, forward_mem);
+    ctl->init_req(2, backward_ctl);
+    mem->init_req(2, backward_mem);
+    ctl = phase->transform(ctl);
+    mem = phase->transform(mem);
+  } else if (!forward_ctl->is_top()) {
+    ctl = forward_ctl;
+    mem = forward_mem;
+  } else {
+    assert(!backward_ctl->is_top(), "no copy?");
+    ctl = backward_ctl;
+    mem = backward_mem;
+  }
+
+  if (can_reshape) {
+    assert(phase->is_IterGVN()->delay_transform(), "should be delaying transforms");
+    phase->is_IterGVN()->set_delay_transform(false);
+  }
+  // Result: the input memory with only the destination slice replaced by the copy's memory.
+  MergeMemNode* out_mem = MergeMemNode::make(in_mem);
+  out_mem->set_memory_at(alias_idx_dest, mem);
+  mem = out_mem;
+
+  if (!finish_transform(phase, can_reshape, ctl, mem)) {
+    return NULL;
+  }
+
+  return mem;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/opto/arraycopynode.hpp	Wed Feb 18 18:14:07 2015 +0100
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_OPTO_ARRAYCOPYNODE_HPP
+#define SHARE_VM_OPTO_ARRAYCOPYNODE_HPP
+
+#include "opto/callnode.hpp"
+
+class GraphKit;
+
+class ArrayCopyNode : public CallNode { // Call node for System.arraycopy(), Arrays.copyOf()/copyOfRange() and clone (see the _kind enum below)
+private:
+
+  // What kind of arraycopy variant is this?
+  enum {
+    None,            // not set yet
+    ArrayCopy,       // System.arraycopy()
+    CloneBasic,      // A clone that can be copied by 64 bit chunks
+    CloneOop,        // An oop array clone
+    CopyOf,          // Arrays.copyOf()
+    CopyOfRange      // Arrays.copyOfRange()
+  } _kind;
+
+#ifndef PRODUCT
+  static const char* _kind_names[CopyOfRange+1]; // one entry per enum value above, indexed by _kind in dump_spec()
+#endif
+  // Is the alloc obtained with
+  // AllocateArrayNode::Ideal_array_allocation() tightly coupled
+  // (arraycopy follows immediately the allocation)?
+  // We cache the result of LibraryCallKit::tightly_coupled_allocation
+  // here because it's much easier to find whether there's a tightly
+  // coupled allocation at parse time than at macro expansion time. At
+  // macro expansion time, for every use of the allocation node we
+  // would need to figure out whether it happens after the arraycopy (and
+  // can be ignored) or between the allocation and the arraycopy. At
+  // parse time, it's straightforward because whatever happens after
+  // the arraycopy is not parsed yet so doesn't exist when
+  // LibraryCallKit::tightly_coupled_allocation() is called.
+  bool _alloc_tightly_coupled;
+  // Set by set_arraycopy()/set_copyof()/set_copyofrange(); the clone setters leave it untouched.
+  bool _arguments_validated;
+
+  static const TypeFunc* arraycopy_type() { // Signature of the call: the nine inputs listed in the public enum below, no result
+    const Type** fields = TypeTuple::fields(ParmLimit - TypeFunc::Parms);
+    fields[Src]       = TypeInstPtr::BOTTOM;
+    fields[SrcPos]    = TypeInt::INT;
+    fields[Dest]      = TypeInstPtr::BOTTOM;
+    fields[DestPos]   = TypeInt::INT;
+    fields[Length]    = TypeInt::INT;
+    fields[SrcLen]    = TypeInt::INT;
+    fields[DestLen]   = TypeInt::INT;
+    fields[SrcKlass]  = TypeKlassPtr::BOTTOM;
+    fields[DestKlass] = TypeKlassPtr::BOTTOM;
+    const TypeTuple *domain = TypeTuple::make(ParmLimit, fields);
+
+    // create result type (range)
+    fields = TypeTuple::fields(0);
+
+    const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+    return TypeFunc::make(domain, range);
+  }
+
+  ArrayCopyNode(Compile* C, bool alloc_tightly_coupled); // private: nodes are created through make()
+
+  intptr_t get_length_if_constant(PhaseGVN *phase) const;
+  int get_count(PhaseGVN *phase) const;
+  static const TypePtr* get_address_type(PhaseGVN *phase, Node* n);
+
+  Node* try_clone_instance(PhaseGVN *phase, bool can_reshape, int count);
+  Node* conv_I2X_offset(PhaseGVN *phase, Node* offset, const TypeAryPtr* ary_t); // NOTE(review): no definition in arraycopynode.cpp of this change - confirm it is still needed
+  bool prepare_array_copy(PhaseGVN *phase, bool can_reshape,
+                          Node*& adr_src, Node*& base_src, Node*& adr_dest, Node*& base_dest,
+                          BasicType& copy_type, const Type*& value_type, bool& disjoint_bases);
+  void array_copy_test_overlap(PhaseGVN *phase, bool can_reshape,
+                               bool disjoint_bases, int count,
+                               Node*& forward_ctl, Node*& backward_ctl);
+  Node* array_copy_forward(PhaseGVN *phase, bool can_reshape, Node* ctl,
+                           Node* start_mem_src, Node* start_mem_dest,
+                           const TypePtr* atp_src, const TypePtr* atp_dest,
+                           Node* adr_src, Node* base_src, Node* adr_dest, Node* base_dest,
+                           BasicType copy_type, const Type* value_type, int count);
+  Node* array_copy_backward(PhaseGVN *phase, bool can_reshape, Node* ctl,
+                            Node *start_mem_src, Node* start_mem_dest,
+                            const TypePtr* atp_src, const TypePtr* atp_dest,
+                            Node* adr_src, Node* base_src, Node* adr_dest, Node* base_dest,
+                            BasicType copy_type, const Type* value_type, int count);
+  bool finish_transform(PhaseGVN *phase, bool can_reshape,
+                        Node* ctl, Node *mem);
+
+public:
+
+  enum { // Input slots of the node, starting right after the standard call inputs (TypeFunc::Parms)
+    Src   = TypeFunc::Parms,
+    SrcPos,
+    Dest,
+    DestPos,
+    Length,
+    SrcLen,
+    DestLen,
+    SrcKlass,
+    DestKlass,
+    ParmLimit
+  };
+
+  static ArrayCopyNode* make(GraphKit* kit, bool may_throw,
+                             Node* src, Node* src_offset,
+                             Node* dest,  Node* dest_offset,
+                             Node* length,
+                             bool alloc_tightly_coupled,
+                             Node* src_klass = NULL, Node* dest_klass = NULL,
+                             Node* src_length = NULL, Node* dest_length = NULL);
+
+  void connect_outputs(GraphKit* kit);
+  // Variant queries. Each asserts that a variant has already been selected with one of the setters below.
+  bool is_arraycopy()             const  { assert(_kind != None, "should bet set"); return _kind == ArrayCopy; }
+  bool is_arraycopy_validated()   const  { assert(_kind != None, "should bet set"); return _kind == ArrayCopy && _arguments_validated; }
+  bool is_clonebasic()            const  { assert(_kind != None, "should bet set"); return _kind == CloneBasic; }
+  bool is_cloneoop()              const  { assert(_kind != None, "should bet set"); return _kind == CloneOop; }
+  bool is_copyof()                const  { assert(_kind != None, "should bet set"); return _kind == CopyOf; }
+  bool is_copyof_validated()      const  { assert(_kind != None, "should bet set"); return _kind == CopyOf && _arguments_validated; }
+  bool is_copyofrange()           const  { assert(_kind != None, "should bet set"); return _kind == CopyOfRange; }
+  bool is_copyofrange_validated() const  { assert(_kind != None, "should bet set"); return _kind == CopyOfRange && _arguments_validated; }
+  // Variant setters. Each asserts that no variant was selected before, so a node's variant is chosen once.
+  void set_arraycopy(bool validated)   { assert(_kind == None, "shouldn't bet set yet"); _kind = ArrayCopy; _arguments_validated = validated; }
+  void set_clonebasic()                { assert(_kind == None, "shouldn't bet set yet"); _kind = CloneBasic; }
+  void set_cloneoop()                  { assert(_kind == None, "shouldn't bet set yet"); _kind = CloneOop; }
+  void set_copyof(bool validated)      { assert(_kind == None, "shouldn't bet set yet"); _kind = CopyOf; _arguments_validated = validated; }
+  void set_copyofrange(bool validated) { assert(_kind == None, "shouldn't bet set yet"); _kind = CopyOfRange; _arguments_validated = validated; }
+
+  virtual int Opcode() const;
+  virtual uint size_of() const; // Size is bigger
+  virtual bool guaranteed_safepoint()  { return false; }
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+  bool is_alloc_tightly_coupled() const { return _alloc_tightly_coupled; }
+
+#ifndef PRODUCT
+  virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+#endif // SHARE_VM_OPTO_ARRAYCOPYNODE_HPP
--- a/hotspot/src/share/vm/opto/callnode.cpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Wed Feb 18 18:14:07 2015 +0100
@@ -1875,194 +1875,3 @@
     log->tail(tag);
   }
 }
-
-ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled)
-  : CallNode(arraycopy_type(), NULL, TypeRawPtr::BOTTOM),
-    _alloc_tightly_coupled(alloc_tightly_coupled),
-    _kind(None),
-    _arguments_validated(false) {
-  init_class_id(Class_ArrayCopy);
-  init_flags(Flag_is_macro);
-  C->add_macro_node(this);
-}
-
-uint ArrayCopyNode::size_of() const { return sizeof(*this); }
-
-ArrayCopyNode* ArrayCopyNode::make(GraphKit* kit, bool may_throw,
-                                   Node* src, Node* src_offset,
-                                   Node* dest, Node* dest_offset,
-                                   Node* length,
-                                   bool alloc_tightly_coupled,
-                                   Node* src_klass, Node* dest_klass,
-                                   Node* src_length, Node* dest_length) {
-
-  ArrayCopyNode* ac = new ArrayCopyNode(kit->C, alloc_tightly_coupled);
-  Node* prev_mem = kit->set_predefined_input_for_runtime_call(ac);
-
-  ac->init_req(ArrayCopyNode::Src, src);
-  ac->init_req(ArrayCopyNode::SrcPos, src_offset);
-  ac->init_req(ArrayCopyNode::Dest, dest);
-  ac->init_req(ArrayCopyNode::DestPos, dest_offset);
-  ac->init_req(ArrayCopyNode::Length, length);
-  ac->init_req(ArrayCopyNode::SrcLen, src_length);
-  ac->init_req(ArrayCopyNode::DestLen, dest_length);
-  ac->init_req(ArrayCopyNode::SrcKlass, src_klass);
-  ac->init_req(ArrayCopyNode::DestKlass, dest_klass);
-
-  if (may_throw) {
-    ac->set_req(TypeFunc::I_O , kit->i_o());
-    kit->add_safepoint_edges(ac, false);
-  }
-
-  return ac;
-}
-
-void ArrayCopyNode::connect_outputs(GraphKit* kit) {
-  kit->set_all_memory_call(this, true);
-  kit->set_control(kit->gvn().transform(new ProjNode(this,TypeFunc::Control)));
-  kit->set_i_o(kit->gvn().transform(new ProjNode(this, TypeFunc::I_O)));
-  kit->make_slow_call_ex(this, kit->env()->Throwable_klass(), true);
-  kit->set_all_memory_call(this);
-}
-
-#ifndef PRODUCT
-const char* ArrayCopyNode::_kind_names[] = {"arraycopy", "arraycopy, validated arguments", "clone", "oop array clone", "CopyOf", "CopyOfRange"};
-void ArrayCopyNode::dump_spec(outputStream *st) const {
-  CallNode::dump_spec(st);
-  st->print(" (%s%s)", _kind_names[_kind], _alloc_tightly_coupled ? ", tightly coupled allocation" : "");
-}
-#endif
-
-int ArrayCopyNode::get_count(PhaseGVN *phase) const {
-  Node* src = in(ArrayCopyNode::Src);
-  const Type* src_type = phase->type(src);
-
-  assert(is_clonebasic(), "unexpected arraycopy type");
-  if (src_type->isa_instptr()) {
-    const TypeInstPtr* inst_src = src_type->is_instptr();
-    ciInstanceKlass* ik = inst_src->klass()->as_instance_klass();
-    // ciInstanceKlass::nof_nonstatic_fields() doesn't take injected
-    // fields into account. They are rare anyway so easier to simply
-    // skip instances with injected fields.
-    if ((!inst_src->klass_is_exact() && (ik->is_interface() || ik->has_subklass())) || ik->has_injected_fields()) {
-      return -1;
-    }
-    int nb_fields = ik->nof_nonstatic_fields();
-    return nb_fields;
-  }
-  return -1;
-}
-
-Node* ArrayCopyNode::try_clone_instance(PhaseGVN *phase, bool can_reshape, int count) {
-  assert(is_clonebasic(), "unexpected arraycopy type");
-
-  Node* src = in(ArrayCopyNode::Src);
-  Node* dest = in(ArrayCopyNode::Dest);
-  Node* ctl = in(TypeFunc::Control);
-  Node* in_mem = in(TypeFunc::Memory);
-
-  const Type* src_type = phase->type(src);
-  const Type* dest_type = phase->type(dest);
-
-  assert(src->is_AddP(), "should be base + off");
-  assert(dest->is_AddP(), "should be base + off");
-  Node* base_src = src->in(AddPNode::Base);
-  Node* base_dest = dest->in(AddPNode::Base);
-
-  MergeMemNode* mem = MergeMemNode::make(in_mem);
-
-  const TypeInstPtr* inst_src = src_type->is_instptr();
-
-  if (!inst_src->klass_is_exact()) {
-    ciInstanceKlass* ik = inst_src->klass()->as_instance_klass();
-    assert(!ik->is_interface() && !ik->has_subklass(), "inconsistent klass hierarchy");
-    phase->C->dependencies()->assert_leaf_type(ik);
-  }
-
-  ciInstanceKlass* ik = inst_src->klass()->as_instance_klass();
-  assert(ik->nof_nonstatic_fields() <= ArrayCopyLoadStoreMaxElem, "too many fields");
-
-  for (int i = 0; i < count; i++) {
-    ciField* field = ik->nonstatic_field_at(i);
-    int fieldidx = phase->C->alias_type(field)->index();
-    const TypePtr* adr_type = phase->C->alias_type(field)->adr_type();
-    Node* off = phase->MakeConX(field->offset());
-    Node* next_src = phase->transform(new AddPNode(base_src,base_src,off));
-    Node* next_dest = phase->transform(new AddPNode(base_dest,base_dest,off));
-    BasicType bt = field->layout_type();
-
-    const Type *type;
-    if (bt == T_OBJECT) {
-      if (!field->type()->is_loaded()) {
-        type = TypeInstPtr::BOTTOM;
-      } else {
-        ciType* field_klass = field->type();
-        type = TypeOopPtr::make_from_klass(field_klass->as_klass());
-      }
-    } else {
-      type = Type::get_const_basic_type(bt);
-    }
-
-    Node* v = LoadNode::make(*phase, ctl, mem->memory_at(fieldidx), next_src, adr_type, type, bt, MemNode::unordered);
-    v = phase->transform(v);
-    Node* s = StoreNode::make(*phase, ctl, mem->memory_at(fieldidx), next_dest, adr_type, v, bt, MemNode::unordered);
-    s = phase->transform(s);
-    mem->set_memory_at(fieldidx, s);
-  }
-
-  if (!finish_transform(phase, can_reshape, ctl, mem)) {
-    return NULL;
-  }
-
-  return mem;
-}
-
-bool ArrayCopyNode::finish_transform(PhaseGVN *phase, bool can_reshape,
-                                     Node* ctl, Node *mem) {
-  if (can_reshape) {
-    PhaseIterGVN* igvn = phase->is_IterGVN();
-    assert(is_clonebasic(), "unexpected arraycopy type");
-    Node* out_mem = proj_out(TypeFunc::Memory);
-
-    if (out_mem->outcnt() != 1 || !out_mem->raw_out(0)->is_MergeMem() ||
-        out_mem->raw_out(0)->outcnt() != 1 || !out_mem->raw_out(0)->raw_out(0)->is_MemBar()) {
-      assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking");
-      return false;
-    }
-
-    igvn->replace_node(out_mem->raw_out(0), mem);
-
-    Node* out_ctl = proj_out(TypeFunc::Control);
-    igvn->replace_node(out_ctl, ctl);
-  }
-  return true;
-}
-
-
-Node *ArrayCopyNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  if (remove_dead_region(phase, can_reshape))  return this;
-
-  if (StressArrayCopyMacroNode && !can_reshape) return NULL;
-
-  // See if it's a small array copy and we can inline it as
-  // loads/stores
-  // Here we can only do:
-  // - clone for which we don't need to do card marking
-
-  if (!is_clonebasic()) {
-    return NULL;
-  }
-
-  if (in(TypeFunc::Control)->is_top() || in(TypeFunc::Memory)->is_top()) {
-    return NULL;
-  }
-
-  int count = get_count(phase);
-
-  if (count < 0 || count > ArrayCopyLoadStoreMaxElem) {
-    return NULL;
-  }
-
-  Node* mem = try_clone_instance(phase, can_reshape, count);
-  return mem;
-}
--- a/hotspot/src/share/vm/opto/callnode.hpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/callnode.hpp	Wed Feb 18 18:14:07 2015 +0100
@@ -1083,117 +1083,4 @@
 #endif
 };
 
-class GraphKit;
-
-class ArrayCopyNode : public CallNode {
-private:
-
-  // What kind of arraycopy variant is this?
-  enum {
-    None,            // not set yet
-    ArrayCopy,       // System.arraycopy()
-    CloneBasic,      // A clone that can be copied by 64 bit chunks
-    CloneOop,        // An oop array clone
-    CopyOf,          // Arrays.copyOf()
-    CopyOfRange      // Arrays.copyOfRange()
-  } _kind;
-
-#ifndef PRODUCT
-  static const char* _kind_names[CopyOfRange+1];
-#endif
-  // Is the alloc obtained with
-  // AllocateArrayNode::Ideal_array_allocation() tighly coupled
-  // (arraycopy follows immediately the allocation)?
-  // We cache the result of LibraryCallKit::tightly_coupled_allocation
-  // here because it's much easier to find whether there's a tightly
-  // couple allocation at parse time than at macro expansion time. At
-  // macro expansion time, for every use of the allocation node we
-  // would need to figure out whether it happens after the arraycopy (and
-  // can be ignored) or between the allocation and the arraycopy. At
-  // parse time, it's straightforward because whatever happens after
-  // the arraycopy is not parsed yet so doesn't exist when
-  // LibraryCallKit::tightly_coupled_allocation() is called.
-  bool _alloc_tightly_coupled;
-
-  bool _arguments_validated;
-
-  static const TypeFunc* arraycopy_type() {
-    const Type** fields = TypeTuple::fields(ParmLimit - TypeFunc::Parms);
-    fields[Src]       = TypeInstPtr::BOTTOM;
-    fields[SrcPos]    = TypeInt::INT;
-    fields[Dest]      = TypeInstPtr::BOTTOM;
-    fields[DestPos]   = TypeInt::INT;
-    fields[Length]    = TypeInt::INT;
-    fields[SrcLen]    = TypeInt::INT;
-    fields[DestLen]   = TypeInt::INT;
-    fields[SrcKlass]  = TypeKlassPtr::BOTTOM;
-    fields[DestKlass] = TypeKlassPtr::BOTTOM;
-    const TypeTuple *domain = TypeTuple::make(ParmLimit, fields);
-
-    // create result type (range)
-    fields = TypeTuple::fields(0);
-
-    const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
-
-    return TypeFunc::make(domain, range);
-  }
-
-  ArrayCopyNode(Compile* C, bool alloc_tightly_coupled);
-
-  int get_count(PhaseGVN *phase) const;
-  static const TypePtr* get_address_type(PhaseGVN *phase, Node* n);
-
-  Node* try_clone_instance(PhaseGVN *phase, bool can_reshape, int count);
-  bool finish_transform(PhaseGVN *phase, bool can_reshape,
-                        Node* ctl, Node *mem);
-
-public:
-
-  enum {
-    Src   = TypeFunc::Parms,
-    SrcPos,
-    Dest,
-    DestPos,
-    Length,
-    SrcLen,
-    DestLen,
-    SrcKlass,
-    DestKlass,
-    ParmLimit
-  };
-
-  static ArrayCopyNode* make(GraphKit* kit, bool may_throw,
-                             Node* src, Node* src_offset,
-                             Node* dest,  Node* dest_offset,
-                             Node* length,
-                             bool alloc_tightly_coupled,
-                             Node* src_klass = NULL, Node* dest_klass = NULL,
-                             Node* src_length = NULL, Node* dest_length = NULL);
-
-  void connect_outputs(GraphKit* kit);
-
-  bool is_arraycopy()             const  { assert(_kind != None, "should bet set"); return _kind == ArrayCopy; }
-  bool is_arraycopy_validated()   const  { assert(_kind != None, "should bet set"); return _kind == ArrayCopy && _arguments_validated; }
-  bool is_clonebasic()            const  { assert(_kind != None, "should bet set"); return _kind == CloneBasic; }
-  bool is_cloneoop()              const  { assert(_kind != None, "should bet set"); return _kind == CloneOop; }
-  bool is_copyof()                const  { assert(_kind != None, "should bet set"); return _kind == CopyOf; }
-  bool is_copyofrange()           const  { assert(_kind != None, "should bet set"); return _kind == CopyOfRange; }
-
-  void set_arraycopy(bool validated)   { assert(_kind == None, "shouldn't bet set yet"); _kind = ArrayCopy; _arguments_validated = validated; }
-  void set_clonebasic()                { assert(_kind == None, "shouldn't bet set yet"); _kind = CloneBasic; }
-  void set_cloneoop()                  { assert(_kind == None, "shouldn't bet set yet"); _kind = CloneOop; }
-  void set_copyof()                    { assert(_kind == None, "shouldn't bet set yet"); _kind = CopyOf; _arguments_validated = false; }
-  void set_copyofrange()               { assert(_kind == None, "shouldn't bet set yet"); _kind = CopyOfRange; _arguments_validated = false; }
-
-  virtual int Opcode() const;
-  virtual uint size_of() const; // Size is bigger
-  virtual bool guaranteed_safepoint()  { return false; }
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
-
-  bool is_alloc_tightly_coupled() const { return _alloc_tightly_coupled; }
-
-#ifndef PRODUCT
-  virtual void dump_spec(outputStream *st) const;
-#endif
-};
 #endif // SHARE_VM_OPTO_CALLNODE_HPP
--- a/hotspot/src/share/vm/opto/classes.cpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/classes.cpp	Wed Feb 18 18:14:07 2015 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "opto/addnode.hpp"
+#include "opto/arraycopynode.hpp"
 #include "opto/callnode.hpp"
 #include "opto/castnode.hpp"
 #include "opto/cfgnode.hpp"
--- a/hotspot/src/share/vm/opto/compile.cpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/compile.cpp	Wed Feb 18 18:14:07 2015 +0100
@@ -42,6 +42,7 @@
 #include "opto/chaitin.hpp"
 #include "opto/compile.hpp"
 #include "opto/connode.hpp"
+#include "opto/convertnode.hpp"
 #include "opto/divnode.hpp"
 #include "opto/escape.hpp"
 #include "opto/idealGraphPrinter.hpp"
@@ -3866,6 +3867,26 @@
   return SSC_full_test;
 }
 
+Node* Compile::conv_I2X_index(PhaseGVN *phase, Node* idx, const TypeInt* sizetype) {
+#ifdef _LP64
+  // The scaled index operand to AddP must be a clean 64-bit value.
+  // Java allows a 32-bit int to be incremented to a negative
+  // value, which appears in a 64-bit register as a large
+  // positive number.  Using that large positive number as an
+  // operand in pointer arithmetic has bad consequences.
+  // On the other hand, 32-bit overflow is rare, and the possibility
+  // can often be excluded, if we annotate the ConvI2L node with
+  // a type assertion that its value is known to be a small positive
+  // number.  (The prior range check has ensured this.)
+  // This assertion is used by ConvI2LNode::Ideal.
+  int index_max = max_jint - 1;  // array size is max_jint, index is one less
+  if (sizetype != NULL)  index_max = sizetype->_hi - 1;
+  const TypeLong* lidxtype = TypeLong::make(CONST64(0), index_max, Type::WidenMax);
+  idx = phase->transform(new ConvI2LNode(idx, lidxtype));
+#endif
+  return idx;
+}
+
 // The message about the current inlining is accumulated in
 // _print_inlining_stream and transfered into the _print_inlining_list
 // once we know whether inlining succeeds or not. For regular
--- a/hotspot/src/share/vm/opto/compile.hpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/compile.hpp	Wed Feb 18 18:14:07 2015 +0100
@@ -74,6 +74,7 @@
 class JVMState;
 class Type;
 class TypeData;
+class TypeInt;
 class TypePtr;
 class TypeOopPtr;
 class TypeFunc;
@@ -1221,6 +1222,8 @@
   enum { SSC_always_false, SSC_always_true, SSC_easy_test, SSC_full_test };
   int static_subtype_check(ciKlass* superk, ciKlass* subk);
 
+  static Node* conv_I2X_index(PhaseGVN *phase, Node* idx, const TypeInt* sizetype);
+
   // Auxiliary method for randomized fuzzing/stressing
   static bool randomized_select(int count);
 };
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Wed Feb 18 18:14:07 2015 +0100
@@ -1660,22 +1660,7 @@
 
   // must be correct type for alignment purposes
   Node* base  = basic_plus_adr(ary, header);
-#ifdef _LP64
-  // The scaled index operand to AddP must be a clean 64-bit value.
-  // Java allows a 32-bit int to be incremented to a negative
-  // value, which appears in a 64-bit register as a large
-  // positive number.  Using that large positive number as an
-  // operand in pointer arithmetic has bad consequences.
-  // On the other hand, 32-bit overflow is rare, and the possibility
-  // can often be excluded, if we annotate the ConvI2L node with
-  // a type assertion that its value is known to be a small positive
-  // number.  (The prior range check has ensured this.)
-  // This assertion is used by ConvI2LNode::Ideal.
-  int index_max = max_jint - 1;  // array size is max_jint, index is one less
-  if (sizetype != NULL)  index_max = sizetype->_hi - 1;
-  const TypeLong* lidxtype = TypeLong::make(CONST64(0), index_max, Type::WidenMax);
-  idx = _gvn.transform( new ConvI2LNode(idx, lidxtype) );
-#endif
+  idx = Compile::conv_I2X_index(&_gvn, idx, sizetype);
   Node* scale = _gvn.transform( new LShiftXNode(idx, intcon(shift)) );
   return basic_plus_adr(ary, base, scale);
 }
--- a/hotspot/src/share/vm/opto/library_call.cpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Wed Feb 18 18:14:07 2015 +0100
@@ -30,6 +30,7 @@
 #include "compiler/compileLog.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "opto/addnode.hpp"
+#include "opto/arraycopynode.hpp"
 #include "opto/callGenerator.hpp"
 #include "opto/castnode.hpp"
 #include "opto/cfgnode.hpp"
@@ -3876,18 +3877,57 @@
       // Extreme case:  Arrays.copyOf((Integer[])x, 10, String[].class).
       // This will fail a store-check if x contains any non-nulls.
 
-      Node* alloc = tightly_coupled_allocation(newcopy, NULL);
-
-      ArrayCopyNode* ac = ArrayCopyNode::make(this, true, original, start, newcopy, intcon(0), moved, alloc != NULL,
+      // ArrayCopyNode::Ideal may transform the ArrayCopyNode to
+      // loads/stores but it is legal only if we're sure the
+      // Arrays.copyOf would succeed. So we need all input arguments
+      // to the copyOf to be validated, including that the copy to the
+      // new array won't trigger an ArrayStoreException. That subtype
+      // check can be optimized if we know something on the type of
+      // the input array from type speculation.
+      if (_gvn.type(klass_node)->singleton()) {
+        ciKlass* subk   = _gvn.type(load_object_klass(original))->is_klassptr()->klass();
+        ciKlass* superk = _gvn.type(klass_node)->is_klassptr()->klass();
+
+        int test = C->static_subtype_check(superk, subk);
+        if (test != Compile::SSC_always_true && test != Compile::SSC_always_false) {
+          const TypeOopPtr* t_original = _gvn.type(original)->is_oopptr();
+          if (t_original->speculative_type() != NULL) {
+            original = maybe_cast_profiled_obj(original, t_original->speculative_type(), true);
+          }
+        }
+      }
+
+      bool validated = false;
+      // Reason_class_check rather than Reason_intrinsic because we
+      // want to intrinsify even if this traps.
+      if (!too_many_traps(Deoptimization::Reason_class_check)) {
+        Node* not_subtype_ctrl = gen_subtype_check(load_object_klass(original),
+                                                   klass_node);
+
+        if (not_subtype_ctrl != top()) {
+          PreserveJVMState pjvms(this);
+          set_control(not_subtype_ctrl);
+          uncommon_trap(Deoptimization::Reason_class_check,
+                        Deoptimization::Action_make_not_entrant);
+          assert(stopped(), "Should be stopped");
+        }
+        validated = true;
+      }
+
+      ArrayCopyNode* ac = ArrayCopyNode::make(this, true, original, start, newcopy, intcon(0), moved, true,
                                               load_object_klass(original), klass_node);
       if (!is_copyOfRange) {
-        ac->set_copyof();
+        ac->set_copyof(validated);
       } else {
-        ac->set_copyofrange();
+        ac->set_copyofrange(validated);
       }
       Node* n = _gvn.transform(ac);
-      assert(n == ac, "cannot disappear");
-      ac->connect_outputs(this);
+      if (n == ac) {
+        ac->connect_outputs(this);
+      } else {
+        assert(validated, "shouldn't transform if all arguments not validated");
+        set_all_memory(n);
+      }
     }
   } // original reexecute is set back here
 
--- a/hotspot/src/share/vm/opto/macroArrayCopy.cpp	Thu Feb 19 23:47:01 2015 +0300
+++ b/hotspot/src/share/vm/opto/macroArrayCopy.cpp	Wed Feb 18 18:14:07 2015 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "opto/arraycopynode.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "opto/convertnode.hpp"
 #include "opto/graphKit.hpp"
@@ -519,7 +520,7 @@
     // Test S[] against D[], not S against D, because (probably)
     // the secondary supertype cache is less busy for S[] than S.
     // This usually only matters when D is an interface.
-    Node* not_subtype_ctrl = ac->is_arraycopy_validated() ? top() :
+    Node* not_subtype_ctrl = (ac->is_arraycopy_validated() || ac->is_copyof_validated() || ac->is_copyofrange_validated()) ? top() :
       Phase::gen_subtype_check(src_klass, dest_klass, ctrl, mem, &_igvn);
     // Plug failing path into checked_oop_disjoint_arraycopy
     if (not_subtype_ctrl != top()) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/arraycopy/TestArrayCopyAsLoadsStores.java	Wed Feb 18 18:14:07 2015 +0100
@@ -0,0 +1,618 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6912521
+ * @summary small array copy as loads/stores
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:CompileCommand=dontinline,TestArrayCopyAsLoadsStores::m* -XX:TypeProfileLevel=200 TestArrayCopyAsLoadsStores
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:CompileCommand=dontinline,TestArrayCopyAsLoadsStores::m* -XX:+IgnoreUnrecognizedVMOptions -XX:+StressArrayCopyMacroNode -XX:TypeProfileLevel=200 TestArrayCopyAsLoadsStores
+ *
+ */
+
+import java.lang.annotation.*;
+import java.lang.reflect.*;
+import java.util.*;
+
+public class TestArrayCopyAsLoadsStores {
+
+    public enum ArraySrc {
+        SMALL,
+        LARGE,
+        ZERO
+    }
+
+    public enum ArrayDst {
+        NONE,
+        NEW,
+        SRC
+    }
+
+    static class A {
+    }
+
+    static class B extends A {
+    }
+
+    static final A[] small_a_src = new A[5];
+    static final A[] large_a_src = new A[10];
+    static final A[] zero_a_src = new A[0];
+    static final int[] small_int_src = new int[5];
+    static final int[] large_int_src = new int[10];
+    static final int[] zero_int_src = new int[0];
+    static final Object[] small_object_src = new Object[5];
+    static Object src;
+
+    @Retention(RetentionPolicy.RUNTIME)
+    @interface Args {
+        ArraySrc src();
+        ArrayDst dst() default ArrayDst.NONE;
+        int[] extra_args() default {};
+    }
+
+    // array clone should be compiled as loads/stores
+    @Args(src=ArraySrc.SMALL)
+    static A[] m1() throws CloneNotSupportedException {
+        return (A[])small_a_src.clone();
+    }
+
+    @Args(src=ArraySrc.SMALL)
+    static int[] m2() throws CloneNotSupportedException {
+        return (int[])small_int_src.clone();
+    }
+
+    // new array allocation should be optimized out
+    @Args(src=ArraySrc.SMALL)
+    static int m3() throws CloneNotSupportedException {
+        int[] array_clone = (int[])small_int_src.clone();
+        return array_clone[0] + array_clone[1] + array_clone[2] +
+            array_clone[3] + array_clone[4];
+    }
+
+    // should not be compiled as loads/stores
+    @Args(src=ArraySrc.LARGE)
+    static int[] m4() throws CloneNotSupportedException {
+        return (int[])large_int_src.clone();
+    }
+
+    // check that array of length 0 is handled correctly
+    @Args(src=ArraySrc.ZERO)
+    static int[] m5() throws CloneNotSupportedException {
+        return (int[])zero_int_src.clone();
+    }
+
+    // array copy should be compiled as loads/stores
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.NEW)
+    static void m6(int[] src, int[] dest) {
+        System.arraycopy(src, 0, dest, 0, 5);
+    }
+
+    // array copy should not be compiled as loads/stores
+    @Args(src=ArraySrc.LARGE, dst=ArrayDst.NEW)
+    static void m7(int[] src, int[] dest) {
+        System.arraycopy(src, 0, dest, 0, 10);
+    }
+
+    // array copy should be compiled as loads/stores
+    @Args(src=ArraySrc.SMALL)
+    static A[] m8(A[] src) {
+        src[0] = src[0]; // force null check
+        A[] dest = new A[5];
+        System.arraycopy(src, 0, dest, 0, 5);
+        return dest;
+    }
+
+    // array copy should not be compiled as loads/stores: we would
+    // need to emit GC barriers
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.NEW)
+    static void m9(A[] src, A[] dest) {
+        System.arraycopy(src, 0, dest, 0, 5);
+    }
+
+    // overlapping array regions: copy backward
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.SRC)
+    static void m10(int[] src, int[] dest) {
+        System.arraycopy(src, 0, dest, 1, 4);
+    }
+
+    static boolean m10_check(int[] src, int[] dest) {
+        boolean failure = false;
+        for (int i = 0; i < 5; i++) {
+            int j = Math.max(i - 1, 0);
+            if (dest[i] != src[j]) {
+                System.out.println("Test m10 failed for " + i + " src[" + j +"]=" + src[j] + ", dest[" + i + "]=" + dest[i]);
+                failure = true;
+            }
+        }
+        return failure;
+    }
+
+    // overlapping array regions: copy forward
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.SRC)
+    static void m11(int[] src, int[] dest) {
+        System.arraycopy(src, 1, dest, 0, 4);
+    }
+
+    static boolean m11_check(int[] src, int[] dest) {
+        boolean failure = false;
+        for (int i = 0; i < 5; i++) {
+            int j = Math.min(i + 1, 4);
+            if (dest[i] != src[j]) {
+                System.out.println("Test m11 failed for " + i + " src[" + j +"]=" + src[j] + ", dest[" + i + "]=" + dest[i]);
+                failure = true;
+            }
+        }
+        return failure;
+    }
+
+    // overlapping array region with unknown src/dest offsets: compiled code must include both forward and backward copies
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.SRC, extra_args={0,1})
+    static void m12(int[] src, int[] dest, int srcPos, int destPos) {
+        System.arraycopy(src, srcPos, dest, destPos, 4);
+    }
+
+    static boolean m12_check(int[] src, int[] dest) {
+        boolean failure = false;
+        for (int i = 0; i < 5; i++) {
+            int j = Math.max(i - 1, 0);
+            if (dest[i] != src[j]) {
+                System.out.println("Test m12 failed for " + i + " src[" + j +"]=" + src[j] + ", dest[" + i + "]=" + dest[i]);
+                failure = true;
+            }
+        }
+        return failure;
+    }
+
+    // Array allocation and copy should optimize out
+    @Args(src=ArraySrc.SMALL)
+    static int m13(int[] src) {
+        int[] dest = new int[5];
+        System.arraycopy(src, 0, dest, 0, 5);
+        return dest[0] + dest[1] + dest[2] + dest[3] + dest[4];
+    }
+
+    // Check that copy of length 0 is handled correctly
+    @Args(src=ArraySrc.ZERO, dst=ArrayDst.NEW)
+    static void m14(int[] src, int[] dest) {
+        System.arraycopy(src, 0, dest, 0, 0);
+    }
+
+    // copyOf should compile to loads/stores
+    @Args(src=ArraySrc.SMALL)
+    static A[] m15() {
+        return Arrays.copyOf(small_a_src, 5, A[].class);
+    }
+
+    static Object[] helper16(int i) {
+        Object[] arr = null;
+        if ((i%2) == 0) {
+            arr = small_a_src;
+        } else {
+            arr = small_object_src;
+        }
+        return arr;
+    }
+
+    // CopyOf may need subtype check
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.NONE, extra_args={0})
+    static A[] m16(A[] unused_src, int i) {
+        Object[] arr = helper16(i);
+        return Arrays.copyOf(arr, 5, A[].class);
+    }
+
+    static Object[] helper17_1(int i) {
+        Object[] arr = null;
+        if ((i%2) == 0) {
+            arr = small_a_src;
+        } else {
+            arr = small_object_src;
+        }
+        return arr;
+    }
+
+    static A[] helper17_2(Object[] arr) {
+        return Arrays.copyOf(arr, 5, A[].class);
+    }
+
+    // CopyOf may leverage type speculation
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.NONE, extra_args={0})
+    static A[] m17(A[] unused_src, int i) {
+        Object[] arr = helper17_1(i);
+        return helper17_2(arr);
+    }
+
+    static Object[] helper18_1(int i) {
+        Object[] arr = null;
+        if ((i%2) == 0) {
+            arr = small_a_src;
+        } else {
+            arr = small_object_src;
+        }
+        return arr;
+    }
+
+    static Object[] helper18_2(Object[] arr) {
+        return Arrays.copyOf(arr, 5, Object[].class);
+    }
+
+    // CopyOf should not attempt to use type speculation if it's not needed
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.NONE, extra_args={0})
+    static Object[] m18(A[] unused_src, int i) {
+        Object[] arr = helper18_1(i);
+        return helper18_2(arr);
+    }
+
+    static Object[] helper19(int i) {
+        Object[] arr = null;
+        if ((i%2) == 0) {
+            arr = small_a_src;
+        } else {
+            arr = small_object_src;
+        }
+        return arr;
+    }
+
+    // CopyOf may need subtype check. Test is run to make type check
+    // fail and cause deoptimization. Next compilation should not
+    // compile as loads/stores because the first compilation
+    // deoptimized.
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.NONE, extra_args={0})
+    static A[] m19(A[] unused_src, int i) {
+        Object[] arr = helper19(i);
+        return Arrays.copyOf(arr, 5, A[].class);
+    }
+
+    // copyOf for large array should not compile to loads/stores
+    @Args(src=ArraySrc.LARGE)
+    static A[] m20() {
+        return Arrays.copyOf(large_a_src, 10, A[].class);
+    }
+
+    // check zero length copyOf is handled correctly
+    @Args(src=ArraySrc.ZERO)
+    static A[] m21() {
+        return Arrays.copyOf(zero_a_src, 0, A[].class);
+    }
+
+    // Run with srcPos=0 for a 1st compile, then with incorrect value
+    // of srcPos to cause deoptimization, then with srcPos=0 for a 2nd
+    // compile. The 2nd compile shouldn't turn arraycopy into
+    // loads/stores because input arguments are no longer known to be
+    // valid.
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.NEW, extra_args={0})
+    static void m22(int[] src, int[] dest, int srcPos) {
+        System.arraycopy(src, srcPos, dest, 0, 5);
+    }
+
+    // copyOfRange should compile to loads/stores
+    @Args(src=ArraySrc.SMALL)
+    static A[] m23() {
+        return Arrays.copyOfRange(small_a_src, 1, 4, A[].class);
+    }
+
+    static boolean m23_check(A[] src, A[] dest) {
+        boolean failure = false;
+        for (int i = 0; i < 3; i++) {
+            if (src[i+1] != dest[i]) {
+                System.out.println("Test m23 failed for " + i + " src[" + (i+1) +"]=" + src[i+1] + ", dest[" + i + "]=" + dest[i]);
+                failure = true;
+            }
+        }
+        return failure;
+    }
+
+    // array copy should be compiled as loads/stores. Then invoke with
+    // an incompatible array type to verify we don't allow a forbidden
+    // arraycopy to happen.
+    @Args(src=ArraySrc.SMALL)
+    static A[] m24(Object[] src) {
+        src[0] = src[0]; // force null check
+        A[] dest = new A[5];
+        System.arraycopy(src, 0, dest, 0, 5);
+        return dest;
+    }
+
+    // overlapping array region with unknown src/dest offsets but
+    // length 1: compiled code doesn't need both forward and backward
+    // copies.
+    // When driven by doTest(), dest is the very same array as src
+    // (ArrayDst.SRC) and extra_args supplies srcPos=0, destPos=1, so
+    // element 0 is copied onto element 1; m25_check() verifies that.
+    @Args(src=ArraySrc.SMALL, dst=ArrayDst.SRC, extra_args={0,1})
+    static void m25(int[] src, int[] dest, int srcPos, int destPos) {
+        System.arraycopy(src, srcPos, dest, destPos, 1);
+    }
+
+    // Verifies the result of m25: after copying one element from
+    // index 0 to index 1, dest[1] must equal src[0]. A mismatch is
+    // reported on stdout.
+    // Returns true if the values differ (the test FAILED), false
+    // otherwise.
+    static boolean m25_check(int[] src, int[] dest) {
+        if (dest[1] != src[0]) {
+            System.out.println("Test m25 failed for src[0]=" + src[0] + ", dest[1]=" + dest[1]);
+            return true;
+        }
+        return false;
+    }
+
+    // Lookup table from method name to reflective handle for every
+    // test method (mN) and checker method (mN_check) declared by this
+    // class. Populated once per instance by the initializer below.
+    final HashMap<String,Method> tests = new HashMap<>();
+    {
+        for (Method candidate : this.getClass().getDeclaredMethods()) {
+            String methodName = candidate.getName();
+            if (!methodName.matches("m[0-9]+(_check)?")) {
+                // not a test or checker method
+                continue;
+            }
+            // doTest() invokes these with a null receiver, so they must be static
+            assert(Modifier.isStatic(candidate.getModifiers())) : candidate;
+            tests.put(methodName, candidate);
+        }
+    }
+
+    // Overall verdict: cleared as soon as any test reports a failure;
+    // main() throws at the end if it is false.
+    boolean success = true;
+
+    // Runs the test method called <name> 20000 times (presumably enough
+    // for it to get JIT compiled) and verifies the outcome of every
+    // single invocation.
+    //
+    // Arguments are built from the method's @Args annotation:
+    //  - params[0] (if the method has parameters) is a fresh clone of
+    //    the source array selected by args.src();
+    //  - params[1] (if present) is, depending on args.dst(), a new
+    //    array of the same length or the very same array as params[0];
+    //  - the values of args.extra_args() follow.
+    //
+    // Verification:
+    //  - a method returning a primitive must return the sum of the
+    //    source elements;
+    //  - otherwise the destination (the returned array, or params[1]
+    //    for void methods) is handed to <name>_check if such a method
+    //    exists, else compared element by element with the source.
+    //
+    // On the first failing invocation 'success' is cleared and the
+    // loop stops. Throws IllegalArgumentException if no such test
+    // method exists or it carries no @Args annotation; reflection
+    // errors are propagated.
+    void doTest(String name) throws Exception {
+        Method m = tests.get(name);
+        if (m == null) {
+            throw new IllegalArgumentException("Unknown test method: " + name);
+        }
+        Method m_check = tests.get(name + "_check");
+        Class<?>[] paramTypes = m.getParameterTypes();
+        Object[] params = new Object[paramTypes.length];
+        Class<?> retType = m.getReturnType();
+        boolean returnsPrimitive = retType.isPrimitive() && !retType.equals(Void.TYPE);
+        // The test works on int[] data if it returns a primitive, is a
+        // void method whose first parameter is a primitive array, or
+        // returns a primitive array; otherwise it works on A[] data.
+        boolean isIntArray = returnsPrimitive ||
+            (retType.equals(Void.TYPE) && paramTypes[0].getComponentType().isPrimitive()) ||
+            (retType.isArray() && retType.getComponentType().isPrimitive());
+
+        Args args = m.getAnnotation(Args.class);
+        if (args == null) {
+            throw new IllegalArgumentException("Test method " + name + " has no @Args annotation");
+        }
+
+        Object src = null;
+        switch(args.src()) {
+        case SMALL: {
+            if (isIntArray) {
+                src = small_int_src;
+            } else {
+                src = small_a_src;
+            }
+            break;
+        }
+        case LARGE: {
+            if (isIntArray) {
+                src = large_int_src;
+            } else {
+                src = large_a_src;
+            }
+            break;
+        }
+        case ZERO: {
+            // Both element kinds are legal here: m21 uses a zero length
+            // A[] source, so this case must not insist on int[].
+            if (isIntArray) {
+                src = zero_int_src;
+            } else {
+                src = zero_a_src;
+            }
+            break;
+        }
+        }
+
+        for (int i = 0; i < 20000; i++) {
+            boolean failure = false;
+
+            // index of the first parameter slot not yet filled
+            int p = 0;
+
+            if (params.length > 0) {
+                // clone so that a test writing to its source cannot
+                // corrupt the shared static array
+                if (isIntArray) {
+                    params[0] = ((int[])src).clone();
+                } else {
+                    params[0] = ((A[])src).clone();
+                }
+                p++;
+            }
+
+            if (params.length > 1) {
+                switch(args.dst()) {
+                case NEW: {
+                    if (isIntArray) {
+                        params[1] = new int[((int[])params[0]).length];
+                    } else {
+                        params[1] = new A[((A[])params[0]).length];
+                    }
+                    p++;
+                    break;
+                }
+                case SRC: {
+                    // source and destination are the same array
+                    params[1] = params[0];
+                    p++;
+                    break;
+                }
+                case NONE: break;
+                }
+            }
+
+            for (int j = 0; j < args.extra_args().length; j++) {
+                params[p+j] = args.extra_args()[j];
+            }
+
+            Object res = m.invoke(null, params);
+
+            if (returnsPrimitive) {
+                // the test returned a value: it must be the sum of the source
+                int s = (int)res;
+                int sum = 0;
+                int[] int_res = (int[])src;
+                for (int j = 0; j < int_res.length; j++) {
+                    sum += int_res[j];
+                }
+                failure = (s != sum);
+                if (failure) {
+                    System.out.println("Test " + name + " failed: result = " + s + " != " + sum);
+                }
+            } else {
+                Object dest = null;
+                if (!retType.equals(Void.TYPE)) {
+                    dest = res;
+                } else {
+                    dest = params[1];
+                }
+
+                if (m_check != null) {
+                    // test specific checker; it returns true on failure
+                    failure = (boolean)m_check.invoke(null,  new Object[] { src, dest });
+                } else {
+                    // default check: destination must match the source
+                    // over the whole length of the source
+                    if (isIntArray) {
+                        int[] int_res = (int[])src;
+                        int[] int_dest = (int[])dest;
+                        for (int j = 0; j < int_res.length; j++) {
+                            if (int_res[j] != int_dest[j]) {
+                                System.out.println("Test " + name + " failed for " + j + " src[" + j +"]=" + int_res[j] + ", dest[" + j + "]=" + int_dest[j]);
+                                failure = true;
+                            }
+                        }
+                    } else {
+                        Object[] object_res = (Object[])src;
+                        Object[] object_dest = (Object[])dest;
+                        for (int j = 0; j < object_res.length; j++) {
+                            if (object_res[j] != object_dest[j]) {
+                                System.out.println("Test " + name + " failed for " + j + " src[" + j +"]=" + object_res[j] + ", dest[" + j + "]=" + object_dest[j]);
+                                failure = true;
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (failure) {
+                success = false;
+                break;
+            }
+        }
+    }
+
+    // Test driver: fills the shared source arrays, runs every test
+    // through doTest() (with extra warm-up and deoptimization steps
+    // where a test needs a specific profile) and throws a
+    // RuntimeException at the end if any test failed.
+    public static void main(String[] args) throws Exception {
+        for (int i = 0; i < small_a_src.length; i++) {
+            small_a_src[i] = new A();
+        }
+
+        for (int i = 0; i < small_int_src.length; i++) {
+            small_int_src[i] = i;
+        }
+
+        for (int i = 0; i < large_int_src.length; i++) {
+            large_int_src[i] = i;
+        }
+
+        // plain Objects (not A): used below to make m24 throw an
+        // ArrayStoreException
+        for (int i = 0; i < 5; i++) {
+            small_object_src[i] = new Object();
+        }
+
+        TestArrayCopyAsLoadsStores test = new TestArrayCopyAsLoadsStores();
+
+        test.doTest("m1");
+        test.doTest("m2");
+        test.doTest("m3");
+        test.doTest("m4");
+        test.doTest("m5");
+        test.doTest("m6");
+        test.doTest("m7");
+        test.doTest("m8");
+        test.doTest("m9");
+        test.doTest("m10");
+        test.doTest("m11");
+        test.doTest("m12");
+        test.doTest("m13");
+        test.doTest("m14");
+        test.doTest("m15");
+
+        // make both branches of the If appear taken
+        for (int i = 0; i < 20000; i++) {
+            helper16(i);
+        }
+
+        test.doTest("m16");
+
+        // load class B so type check in m17 would not be simple comparison
+        B b = new B();
+        // make both branches of the If appear taken
+        for (int i = 0; i < 20000; i++) {
+            helper17_1(i);
+        }
+
+        test.doTest("m17");
+
+        // make both branches of the If appear taken
+        for (int i = 0; i < 20000; i++) {
+            helper18_1(i);
+        }
+        test.doTest("m18");
+
+        // make both branches of the If appear taken
+        for (int i = 0; i < 20000; i++) {
+            helper19(i);
+        }
+
+        // Compile
+        for (int i = 0; i < 20000; i++) {
+            m19(null, 0);
+        }
+
+        // force deopt
+        // (at least one of these calls must throw ArrayStoreException)
+        boolean m19_exception = false;
+        for (int i = 0; i < 10; i++) {
+            try {
+                m19(null, 1);
+            } catch(ArrayStoreException ase) {
+                m19_exception = true;
+            }
+        }
+
+        if (!m19_exception) {
+            System.out.println("Test m19: exception wasn't thrown");
+            test.success = false;
+        }
+
+        // run m19 again after the deoptimization
+        test.doTest("m19");
+
+        test.doTest("m20");
+        test.doTest("m21");
+
+        // Compile
+        int[] dst = new int[small_int_src.length];
+        for (int i = 0; i < 20000; i++) {
+            m22(small_int_src, dst, 0);
+        }
+
+        // force deopt
+        // (srcPos=5 is the invalid offset; the resulting exception is
+        // expected and deliberately ignored)
+        for (int i = 0; i < 10; i++) {
+            try {
+                m22(small_int_src, dst, 5);
+            } catch(ArrayIndexOutOfBoundsException aioobe) {}
+        }
+
+        // 2nd compile of m22, now with srcPos no longer known to be valid
+        test.doTest("m22");
+        test.doTest("m23");
+
+        test.doTest("m24");
+        // copying plain Objects into an A[] must be rejected
+        boolean m24_exception = false;
+        try {
+            m24(small_object_src);
+        } catch(ArrayStoreException ase) {
+            m24_exception = true;
+        }
+
+        if (!m24_exception) {
+            System.out.println("Test m24: exception wasn't thrown");
+            test.success = false;
+        }
+
+        test.doTest("m25");
+
+        if (!test.success) {
+            throw new RuntimeException("some tests failed");
+        }
+    }
+}