jdk-sandbox: changeset 35085:839c8ba29724

--- a/hotspot/make/aix/Makefile	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/make/aix/Makefile	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 #
 # Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2012, 2013 SAP AG. All rights reserved.
+# Copyright 2012, 2015 SAP AG. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -61,10 +61,6 @@
     FORCE_TIERED=1
   endif
 endif
-# C1 is not ported on ppc64(le), so we cannot build a tiered VM:
-ifneq (,$(filter $(ARCH),ppc64 pp64le))
-  FORCE_TIERED=0
-endif
 
 ifdef LP64
   ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")

--- a/hotspot/make/aix/makefiles/fastdebug.make	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/make/aix/makefiles/fastdebug.make	Fri Dec 04 16:38:04 2015 +0100
@@ -68,5 +68,5 @@
 LFLAGS_QIPA=
 
 VERSION = optimized
-SYSDEFS += -DASSERT -DFASTDEBUG
+SYSDEFS += -DASSERT
 PICFLAGS = DEFAULT

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/make/aix/makefiles/tiered.make	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, 2015 SAP AG. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+#
+
+# Sets make macros for building the tiered (C1 + C2) version of the VM.
+
+TYPE=TIERED
+
+VM_SUBDIR = server
+
+CFLAGS += -DCOMPILER2 -DCOMPILER1

--- a/hotspot/make/linux/Makefile	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/make/linux/Makefile	Fri Dec 04 16:38:04 2015 +0100
@@ -57,14 +57,6 @@
     FORCE_TIERED=1
   endif
 endif
-# C1 is not ported on ppc64, so we cannot build a tiered VM:
-# Notice: after 8046471 ARCH will be 'ppc' for top-level ppc64 builds but
-# 'ppc64' for HotSpot-only ppc64 builds. Need to detect both variants here!
-ifneq (,$(findstring $(ARCH), ppc ppc64))
-  ifeq ($(ARCH_DATA_MODEL), 64)
-    FORCE_TIERED=0
-  endif
-endif
 
 ifdef LP64
   ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")

--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -53,9 +53,6 @@
   return 0x00;                  // illegal instruction 0x00000000
 }
 
-void Assembler::print_instruction(int inst) {
-  Unimplemented();
-}
 
 // Patch instruction `inst' at offset `inst_pos' to refer to
 // `dest_pos' and return the resulting instruction.  We should have
@@ -484,7 +481,7 @@
       if (d != s) { mr(d, s); }
       return 0;
     }
-    if (return_simm16_rest) {
+    if (return_simm16_rest && (d == s)) {
       return xd;
     }
     addi(d, s, xd);

--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -31,10 +31,37 @@
 // Address is an abstraction used to represent a memory location
 // as used in assembler instructions.
 // PPC instructions grok either baseReg + indexReg or baseReg + disp.
-// So far we do not use this as simplification by this class is low
-// on PPC with its simple addressing mode. Use RegisterOrConstant to
-// represent an offset.
 class Address VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _base;         // Base register (noreg if unset).
+  Register _index;        // Index register (noreg if unset).
+  intptr_t _disp;         // Displacement (0 if unset).
+
+ public:
+  Address(Register b, Register i, address d = 0) // base + index, with an optional pointer-typed displacement
+    : _base(b), _index(i), _disp((intptr_t)d) {
+    assert(i == noreg || d == 0, "can't have both"); // Index register and displacement are mutually exclusive.
+  }
+
+  Address(Register b, address d = 0) // base + pointer-typed displacement
+    : _base(b), _index(noreg), _disp((intptr_t)d) {}
+
+  Address(Register b, intptr_t d) // base + integer displacement
+    : _base(b), _index(noreg), _disp(d) {}
+
+  Address(Register b, RegisterOrConstant roc) // base + (constant displacement or index register)
+    : _base(b), _index(noreg), _disp(0) {
+    if (roc.is_constant()) _disp = roc.as_constant(); else _index = roc.as_register();
+  }
+
+  Address() // Empty address: no base, no index, zero displacement.
+    : _base(noreg), _index(noreg), _disp(0) {}
+
+  // Accessors.
+  Register base()  const { return _base; }
+  Register index() const { return _index; }
+  int      disp()  const { return (int)_disp; } // Note: narrows the stored intptr_t to int.
+  bool     is_const() const { return _base == noreg && _index == noreg; } // True if no register is involved.
 };
 
 class AddressLiteral VALUE_OBJ_CLASS_SPEC {
@@ -164,10 +191,14 @@
 };
 #endif
 
+
+// The PPC Assembler: Pure assembler doing NO optimizations on the
+// instruction level; i.e., what you write is what you get. The
+// Assembler is generating code into a CodeBuffer.
+
 class Assembler : public AbstractAssembler {
  protected:
   // Displacement routines
-  static void print_instruction(int inst);
   static int  patched_branch(int dest_pos, int inst, int inst_pos);
   static int  branch_destination(int inst, int pos);
 
@@ -839,41 +870,38 @@
 
   enum Predict { pt = 1, pn = 0 }; // pt = predict taken
 
-  // instruction must start at passed address
+  // Instruction must start at passed address.
   static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
 
-  // instruction must be left-justified in argument
-  static int instr_len(unsigned long instr)  { return BytesPerInstWord; }
-
   // longest instructions
   static int instr_maxlen() { return BytesPerInstWord; }
 
   // Test if x is within signed immediate range for nbits.
   static bool is_simm(int x, unsigned int nbits) {
     assert(0 < nbits && nbits < 32, "out of bounds");
-    const int   min      = -( ((int)1) << nbits-1 );
-    const int   maxplus1 =  ( ((int)1) << nbits-1 );
+    const int   min      = -(((int)1) << nbits-1);
+    const int   maxplus1 =  (((int)1) << nbits-1);
     return min <= x && x < maxplus1;
   }
 
   static bool is_simm(jlong x, unsigned int nbits) {
     assert(0 < nbits && nbits < 64, "out of bounds");
-    const jlong min      = -( ((jlong)1) << nbits-1 );
-    const jlong maxplus1 =  ( ((jlong)1) << nbits-1 );
+    const jlong min      = -(((jlong)1) << nbits-1);
+    const jlong maxplus1 =  (((jlong)1) << nbits-1);
     return min <= x && x < maxplus1;
   }
 
-  // Test if x is within unsigned immediate range for nbits
+  // Test if x is within unsigned immediate range for nbits.
   static bool is_uimm(int x, unsigned int nbits) {
     assert(0 < nbits && nbits < 32, "out of bounds");
-    const int   maxplus1 = ( ((int)1) << nbits );
-    return 0 <= x && x < maxplus1;
+    const unsigned int maxplus1 = (((unsigned int)1) << nbits); // 2^nbits, computed unsigned.
+    return (unsigned int)x < maxplus1; // A negative x converts to a value >= 2^31 and is rejected.
   }
 
   static bool is_uimm(jlong x, unsigned int nbits) {
     assert(0 < nbits && nbits < 64, "out of bounds");
-    const jlong maxplus1 =  ( ((jlong)1) << nbits );
-    return 0 <= x && x < maxplus1;
+    const julong maxplus1 = (((julong)1) << nbits); // 2^nbits, computed unsigned.
+    return (julong)x < maxplus1; // A negative x converts to a value >= 2^63 and is rejected.
   }
 
  protected:
@@ -1376,8 +1404,11 @@
   inline void orc(    Register a, Register s, Register b);
   inline void orc_(   Register a, Register s, Register b);
   inline void extsb(  Register a, Register s);
+  inline void extsb_( Register a, Register s);
   inline void extsh(  Register a, Register s);
+  inline void extsh_( Register a, Register s);
   inline void extsw(  Register a, Register s);
+  inline void extsw_( Register a, Register s);
 
   // extended mnemonics
   inline void nop();
@@ -1767,6 +1798,8 @@
   inline void smt_yield();
   inline void smt_mdoio();
   inline void smt_mdoom();
+  // >= Power8
+  inline void smt_miso();
 
   // trap instructions
   inline void twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur)
@@ -2168,6 +2201,7 @@
   inline void load_const(Register d, void* a,           Register tmp = noreg);
   inline void load_const(Register d, Label& L,          Register tmp = noreg);
   inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
+  inline void load_const32(Register d, int i); // load signed int (patchable)
 
   // Load a 64 bit constant, optimized, not identifyable.
   // Tmp can be used to increase ILP. Set return_simm16_rest = true to get a

--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -206,8 +206,11 @@
 inline void Assembler::orc(     Register a, Register s, Register b)    { emit_int32(ORC_OPCODE     | rta(a) | rs(s) | rb(b) | rc(0)); }
 inline void Assembler::orc_(    Register a, Register s, Register b)    { emit_int32(ORC_OPCODE     | rta(a) | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::extsb(   Register a, Register s)                { emit_int32(EXTSB_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsb_(  Register a, Register s)                { emit_int32(EXTSB_OPCODE   | rta(a) | rs(s) | rc(1)); }
 inline void Assembler::extsh(   Register a, Register s)                { emit_int32(EXTSH_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsh_(  Register a, Register s)                { emit_int32(EXTSH_OPCODE   | rta(a) | rs(s) | rc(1)); }
 inline void Assembler::extsw(   Register a, Register s)                { emit_int32(EXTSW_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsw_(  Register a, Register s)                { emit_int32(EXTSW_OPCODE   | rta(a) | rs(s) | rc(1)); }
 
 // extended mnemonics
 inline void Assembler::nop()                              { Assembler::ori(R0, R0, 0); }
@@ -609,6 +612,8 @@
 inline void Assembler::smt_yield()            { Assembler::or_unchecked(R27, R27, R27); }
 inline void Assembler::smt_mdoio()            { Assembler::or_unchecked(R29, R29, R29); }
 inline void Assembler::smt_mdoom()            { Assembler::or_unchecked(R30, R30, R30); }
+// >= Power8
+inline void Assembler::smt_miso()             { Assembler::or_unchecked(R26, R26, R26); }
 
 inline void Assembler::twi_0(Register a)      { twi_unchecked(0, a, 0);}
 
@@ -967,12 +972,15 @@
 
 // Load a 64 bit constant encoded by an AddressLiteral. patchable.
 inline void Assembler::load_const(Register d, AddressLiteral& a, Register tmp) {
-  assert(d != R0, "R0 not allowed");
   // First relocate (we don't change the offset in the RelocationHolder,
   // just pass a.rspec()), then delegate to load_const(Register, long).
   relocate(a.rspec());
   load_const(d, (long)a.value(), tmp);
 }
 
+inline void Assembler::load_const32(Register d, int i) { // Always emits exactly two instructions: lis + ori.
+  lis(d, i >> 16);        // Upper 16 bits of i go into the shifted immediate.
+  ori(d, d, i & 0xFFFF);  // OR in the lower 16 bits.
+}
 
 #endif // CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_CodeStubs_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_ppc.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_ppc.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#define __ ce->masm()->
+
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+                               bool throw_index_out_of_bounds_exception)
+  : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
+  , _index(index) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info); // The stub keeps its own CodeEmitInfo built from info.
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  if (_info->deoptimize_on_exception()) {
+    address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+    // May be used by optimizations like LoopInvariantCodeMotion or RangeCheckEliminator.
+    DEBUG_ONLY( __ untested("RangeCheckStub: predicate_failed_trap_id"); )
+    //__ load_const_optimized(R0, a);
+    __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a)); // R0 = R29_TOC + offset of a.
+    __ mtctr(R0);
+    __ bctrl(); // Call the runtime entry through CTR.
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ illtrap()); // Debug builds: trap if the call ever returns.
+    return;
+  }
+
+  address stub = _throw_index_out_of_bounds_exception ? Runtime1::entry_for(Runtime1::throw_index_exception_id)
+                                                      : Runtime1::entry_for(Runtime1::throw_range_check_failed_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  __ mtctr(R0); // CTR now holds the target, so R0 can be reused for the index below.
+
+  Register index = R0; // pass in R0
+  if (_index->is_register()) {
+    __ extsw(index, _index->as_register()); // Sign-extend the 32-bit index.
+  } else {
+    __ load_const_optimized(index, _index->as_jint()); // Constant index.
+  }
+
+  __ bctrl(); // Call the throwing runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap()); // Debug builds: trap if the call ever returns.
+}
+
+
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+  _info = new CodeEmitInfo(info); // The stub keeps its own CodeEmitInfo built from info.
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  //__ load_const_optimized(R0, a);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a)); // R0 = R29_TOC + offset of a.
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap()); // Debug builds: trap if the call ever returns.
+}
+
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  // Parameter 1: bci
+  __ load_const_optimized(R0, _bci);
+  __ std(R0, -16, R1_SP); // Stored at R1_SP - 16.
+
+  // Parameter 2: Method*
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  AddressLiteral md = __ constant_metadata_address(m); // Notify OOP recorder (don't need the relocation).
+  __ load_const_optimized(R0, md.value());
+  __ std(R0, -8, R1_SP); // Stored at R1_SP - 8.
+
+  address a = Runtime1::entry_for(Runtime1::counter_overflow_id);
+  //__ load_const_optimized(R0, a);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a)); // R0 = R29_TOC + offset of a.
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+
+  __ b(_continuation); // Resume the compiled code after the call returns.
+}
+
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) { // No implicit exception table entry is added when _offset is -1.
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(Runtime1::throw_div0_exception_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  __ mtctr(R0);
+  __ bctrl(); // Call the throwing runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap()); // Debug builds: trap if the call ever returns.
+}
+
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  address a; // Runtime entry to call; chosen below.
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+    a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  } else {
+    a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+  }
+
+  if (ImplicitNullChecks || TrapBasedNullChecks) { // Map the faulting offset to this stub's code offset.
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  //__ load_const_optimized(R0, a);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a)); // R0 = R29_TOC + offset of a.
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap()); // Debug builds: trap if the call ever returns.
+}
+
+
+// Implementation of SimpleExceptionStub
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(_stub); // Runtime entry selected by the stub id stored in _stub.
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  if (_obj->is_valid()) { __ mr_if_needed(/*tmp1 in do_CheckCast*/ R4_ARG2, _obj->as_register()); } // Object, if any, goes in R4_ARG2.
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  debug_only( __ illtrap(); ) // Debug builds: trap if the call ever returns.
+}
+
+
+// Implementation of NewInstanceStub
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info); // The stub keeps its own CodeEmitInfo built from info.
+  assert(stub_id == Runtime1::new_instance_id                 ||
+         stub_id == Runtime1::fast_new_instance_id            ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id = stub_id; // One of the three new-instance ids checked above.
+}
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  address entry = Runtime1::entry_for(_stub_id);
+  //__ load_const_optimized(R0, entry);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry)); // R0 = R29_TOC + offset of entry.
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation); // Resume the compiled code after the call returns.
+}
+
+
+// Implementation of NewTypeArrayStub
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info); // The stub keeps its own CodeEmitInfo built from info.
+}
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  address entry = Runtime1::entry_for(Runtime1::new_type_array_id);
+  //__ load_const_optimized(R0, entry);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry)); // R0 = R29_TOC + offset of entry.
+  __ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation); // Resume the compiled code after the call returns.
+}
+
+
+// Implementation of NewObjectArrayStub
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info); // The stub keeps its own CodeEmitInfo built from info.
+}
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  address entry = Runtime1::entry_for(Runtime1::new_object_array_id);
+  //__ load_const_optimized(R0, entry);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry)); // R0 = R29_TOC + offset of entry.
+  __ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation); // Resume the compiled code after the call returns.
+}
+
+
+// Implementation of MonitorAccessStubs
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+  : MonitorAccessStub(obj_reg, lock_reg) {
+  _info = new CodeEmitInfo(info); // The stub keeps its own CodeEmitInfo built from info.
+}
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorenter_id : Runtime1::monitorenter_nofpu_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  __ mr_if_needed(/*scratch_opr()->as_register()*/ R4_ARG2, _obj_reg->as_register()); // Object goes in R4_ARG2.
+  assert(_lock_reg->as_register() == R5_ARG3, ""); // The lock register is expected to be R5_ARG3 already.
+  __ mtctr(R0);
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation); // Resume the compiled code after the call returns.
+}
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_compute_lock) {
+    ce->monitor_address(_monitor_ix, _lock_reg); // Have the LIR assembler compute the monitor address into _lock_reg.
+  }
+  address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorexit_id : Runtime1::monitorexit_nofpu_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  assert(_lock_reg->as_register() == R4_ARG2, ""); // The lock register is expected to be R4_ARG2 already.
+  __ mtctr(R0);
+  __ bctrl(); // Call through CTR; no call info or oop map is recorded for this call.
+  __ b(_continuation); // Resume the compiled code after the call returns.
+}
+
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes).
+// - Replace original code with a call to the stub.
+// At Runtime:
+// - call to stub, jump to runtime
+// - in runtime: preserve all registers (especially objects, i.e., source and destination object)
+// - in runtime: after initializing class, restore original code, reexecute instruction
+
+int PatchingStub::_patch_info_offset = -(5 * BytesPerInstWord);
+
+void PatchingStub::align_patch_site(MacroAssembler* ) { // Intentionally emits nothing.
+  // Patch sites on ppc are always properly aligned.
+}
+
+#ifdef ASSERT
+inline void compare_with_patch_site(address template_start, address pc_start, int bytes_to_copy) {
+  // Debug-only check that the bytes_to_copy bytes at template_start equal those at pc_start.
+  for (int i = 0; i < bytes_to_copy; i++) {
+    int expected = template_start[i] & 0xFF; // Read by index: no side effect inside the assert below.
+    int a_byte = pc_start[i] & 0xFF;
+    assert(a_byte == expected, "should be the same code");
+  }
+}
+#endif
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+  // copy original code here
+  assert(NativeGeneralJump::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
+         "not enough room for call");
+  assert((_bytes_to_copy & 0x3) == 0, "must copy a multiple of four bytes");
+
+  Label call_patch;
+
+  int being_initialized_entry = __ offset(); // Code offset where the copied/regenerated instructions start.
+
+  if (_id == load_klass_id) {
+    // Produce a copy of the load klass instruction for use by the being initialized case.
+    AddressLiteral addrlit((address)NULL, metadata_Relocation::spec(_index));
+    __ load_const(_obj, addrlit, R0);
+    DEBUG_ONLY( compare_with_patch_site(__ code_section()->start() + being_initialized_entry, _pc_start, _bytes_to_copy); )
+  } else if (_id == load_mirror_id || _id == load_appendix_id) {
+    // Produce a copy of the load mirror instruction for use by the being initialized case.
+    AddressLiteral addrlit((address)NULL, oop_Relocation::spec(_index));
+    __ load_const(_obj, addrlit, R0);
+    DEBUG_ONLY( compare_with_patch_site(__ code_section()->start() + being_initialized_entry, _pc_start, _bytes_to_copy); )
+  } else {
+    // Make a copy of the code which is going to be patched.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      __ emit_int8 (a_byte);
+    }
+  }
+
+  address end_of_patch = __ pc();
+  int bytes_to_skip = 0; // Bytes between end_of_patch and the end of the patch record (checked below).
+  if (_id == load_mirror_id) {
+    int offset = __ offset();
+    __ block_comment(" being_initialized check");
+
+    // Static field accesses have special semantics while the class
+    // initializer is being run so we emit a test which can be used to
+    // check that this code is being executed by the initializing
+    // thread.
+    assert(_obj != noreg, "must be a valid register");
+    assert(_index >= 0, "must have oop index");
+    __ mr(R0, _obj); // spill
+    __ ld(_obj, java_lang_Class::klass_offset_in_bytes(), _obj);
+    __ ld(_obj, in_bytes(InstanceKlass::init_thread_offset()), _obj);
+    __ cmpd(CCR0, _obj, R16_thread);
+    __ mr(_obj, R0); // restore
+    __ bne(CCR0, call_patch); // Init thread differs from R16_thread: go to the patch call.
+
+    // Load_klass patches may execute the patched code before it's
+    // copied back into place so we need to jump back into the main
+    // code of the nmethod to continue execution.
+    __ b(_patch_site_continuation);
+
+    // Make sure this extra code gets skipped.
+    bytes_to_skip += __ offset() - offset;
+  }
+
+  // Now emit the patch record telling the runtime how to find the
+  // pieces of the patch.  We only need 3 bytes but it has to be
+  // aligned as an instruction so emit 4 bytes.
+  int sizeof_patch_record = 4;
+  bytes_to_skip += sizeof_patch_record;
+
+  // Emit the offsets needed to find the code to patch.
+  int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
+
+  // Emit the patch record.  We need to emit a full word, so emit an extra empty byte.
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
+  address patch_info_pc = __ pc();
+  assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
+
+  address entry = __ pc();
+  NativeGeneralJump::insert_unconditional((address)_pc_start, entry); // Put an unconditional jump to this stub at the patch site.
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
+  switch (_id) {
+    case access_field_id:  target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
+    case load_klass_id:    target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+                           reloc_type = relocInfo::metadata_type; break;
+    case load_mirror_id:   target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+                           reloc_type = relocInfo::oop_type; break;
+    case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+                           reloc_type = relocInfo::oop_type; break;
+    default: ShouldNotReachHere();
+  }
+  __ bind(call_patch);
+
+  __ block_comment("patch entry point");
+  //__ load_const(R0, target); + mtctr + bctrl must have size -_patch_info_offset
+  __ load_const32(R0, MacroAssembler::offset_to_global_toc(target)); // lis + ori: fixed-size load of the TOC offset.
+  __ add(R0, R29_TOC, R0); // R0 = R29_TOC + offset of target.
+  __ mtctr(R0);
+  __ bctrl(); // Fifth instruction of the sequence: call target through CTR.
+  assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change"); // Call sequence size must match _patch_info_offset.
+  ce->add_call_info_here(_info);
+  __ b(_patch_site_entry);
+  if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) {
+    CodeSection* cs = __ code_section();
+    address pc = (address)_pc_start;
+    RelocIterator iter(cs, pc, pc + 1);
+    relocInfo::change_reloc_info_for_address(&iter, (address) pc, reloc_type, relocInfo::none); // Change the reloc at the patch site from reloc_type to none.
+  }
+}
+
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(Runtime1::deoptimize_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  __ mtctr(R0); // CTR now holds the target, so R0 can be reused below.
+
+  __ load_const_optimized(R0, _trap_request); // Pass trap request in R0.
+  __ bctrl(); // Call the runtime entry through CTR.
+  ce->add_call_info_here(_info);
+  debug_only(__ illtrap()); // Debug builds: trap if the call ever returns.
+}
+
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+  //---------------slow case: call to native-----------------
+  __ bind(_entry);
+  __ mr(R3_ARG1, src()->as_register());        // arg 1: src
+  __ extsw(R4_ARG2, src_pos()->as_register()); // arg 2: src_pos, sign-extended
+  __ mr(R5_ARG3, dst()->as_register());        // arg 3: dst
+  __ extsw(R6_ARG4, dst_pos()->as_register()); // arg 4: dst_pos, sign-extended
+  __ extsw(R7_ARG5, length()->as_register());  // arg 5: length, sign-extended
+
+  ce->emit_static_call_stub();
+
+  bool success = ce->emit_trampoline_stub_for_call(SharedRuntime::get_resolve_static_call_stub());
+  if (!success) { return; } // Give up on this stub if the trampoline stub could not be emitted.
+
+  __ relocate(relocInfo::static_call_type);
+  // Note: At this point we do not have the address of the trampoline
+  // stub, and the entry point might be too far away for bl, so __ pc()
+  // serves as dummy and the bl will be patched later.
+  __ code()->set_insts_mark();
+  __ bl(__ pc());
+  ce->add_call_info_here(info());
+  ce->verify_oop_map(info());
+
+#ifndef PRODUCT
+  const address counter = (address)&Runtime1::_arraycopy_slowcase_cnt;
+  const Register tmp = R3, tmp2 = R4;
+  int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true); // Returned rest is used as the displacement below.
+  __ lwz(tmp2, simm16_offs, tmp);
+  __ addi(tmp2, tmp2, 1); // Increment the slow-case counter.
+  __ stw(tmp2, simm16_offs, tmp);
+#endif
+
+  __ b(_continuation); // Resume the compiled code.
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+
+  __ bind(_entry);
+
+  assert(pre_val()->is_register(), "Precondition.");
+  Register pre_val_reg = pre_val()->as_register();
+
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+
+  __ cmpdi(CCR0, pre_val_reg, 0);
+  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), _continuation); // Skip the slow path when pre_val is 0.
+
+  address stub = Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  __ std(pre_val_reg, -8, R1_SP); // Pass pre_val on stack.
+  __ mtctr(R0);
+  __ bctrl(); // Call the slow-path entry through CTR.
+  __ b(_continuation);
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register addr_reg = addr()->as_pointer_register();
+  Register new_val_reg = new_val()->as_register();
+
+  __ cmpdi(CCR0, new_val_reg, 0);
+  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), _continuation); // Skip the slow path when new_val is 0.
+
+  address stub = Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = R29_TOC + offset of stub.
+  __ mtctr(R0); // CTR now holds the target, so R0 can be reused below.
+  __ mr(R0, addr_reg); // Pass addr in R0.
+  __ bctrl(); // Call the slow-path entry through CTR.
+  __ b(_continuation);
+}
+
+#endif // INCLUDE_ALL_GCS
+///////////////////////////////////////////////////////////////////////////////////
+
+#undef __

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_Defs_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_DEFS_PPC_HPP
+#define CPU_PPC_VM_C1_DEFS_PPC_HPP
+
+// Offsets (in bytes, from the memory address) of the low and high halves of a native word.
+enum {
+#if !defined(VM_LITTLE_ENDIAN)
+  pd_hi_word_offset_in_bytes = 0,
+  pd_lo_word_offset_in_bytes = BytesPerInt
+#else
+  pd_hi_word_offset_in_bytes = BytesPerInt,
+  pd_lo_word_offset_in_bytes = 0
+#endif
+};
+
+
+// Explicit rounding operations are not required to implement the strictFP mode.
+enum {
+  pd_strict_fp_requires_explicit_rounding = false
+};
+
+
+// Register counts and register number ranges used by C1 on PPC.
+enum {
+  pd_nof_cpu_regs_frame_map = 32,              // Number of registers used during code emission.
+  pd_nof_caller_save_cpu_regs_frame_map = 27,  // Number of cpu registers killed by calls. (At least R3_ARG1 ... R10_ARG8, but using all like C2.)
+  pd_nof_cpu_regs_reg_alloc = 27,              // Number of registers that are visible to register allocator.
+  pd_nof_cpu_regs_linearscan = 32,             // Number of registers visible to linear scan.
+  pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map,  // 27
+  pd_last_callee_saved_reg = pd_nof_cpu_regs_reg_alloc - 1,           // 26, i.e. below the first one: the callee-saved range is empty.
+  pd_first_cpu_reg = 0,
+  pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1,
+
+  pd_nof_fpu_regs_frame_map = 32,              // Number of registers used during code emission.
+  pd_nof_caller_save_fpu_regs_frame_map = 32,  // Number of fpu registers killed by calls.
+  pd_nof_fpu_regs_reg_alloc = 32,              // Number of registers that are visible to register allocator.
+  pd_nof_fpu_regs_linearscan = 32,             // Number of registers visible to linear scan.
+  pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,  // Fpu register numbers start right after the 32 cpu register numbers.
+  pd_last_fpu_reg =  pd_nof_cpu_regs_frame_map + pd_nof_fpu_regs_reg_alloc - 1,
+
+  pd_nof_xmm_regs_linearscan = 0,              // The xmm entries are all empty (0 / -1) on this platform.
+  pd_nof_caller_save_xmm_regs = 0,
+  pd_first_xmm_reg = -1,
+  pd_last_xmm_reg = -1
+};
+
+// For debug info: a float value in a register is saved in double precision by runtime stubs.
+enum {
+  pd_float_saved_as_double = true
+};
+
+#endif // CPU_PPC_VM_C1_DEFS_PPC_HPP

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_FpuStackSim_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP
+#define CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP
+
+// No FPU stack on PPC.
+class FpuStackSim;
+
+#endif // CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_FrameMap_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_ppc.inline.hpp"
+
+
+const int FrameMap::pd_c_runtime_reserved_arg_size = 7;
+
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool outgoing) {
+  LIR_Opr opr = LIR_OprFact::illegalOpr;
+  VMReg r_1 = reg->first();
+  VMReg r_2 = reg->second();
+  if (r_1->is_stack()) {
+    // Convert stack slot to an SP offset.
+    // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
+    // so we must add it in here.
+    int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+    opr = LIR_OprFact::address(new LIR_Address(SP_opr, st_off + STACK_BIAS, type));
+  } else if (r_1->is_Register()) {
+    // Integer register. 'outgoing' is not consulted here: the mapping is
+    // the same for incoming and outgoing arguments.
+    Register gpr = r_1->as_Register();
+    // Long/double values whose second half is also in a register map to a
+    // long operand, oops and arrays to an oop operand, everything else to
+    // a plain operand.
+    if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+      opr = as_long_opr(gpr);
+    } else if (type == T_OBJECT || type == T_ARRAY) {
+      opr = as_oop_opr(gpr);
+    } else {
+      opr = as_opr(gpr);
+    }
+  } else if (r_1->is_FloatRegister()) {
+    assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+    FloatRegister f = r_1->as_FloatRegister();
+    if (type == T_DOUBLE) {
+      opr = as_double_opr(f);
+    } else {
+      opr = as_float_opr(f);
+    }
+  }
+  return opr; // Still illegalOpr if r_1 is neither a stack slot nor a register.
+}
+
+//               FrameMap
+//--------------------------------------------------------
+
+FloatRegister FrameMap::_fpu_regs [FrameMap::nof_fpu_regs];
+
+LIR_Opr  FrameMap::R0_opr;
+LIR_Opr  FrameMap::R1_opr;
+LIR_Opr  FrameMap::R2_opr;
+LIR_Opr  FrameMap::R3_opr;
+LIR_Opr  FrameMap::R4_opr;
+LIR_Opr  FrameMap::R5_opr;
+LIR_Opr  FrameMap::R6_opr;
+LIR_Opr  FrameMap::R7_opr;
+LIR_Opr  FrameMap::R8_opr;
+LIR_Opr  FrameMap::R9_opr;
+LIR_Opr FrameMap::R10_opr;
+LIR_Opr FrameMap::R11_opr;
+LIR_Opr FrameMap::R12_opr;
+LIR_Opr FrameMap::R13_opr;
+LIR_Opr FrameMap::R14_opr;
+LIR_Opr FrameMap::R15_opr;
+LIR_Opr FrameMap::R16_opr;
+LIR_Opr FrameMap::R17_opr;
+LIR_Opr FrameMap::R18_opr;
+LIR_Opr FrameMap::R19_opr;
+LIR_Opr FrameMap::R20_opr;
+LIR_Opr FrameMap::R21_opr;
+LIR_Opr FrameMap::R22_opr;
+LIR_Opr FrameMap::R23_opr;
+LIR_Opr FrameMap::R24_opr;
+LIR_Opr FrameMap::R25_opr;
+LIR_Opr FrameMap::R26_opr;
+LIR_Opr FrameMap::R27_opr;
+LIR_Opr FrameMap::R28_opr;
+LIR_Opr FrameMap::R29_opr;
+LIR_Opr FrameMap::R30_opr;
+LIR_Opr FrameMap::R31_opr;
+
+LIR_Opr  FrameMap::R0_oop_opr;
+//LIR_Opr  FrameMap::R1_oop_opr;
+LIR_Opr  FrameMap::R2_oop_opr;
+LIR_Opr  FrameMap::R3_oop_opr;
+LIR_Opr  FrameMap::R4_oop_opr;
+LIR_Opr  FrameMap::R5_oop_opr;
+LIR_Opr  FrameMap::R6_oop_opr;
+LIR_Opr  FrameMap::R7_oop_opr;
+LIR_Opr  FrameMap::R8_oop_opr;
+LIR_Opr  FrameMap::R9_oop_opr;
+LIR_Opr FrameMap::R10_oop_opr;
+LIR_Opr FrameMap::R11_oop_opr;
+LIR_Opr FrameMap::R12_oop_opr;
+//LIR_Opr FrameMap::R13_oop_opr;
+LIR_Opr FrameMap::R14_oop_opr;
+LIR_Opr FrameMap::R15_oop_opr;
+//LIR_Opr FrameMap::R16_oop_opr;
+LIR_Opr FrameMap::R17_oop_opr;
+LIR_Opr FrameMap::R18_oop_opr;
+LIR_Opr FrameMap::R19_oop_opr;
+LIR_Opr FrameMap::R20_oop_opr;
+LIR_Opr FrameMap::R21_oop_opr;
+LIR_Opr FrameMap::R22_oop_opr;
+LIR_Opr FrameMap::R23_oop_opr;
+LIR_Opr FrameMap::R24_oop_opr;
+LIR_Opr FrameMap::R25_oop_opr;
+LIR_Opr FrameMap::R26_oop_opr;
+LIR_Opr FrameMap::R27_oop_opr;
+LIR_Opr FrameMap::R28_oop_opr;
+//LIR_Opr FrameMap::R29_oop_opr;
+LIR_Opr FrameMap::R30_oop_opr;
+LIR_Opr FrameMap::R31_oop_opr;
+
+LIR_Opr  FrameMap::R0_metadata_opr;
+//LIR_Opr  FrameMap::R1_metadata_opr;
+LIR_Opr  FrameMap::R2_metadata_opr;
+LIR_Opr  FrameMap::R3_metadata_opr;
+LIR_Opr  FrameMap::R4_metadata_opr;
+LIR_Opr  FrameMap::R5_metadata_opr;
+LIR_Opr  FrameMap::R6_metadata_opr;
+LIR_Opr  FrameMap::R7_metadata_opr;
+LIR_Opr  FrameMap::R8_metadata_opr;
+LIR_Opr  FrameMap::R9_metadata_opr;
+LIR_Opr FrameMap::R10_metadata_opr;
+LIR_Opr FrameMap::R11_metadata_opr;
+LIR_Opr FrameMap::R12_metadata_opr;
+//LIR_Opr FrameMap::R13_metadata_opr;
+LIR_Opr FrameMap::R14_metadata_opr;
+LIR_Opr FrameMap::R15_metadata_opr;
+//LIR_Opr FrameMap::R16_metadata_opr;
+LIR_Opr FrameMap::R17_metadata_opr;
+LIR_Opr FrameMap::R18_metadata_opr;
+LIR_Opr FrameMap::R19_metadata_opr;
+LIR_Opr FrameMap::R20_metadata_opr;
+LIR_Opr FrameMap::R21_metadata_opr;
+LIR_Opr FrameMap::R22_metadata_opr;
+LIR_Opr FrameMap::R23_metadata_opr;
+LIR_Opr FrameMap::R24_metadata_opr;
+LIR_Opr FrameMap::R25_metadata_opr;
+LIR_Opr FrameMap::R26_metadata_opr;
+LIR_Opr FrameMap::R27_metadata_opr;
+LIR_Opr FrameMap::R28_metadata_opr;
+//LIR_Opr FrameMap::R29_metadata_opr;
+LIR_Opr FrameMap::R30_metadata_opr;
+LIR_Opr FrameMap::R31_metadata_opr;
+
+LIR_Opr FrameMap::SP_opr;
+
+LIR_Opr FrameMap::R0_long_opr;
+LIR_Opr FrameMap::R3_long_opr;
+
+LIR_Opr FrameMap::F1_opr;
+LIR_Opr FrameMap::F1_double_opr;
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+FloatRegister FrameMap::nr2floatreg (int rnr) {
+  assert(_init_done, "tables not initialized"); // _fpu_regs is filled in by initialize().
+  debug_only(fpu_range_check(rnr);)             // Range check in debug builds only.
+  return _fpu_regs[rnr];
+}
+
+
+// Tells whether a callee may smash the register(s) behind 'reg'.
+bool FrameMap::is_caller_save_register (LIR_Opr reg) {
+  const bool is_fpu = reg->is_single_fpu() || reg->is_double_fpu();
+  if (is_fpu) { return true; }  // Every fpu register counts as caller saved.
+  if (!reg->is_double_cpu()) { return is_caller_save_register(reg->as_register()); }
+  // Double cpu operand: it is enough if either half is caller saved.
+  const bool lo_is_caller_save = is_caller_save_register(reg->as_register_lo());
+  return lo_is_caller_save || is_caller_save_register(reg->as_register_hi());
+}
+
+
+bool FrameMap::is_caller_save_register (Register r) {
+  // Not visible to the allocator: R0 (scratch) and R1 (SP). Alternative test
+  // kept for reference: r->encoding() < 2 + nof_caller_save_cpu_regs();
+  return true; // Currently all regs are caller save.
+}
+
+
+void FrameMap::initialize() {
+  assert(!_init_done, "once");
+  // Fills the register number tables, then creates the static operands.
+  int rnr = 0;
+
+  // Put generally available registers at the beginning (allocated, saved for GC).
+  for (int j = 0; j < nof_cpu_regs; ++j) {
+    Register rj = as_Register(j);
+    if (reg_needs_save(rj)) {
+      map_register(rnr++, rj);
+    }
+  }
+  assert(rnr == nof_cpu_regs_reg_alloc, "number of allocated registers");
+
+  // The following registers are not normally available.
+  for (int j = 0; j < nof_cpu_regs; ++j) {
+    Register rj = as_Register(j);
+    if (!reg_needs_save(rj)) {
+      map_register(rnr++, rj);
+    }
+  }
+  assert(rnr == nof_cpu_regs, "number of CPU registers");
+
+  for (int f = 0; f < nof_fpu_regs; f++) {
+    _fpu_regs[f] = as_FloatRegister(f);
+  }
+  // The tables are complete from here on; the operands below are built from them.
+  _init_done = true;
+
+  R0_opr  = as_opr(R0);
+  R1_opr  = as_opr(R1);
+  R2_opr  = as_opr(R2);
+  R3_opr  = as_opr(R3);
+  R4_opr  = as_opr(R4);
+  R5_opr  = as_opr(R5);
+  R6_opr  = as_opr(R6);
+  R7_opr  = as_opr(R7);
+  R8_opr  = as_opr(R8);
+  R9_opr  = as_opr(R9);
+  R10_opr = as_opr(R10);
+  R11_opr = as_opr(R11);
+  R12_opr = as_opr(R12);
+  R13_opr = as_opr(R13);
+  R14_opr = as_opr(R14);
+  R15_opr = as_opr(R15);
+  R16_opr = as_opr(R16);
+  R17_opr = as_opr(R17);
+  R18_opr = as_opr(R18);
+  R19_opr = as_opr(R19);
+  R20_opr = as_opr(R20);
+  R21_opr = as_opr(R21);
+  R22_opr = as_opr(R22);
+  R23_opr = as_opr(R23);
+  R24_opr = as_opr(R24);
+  R25_opr = as_opr(R25);
+  R26_opr = as_opr(R26);
+  R27_opr = as_opr(R27);
+  R28_opr = as_opr(R28);
+  R29_opr = as_opr(R29);
+  R30_opr = as_opr(R30);
+  R31_opr = as_opr(R31);
+
+  R0_oop_opr  = as_oop_opr(R0);
+  // R1 is the stack pointer: no oop operand.
+  R2_oop_opr  = as_oop_opr(R2);
+  R3_oop_opr  = as_oop_opr(R3);
+  R4_oop_opr  = as_oop_opr(R4);
+  R5_oop_opr  = as_oop_opr(R5);
+  R6_oop_opr  = as_oop_opr(R6);
+  R7_oop_opr  = as_oop_opr(R7);
+  R8_oop_opr  = as_oop_opr(R8);
+  R9_oop_opr  = as_oop_opr(R9);
+  R10_oop_opr = as_oop_opr(R10);
+  R11_oop_opr = as_oop_opr(R11);
+  R12_oop_opr = as_oop_opr(R12);
+  // R13 is the system thread register: no oop operand.
+  R14_oop_opr = as_oop_opr(R14);
+  R15_oop_opr = as_oop_opr(R15);
+  // R16 is the Java thread register: no oop operand.
+  R17_oop_opr = as_oop_opr(R17);
+  R18_oop_opr = as_oop_opr(R18);
+  R19_oop_opr = as_oop_opr(R19);
+  R20_oop_opr = as_oop_opr(R20);
+  R21_oop_opr = as_oop_opr(R21);
+  R22_oop_opr = as_oop_opr(R22);
+  R23_oop_opr = as_oop_opr(R23);
+  R24_oop_opr = as_oop_opr(R24);
+  R25_oop_opr = as_oop_opr(R25);
+  R26_oop_opr = as_oop_opr(R26);
+  R27_oop_opr = as_oop_opr(R27);
+  R28_oop_opr = as_oop_opr(R28);
+  // R29 is the TOC register: no oop operand.
+  R30_oop_opr = as_oop_opr(R30);
+  R31_oop_opr = as_oop_opr(R31);
+
+  R0_metadata_opr  = as_metadata_opr(R0);
+  // R1 is the stack pointer: no metadata operand.
+  R2_metadata_opr  = as_metadata_opr(R2);
+  R3_metadata_opr  = as_metadata_opr(R3);
+  R4_metadata_opr  = as_metadata_opr(R4);
+  R5_metadata_opr  = as_metadata_opr(R5);
+  R6_metadata_opr  = as_metadata_opr(R6);
+  R7_metadata_opr  = as_metadata_opr(R7);
+  R8_metadata_opr  = as_metadata_opr(R8);
+  R9_metadata_opr  = as_metadata_opr(R9);
+  R10_metadata_opr = as_metadata_opr(R10);
+  R11_metadata_opr = as_metadata_opr(R11);
+  R12_metadata_opr = as_metadata_opr(R12);
+  // R13 is the system thread register: no metadata operand.
+  R14_metadata_opr = as_metadata_opr(R14);
+  R15_metadata_opr = as_metadata_opr(R15);
+  // R16 is the Java thread register: no metadata operand.
+  R17_metadata_opr = as_metadata_opr(R17);
+  R18_metadata_opr = as_metadata_opr(R18);
+  R19_metadata_opr = as_metadata_opr(R19);
+  R20_metadata_opr = as_metadata_opr(R20);
+  R21_metadata_opr = as_metadata_opr(R21);
+  R22_metadata_opr = as_metadata_opr(R22);
+  R23_metadata_opr = as_metadata_opr(R23);
+  R24_metadata_opr = as_metadata_opr(R24);
+  R25_metadata_opr = as_metadata_opr(R25);
+  R26_metadata_opr = as_metadata_opr(R26);
+  R27_metadata_opr = as_metadata_opr(R27);
+  R28_metadata_opr = as_metadata_opr(R28);
+  // R29 is the TOC register: no metadata operand.
+  R30_metadata_opr = as_metadata_opr(R30);
+  R31_metadata_opr = as_metadata_opr(R31);
+
+  SP_opr = as_pointer_opr(R1_SP);
+
+  R0_long_opr = as_long_single_opr(R0); // Both halves name the same register.
+  R3_long_opr = as_long_single_opr(R3);
+
+  F1_opr = as_float_opr(F1);
+  F1_double_opr = as_double_opr(F1);
+
+  // All the allocated cpu regs are caller saved.
+  for (int c = 0; c < max_nof_caller_save_cpu_regs; c++) {
+    _caller_save_cpu_regs[c] = LIR_OprFact::single_cpu(c);
+  }
+
+  // All the fpu regs are caller saved.
+  for (int f = 0; f < nof_caller_save_fpu_regs; f++) {
+    _caller_save_fpu_regs[f] = LIR_OprFact::single_fpu(f);
+  }
+}
+
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+  return Address(R1_SP, STACK_BIAS + in_bytes(sp_offset)); // SP-relative address: base R1_SP, displacement sp_offset plus STACK_BIAS.
+}
+
+
+VMReg FrameMap::fpu_regname (int n) {
+  return as_FloatRegister(n)->as_VMReg(); // n is passed straight to as_FloatRegister().
+}
+
+
+LIR_Opr FrameMap::stack_pointer() {
+  return SP_opr; // Set to as_pointer_opr(R1_SP) by initialize().
+}
+
+
+// JSR 292
+// On PPC64, there is no need to save the SP, because neither
+// method handle intrinsics, nor compiled lambda forms modify it.
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+  return LIR_OprFact::illegalOpr;
+}
+
+
+bool FrameMap::validate_frame() {
+  int largest = in_bytes(framesize_in_bytes());  // Start from the frame size.
+  const int nof_args = _incoming_arguments->length();
+  for (int arg = 0, java_slot = 0; arg < nof_args; arg++) {
+    LIR_Opr incoming = _incoming_arguments->at(arg);
+    if (incoming->is_stack()) {
+      largest = MAX2(_argument_locations->at(java_slot), largest);
+    }
+    java_slot += type2size[incoming->type()];    // Advance by the argument's size per type2size.
+  }
+  return Assembler::is_simm16(largest + STACK_BIAS); // The largest offset has to fit a signed 16 bit immediate.
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_FrameMap_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
+#define CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
+
+ public:
+
+  enum {
+    nof_reg_args = 8,   // Registers R3-R10 are available for parameter passing.
+    first_available_sp_in_frame = frame::jit_out_preserve_size,
+    frame_pad_in_bytes = 0
+  };
+
+  static const int pd_c_runtime_reserved_arg_size;
+
+  static LIR_Opr  R0_opr;
+  static LIR_Opr  R1_opr;
+  static LIR_Opr  R2_opr;
+  static LIR_Opr  R3_opr;
+  static LIR_Opr  R4_opr;
+  static LIR_Opr  R5_opr;
+  static LIR_Opr  R6_opr;
+  static LIR_Opr  R7_opr;
+  static LIR_Opr  R8_opr;
+  static LIR_Opr  R9_opr;
+  static LIR_Opr R10_opr;
+  static LIR_Opr R11_opr;
+  static LIR_Opr R12_opr;
+  static LIR_Opr R13_opr;
+  static LIR_Opr R14_opr;
+  static LIR_Opr R15_opr;
+  static LIR_Opr R16_opr;
+  static LIR_Opr R17_opr;
+  static LIR_Opr R18_opr;
+  static LIR_Opr R19_opr;
+  static LIR_Opr R20_opr;
+  static LIR_Opr R21_opr;
+  static LIR_Opr R22_opr;
+  static LIR_Opr R23_opr;
+  static LIR_Opr R24_opr;
+  static LIR_Opr R25_opr;
+  static LIR_Opr R26_opr;
+  static LIR_Opr R27_opr;
+  static LIR_Opr R28_opr;
+  static LIR_Opr R29_opr;
+  static LIR_Opr R30_opr;
+  static LIR_Opr R31_opr;
+
+  static LIR_Opr  R0_oop_opr;
+  //R1: Stack pointer. Not an oop.
+  static LIR_Opr  R2_oop_opr;
+  static LIR_Opr  R3_oop_opr;
+  static LIR_Opr  R4_oop_opr;
+  static LIR_Opr  R5_oop_opr;
+  static LIR_Opr  R6_oop_opr;
+  static LIR_Opr  R7_oop_opr;
+  static LIR_Opr  R8_oop_opr;
+  static LIR_Opr  R9_oop_opr;
+  static LIR_Opr R10_oop_opr;
+  static LIR_Opr R11_oop_opr;
+  static LIR_Opr R12_oop_opr;
+  //R13: System thread register. Not usable.
+  static LIR_Opr R14_oop_opr;
+  static LIR_Opr R15_oop_opr;
+  //R16: Java thread register. Not an oop.
+  static LIR_Opr R17_oop_opr;
+  static LIR_Opr R18_oop_opr;
+  static LIR_Opr R19_oop_opr;
+  static LIR_Opr R20_oop_opr;
+  static LIR_Opr R21_oop_opr;
+  static LIR_Opr R22_oop_opr;
+  static LIR_Opr R23_oop_opr;
+  static LIR_Opr R24_oop_opr;
+  static LIR_Opr R25_oop_opr;
+  static LIR_Opr R26_oop_opr;
+  static LIR_Opr R27_oop_opr;
+  static LIR_Opr R28_oop_opr;
+  //R29: TOC register. Not an oop. (No R29_oop_opr is declared:
+  //     c1_FrameMap_ppc.cpp neither defines nor initializes one.)
+  static LIR_Opr R30_oop_opr;
+  static LIR_Opr R31_oop_opr;
+
+  static LIR_Opr  R0_metadata_opr;
+  //R1: Stack pointer. Not metadata.
+  static LIR_Opr  R2_metadata_opr;
+  static LIR_Opr  R3_metadata_opr;
+  static LIR_Opr  R4_metadata_opr;
+  static LIR_Opr  R5_metadata_opr;
+  static LIR_Opr  R6_metadata_opr;
+  static LIR_Opr  R7_metadata_opr;
+  static LIR_Opr  R8_metadata_opr;
+  static LIR_Opr  R9_metadata_opr;
+  static LIR_Opr R10_metadata_opr;
+  static LIR_Opr R11_metadata_opr;
+  static LIR_Opr R12_metadata_opr;
+  //R13: System thread register. Not usable.
+  static LIR_Opr R14_metadata_opr;
+  static LIR_Opr R15_metadata_opr;
+  //R16: Java thread register. Not metadata.
+  static LIR_Opr R17_metadata_opr;
+  static LIR_Opr R18_metadata_opr;
+  static LIR_Opr R19_metadata_opr;
+  static LIR_Opr R20_metadata_opr;
+  static LIR_Opr R21_metadata_opr;
+  static LIR_Opr R22_metadata_opr;
+  static LIR_Opr R23_metadata_opr;
+  static LIR_Opr R24_metadata_opr;
+  static LIR_Opr R25_metadata_opr;
+  static LIR_Opr R26_metadata_opr;
+  static LIR_Opr R27_metadata_opr;
+  static LIR_Opr R28_metadata_opr;
+  //R29: TOC register. Not metadata.
+  static LIR_Opr R30_metadata_opr;
+  static LIR_Opr R31_metadata_opr;
+
+  static LIR_Opr SP_opr;
+
+  static LIR_Opr R0_long_opr;
+  static LIR_Opr R3_long_opr;
+
+  static LIR_Opr F1_opr;
+  static LIR_Opr F1_double_opr;
+
+ private:
+  static FloatRegister  _fpu_regs [nof_fpu_regs];
+
+  static LIR_Opr as_long_single_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); // Both halves are the same register number.
+  }
+  static LIR_Opr as_long_pair_opr(Register r) { // In this header only referenced by the commented-out 32 bit as_long_opr().
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r->successor()), cpu_reg2rnr(r)); // Halves are r->successor() and r.
+  }
+
+ public:
+
+#ifdef _LP64
+  static LIR_Opr as_long_opr(Register r) {
+    return as_long_single_opr(r);
+  }
+  static LIR_Opr as_pointer_opr(Register r) {
+    return as_long_single_opr(r);
+  }
+#else
+  static LIR_Opr as_long_opr(Register r) {
+    Unimplemented(); return 0;
+//    return as_long_pair_opr(r);
+  }
+  static LIR_Opr as_pointer_opr(Register r) {
+    Unimplemented(); return 0;
+//    return as_opr(r);
+  }
+#endif
+  static LIR_Opr as_float_opr(FloatRegister r) {
+    return LIR_OprFact::single_fpu(r->encoding());
+  }
+  static LIR_Opr as_double_opr(FloatRegister r) {
+    return LIR_OprFact::double_fpu(r->encoding());
+  }
+
+  static FloatRegister nr2floatreg (int rnr);
+
+  static VMReg fpu_regname (int n);
+
+  static bool is_caller_save_register(LIR_Opr  reg);
+  static bool is_caller_save_register(Register r);
+
+  static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
+  static int last_cpu_reg()             { return pd_last_cpu_reg; }
+
+  // Tells whether a register needs to be saved in the frames (e.g. for GC).
+  // The registers with a fixed usage are excluded:
+  //  R0: scratch
+  //  R1: sp
+  // R13: system thread id
+  // R16: java thread
+  // R29: global TOC
+  static bool reg_needs_save(Register r) { return !(r == R0 || r == R1 || r == R13 || r == R16 || r == R29); }
+
+#endif // CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,3133 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "nativeInst_ppc.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ _masm->
+
+
+const ConditionRegister LIR_Assembler::BOOL_RESULT = CCR5;
+
+
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
+  Unimplemented(); return false; // Currently not used on this platform.
+}
+
+
+LIR_Opr LIR_Assembler::receiverOpr() {
+  return FrameMap::R3_oop_opr;
+}
+
+
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+  return FrameMap::R3_opr;
+}
+
+
+// This specifies the stack pointer decrement needed to build the frame.
+int LIR_Assembler::initial_frame_size_in_bytes() const {
+  return in_bytes(frame_map()->framesize_in_bytes());
+}
+
+
+// Inline cache check: the inline cached class is in inline_cache_reg;
+// we fetch the class of the receiver and compare it with the cached class.
+// If they do not match we jump to slow case.
+int LIR_Assembler::check_icache() {
+  int offset = __ offset(); // Taken before the check is emitted.
+  __ inline_cache_check(R3_ARG1, R19_inline_cache_reg); // NOTE(review): R3_ARG1 presumably holds the receiver (cf. receiverOpr()) - confirm.
+  return offset;            // Code offset at which the check starts.
+}
+
+
+void LIR_Assembler::osr_entry() {
+  // On-stack-replacement entry sequence:
+  //
+  //   1. Create a new compiled activation.
+  //   2. Initialize local variables in the compiled activation. The expression
+  //      stack must be empty at the osr_bci; it is not initialized.
+  //   3. Jump to the continuation address in compiled code to resume execution.
+
+  // OSR entry point
+  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
+  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+  ValueStack* entry_state = osr_entry->end()->state();
+  int number_of_locks = entry_state->locks_size();
+
+  // Create a frame for the compiled activation.
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+  // OSR buffer is
+  //
+  // locals[nlocals-1..0]
+  // monitors[number_of_locks-1..0]
+  //
+  // Locals is a direct copy of the interpreter frame so in the osr buffer
+  // the first slot in the local array is the last local from the interpreter
+  // and the last slot is local[0] (receiver) from the interpreter.
+  //
+  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
+  // in the interpreter frame (the method lock if a sync method).
+
+  // Initialize monitors in the compiled activation.
+  //   R3: pointer to osr buffer
+  //
+  // All other registers are dead at this point and the locals will be
+  // copied into place by code emitted in the IR.
+
+  Register OSR_buf = osrBufferPointer()->as_register();
+  { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+    int monitor_offset = BytesPerWord * method()->max_locals() +
+      (2 * BytesPerWord) * (number_of_locks - 1); // Offset of the last 2 word monitor entry in the buffer (used for i == 0).
+    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
+    // the OSR buffer using 2 word entries: first the lock and then
+    // the oop.
+    for (int i = 0; i < number_of_locks; i++) {
+      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); // Walk the buffer entries backwards.
+#ifdef ASSERT
+      // Verify the interpreter's monitor has a non-null object.
+      {
+        Label L;
+        __ ld(R0, slot_offset + 1*BytesPerWord, OSR_buf);
+        __ cmpdi(CCR0, R0, 0);
+        __ bne(CCR0, L);
+        __ stop("locked object is NULL");
+        __ bind(L);
+      }
+#endif // ASSERT
+      // Copy the lock field and the object into the compiled activation.
+      Address ml = frame_map()->address_for_monitor_lock(i),
+              mo = frame_map()->address_for_monitor_object(i);
+      assert(ml.index() == noreg && mo.index() == noreg, "sanity");
+      __ ld(R0, slot_offset + 0, OSR_buf);              // Lock (first word of the entry).
+      __ std(R0, ml.disp(), ml.base());
+      __ ld(R0, slot_offset + 1*BytesPerWord, OSR_buf); // Oop (second word of the entry).
+      __ std(R0, mo.disp(), mo.base());
+    }
+  }
+}
+
+
+int LIR_Assembler::emit_exception_handler() {
+  // If the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri).
+  __ nop();
+
+  // Generate code for the exception handler.
+  address handler_base = __ start_a_stub(exception_handler_size);
+
+  if (handler_base == NULL) {
+    // Not enough space left for the handler.
+    bailout("exception handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+  address entry_point = Runtime1::entry_for(Runtime1::handle_exception_from_callee_id);
+  // Materialize the entry point in R0 as an offset from the global TOC (R29_TOC).
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry_point));
+  __ mtctr(R0);
+  __ bctr(); // Jump (no link) to the runtime entry.
+
+  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset; // Code offset of the handler; -1 is returned above after a bailout.
+}
+
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path. Returns the code offset at which the handler starts.
+int LIR_Assembler::emit_unwind_handler() {
+  _masm->block_comment("Unwind handler");
+
+  int offset = code_offset();
+  bool preserve_exception = method()->is_synchronized() || compilation()->env()->dtrace_method_probes();
+  const Register Rexception = R3 /*LIRGenerator::exceptionOopOpr()*/, Rexception_save = R31;
+
+  // Fetch the exception from TLS and clear out exception related thread state.
+  __ ld(Rexception, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
+  __ li(R0, 0);
+  __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
+  __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
+
+  __ bind(_unwind_handler_entry); // Bound after the TLS fetch, so jumps to this label skip it.
+  __ verify_not_null_oop(Rexception);
+  if (preserve_exception) { __ mr(Rexception_save, Rexception); } // Keep a copy; it is moved back before the dispatch.
+
+  // Perform needed unlocking
+  MonitorExitStub* stub = NULL;
+  if (method()->is_synchronized()) {
+    monitor_address(0, FrameMap::R4_opr);
+    stub = new MonitorExitStub(FrameMap::R4_opr, true, 0);
+    __ unlock_object(R5, R6, R4, *stub->entry());
+    __ bind(*stub->continuation());
+  }
+
+  if (compilation()->env()->dtrace_method_probes()) {
+    Unimplemented(); // dtrace method probes are not supported here.
+  }
+
+  // Dispatch to the unwind logic.
+  address unwind_stub = Runtime1::entry_for(Runtime1::unwind_exception_id);
+  // Materialize the stub address in R0 as an offset from the global TOC (R29_TOC).
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(unwind_stub));
+  if (preserve_exception) { __ mr(Rexception, Rexception_save); }
+  __ mtctr(R0);
+  __ bctr();
+
+  // Emit the slow path assembly.
+  if (stub != NULL) {
+    stub->emit_code(this);
+  }
+
+  return offset;
+}
+
+
+int LIR_Assembler::emit_deopt_handler() { // Emits the deopt handler stub; returns its code offset, or -1 after a bailout.
+  // If the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri).
+  __ nop();
+
+  // Generate code for deopt handler.
+  address handler_base = __ start_a_stub(deopt_handler_size);
+
+  if (handler_base == NULL) {
+    // Not enough space left for the handler.
+    bailout("deopt handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset(); // Start of the handler code inside the stub.
+  __ bl64_patchable(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type);
+
+  guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); // Emitted code must fit the size reserved above.
+  __ end_a_stub();
+
+  return offset;
+}
+
+
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+  // Loads the oop constant o into reg. A NULL oop is simply the immediate 0.
+  if (o == NULL) { __ li(reg, 0); return; }
+  // Non-NULL: load the oop's address literal. R0 is passed to load_const as
+  // temp register unless R0 is the destination itself.
+  const Register tmp = (reg == R0) ? noreg : R0;
+  AddressLiteral oop_literal = __ constant_oop_address(o);
+  __ load_const(reg, oop_literal, tmp);
+}
+
+
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { // Loads a not-yet-known oop into reg via a patchable sequence.
+  // Allocate a new index in table to hold the object once it's been patched.
+  int oop_index = __ oop_recorder()->allocate_oop_index(NULL);
+  PatchingStub* patch = new PatchingStub(_masm, patching_id(info), oop_index);
+
+  AddressLiteral addrlit((address)NULL, oop_Relocation::spec(oop_index)); // NULL placeholder with an oop relocation for oop_index.
+  __ load_const(reg, addrlit, R0); // R0 serves as temp register.
+
+  patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+
+void LIR_Assembler::metadata2reg(Metadata* o, Register reg) { // Loads the metadata pointer o into reg as a plain constant.
+  AddressLiteral md = __ constant_metadata_address(o); // Notify OOP recorder (don't need the relocation)
+  __ load_const_optimized(reg, md.value(), (reg != R0) ? R0 : noreg); // R0 is the temp unless it is the destination.
+}
+
+
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) { // Loads a not-yet-known klass into reg via a patchable sequence.
+  // Allocate a new index in table to hold the klass once it's been patched.
+  int index = __ oop_recorder()->allocate_metadata_index(NULL);
+  PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index);
+
+  AddressLiteral addrlit((address)NULL, metadata_Relocation::spec(index)); // NULL placeholder with a metadata relocation for index.
+  assert(addrlit.rspec().type() == relocInfo::metadata_type, "must be an metadata reloc");
+  __ load_const(reg, addrlit, R0); // R0 serves as temp register.
+
+  patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+
+void LIR_Assembler::emit_op3(LIR_Op3* op) { // Emits division (lir_idiv) or remainder (lir_irem) for int or long operands.
+  const bool is_int = op->result_opr()->is_single_cpu();
+  Register Rdividend = is_int ? op->in_opr1()->as_register() : op->in_opr1()->as_register_lo();
+  Register Rdivisor  = noreg;
+  Register Rscratch  = op->in_opr3()->as_register();
+  Register Rresult   = is_int ? op->result_opr()->as_register() : op->result_opr()->as_register_lo();
+  long divisor = -1; // Only assigned and used when in_opr2 is a constant (Rdivisor stays noreg).
+
+  if (op->in_opr2()->is_register()) {
+    Rdivisor = is_int ? op->in_opr2()->as_register() : op->in_opr2()->as_register_lo();
+  } else {
+    divisor = is_int ? op->in_opr2()->as_constant_ptr()->as_jint()
+                     : op->in_opr2()->as_constant_ptr()->as_jlong();
+  }
+
+  assert(Rdividend != Rscratch, "");
+  assert(Rdivisor  != Rscratch, "");
+  assert(op->code() == lir_idiv || op->code() == lir_irem, "Must be irem or idiv");
+
+  if (Rdivisor == noreg) { // Constant divisor: pick a specialized sequence.
+    if (divisor == 1) { // stupid, but can happen
+      if (op->code() == lir_idiv) {
+        __ mr_if_needed(Rresult, Rdividend);
+      } else {
+        __ li(Rresult, 0);
+      }
+
+    } else if (is_power_of_2(divisor)) {
+      // Convert division by a power of two into some shifts and logical operations.
+      int log2 = log2_intptr(divisor);
+
+      // Round towards 0: Rscratch becomes divisor-1 for a negative dividend, else 0.
+      if (divisor == 2) {
+        if (is_int) {
+          __ srwi(Rscratch, Rdividend, 31); // Sign bit only (0 or 1).
+        } else {
+          __ srdi(Rscratch, Rdividend, 63); // Sign bit only (0 or 1).
+        }
+      } else {
+        if (is_int) {
+          __ srawi(Rscratch, Rdividend, 31);
+        } else {
+          __ sradi(Rscratch, Rdividend, 63);
+        }
+        __ clrldi(Rscratch, Rscratch, 64-log2); // Keep only the low log2 bits of the sign mask.
+      }
+      __ add(Rscratch, Rdividend, Rscratch); // Biased dividend.
+
+      if (op->code() == lir_idiv) {
+        if (is_int) {
+          __ srawi(Rresult, Rscratch, log2);
+        } else {
+          __ sradi(Rresult, Rscratch, log2);
+        }
+      } else { // lir_irem
+        __ clrrdi(Rscratch, Rscratch, log2); // Clear the low log2 bits of the biased dividend.
+        __ sub(Rresult, Rdividend, Rscratch);
+      }
+
+    } else if (divisor == -1) { // Handled without a divide instruction (see the minint/-1 remarks below).
+      if (op->code() == lir_idiv) {
+        __ neg(Rresult, Rdividend);
+      } else {
+        __ li(Rresult, 0);
+      }
+
+    } else { // Any other constant: materialize it in Rscratch and divide.
+      __ load_const_optimized(Rscratch, divisor);
+      if (op->code() == lir_idiv) {
+        if (is_int) {
+          __ divw(Rresult, Rdividend, Rscratch); // Can't divide minint/-1.
+        } else {
+          __ divd(Rresult, Rdividend, Rscratch); // Can't divide minint/-1.
+        }
+      } else {
+        assert(Rscratch != R0, "need both");
+        if (is_int) {
+          __ divw(R0, Rdividend, Rscratch); // Can't divide minint/-1.
+          __ mullw(Rscratch, R0, Rscratch);
+        } else {
+          __ divd(R0, Rdividend, Rscratch); // Can't divide minint/-1.
+          __ mulld(Rscratch, R0, Rscratch);
+        }
+        __ sub(Rresult, Rdividend, Rscratch); // remainder = dividend - quotient * divisor
+      }
+
+    }
+    return;
+  }
+
+  Label regular, done; // Register divisor: test for -1 at run time before dividing.
+  if (is_int) {
+    __ cmpwi(CCR0, Rdivisor, -1);
+  } else {
+    __ cmpdi(CCR0, Rdivisor, -1);
+  }
+  __ bne(CCR0, regular);
+  if (op->code() == lir_idiv) {
+    __ neg(Rresult, Rdividend); // Divisor is -1: the quotient is the negated dividend.
+    __ b(done);
+    __ bind(regular);
+    if (is_int) {
+      __ divw(Rresult, Rdividend, Rdivisor); // Can't divide minint/-1.
+    } else {
+      __ divd(Rresult, Rdividend, Rdivisor); // Can't divide minint/-1.
+    }
+  } else { // lir_irem
+    __ li(Rresult, 0); // Divisor is -1: the remainder is 0.
+    __ b(done);
+    __ bind(regular);
+    if (is_int) {
+      __ divw(Rscratch, Rdividend, Rdivisor); // Can't divide minint/-1.
+      __ mullw(Rscratch, Rscratch, Rdivisor);
+    } else {
+      __ divd(Rscratch, Rdividend, Rdivisor); // Can't divide minint/-1.
+      __ mulld(Rscratch, Rscratch, Rdivisor);
+    }
+    __ sub(Rresult, Rdividend, Rscratch); // remainder = dividend - quotient * divisor
+  }
+  __ bind(done);
+}
+
+
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { // Emits an unconditional or conditional branch to op->label().
+#ifdef ASSERT
+  assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+  if (op->block() != NULL)  _branch_target_blocks.append(op->block());
+  if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
+  assert(op->info() == NULL, "shouldn't have CodeEmitInfo");
+#endif
+
+  Label *L = op->label();
+  if (op->cond() == lir_cond_always) {
+    __ b(*L);
+  } else {
+    Label done;
+    bool is_unordered = false; // True when an extra test of the summary-overflow bit of BOOL_RESULT may be needed.
+    if (op->code() == lir_cond_float_branch) {
+      assert(op->ublock() != NULL, "must have unordered successor");
+      is_unordered = true;
+    } else {
+      assert(op->code() == lir_branch, "just checking");
+    }
+
+    bool positive = false; // Branch if the tested CR bit is set (true) or clear (false).
+    Assembler::Condition cond = Assembler::equal;
+    switch (op->cond()) {
+      case lir_cond_equal:        positive = true ; cond = Assembler::equal  ; is_unordered = false; break;
+      case lir_cond_notEqual:     positive = false; cond = Assembler::equal  ; is_unordered = false; break;
+      case lir_cond_less:         positive = true ; cond = Assembler::less   ; break;
+      case lir_cond_belowEqual:   assert(op->code() != lir_cond_float_branch, ""); // fallthru
+      case lir_cond_lessEqual:    positive = false; cond = Assembler::greater; break;
+      case lir_cond_greater:      positive = true ; cond = Assembler::greater; break;
+      case lir_cond_aboveEqual:   assert(op->code() != lir_cond_float_branch, ""); // fallthru
+      case lir_cond_greaterEqual: positive = false; cond = Assembler::less   ; break;
+      default:                    ShouldNotReachHere();
+    }
+    int bo = positive ? Assembler::bcondCRbiIs1 : Assembler::bcondCRbiIs0;
+    int bi = Assembler::bi0(BOOL_RESULT, cond);
+    if (is_unordered) {
+      if (positive) {
+        if (op->ublock() == op->block()) { // Unordered successor is the branch target: also branch when the bit is set.
+          __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(BOOL_RESULT, Assembler::summary_overflow), *L);
+        }
+      } else {
+        if (op->ublock() != op->block()) { __ bso(BOOL_RESULT, done); } // Skip the branch below when the bit is set.
+      }
+    }
+    __ bc_far_optimized(bo, bi, *L);
+    __ bind(done);
+  }
+}
+
+
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { // Emits the primitive conversion selected by op->bytecode().
+  Bytecodes::Code code = op->bytecode();
+  LIR_Opr src = op->in_opr(),
+          dst = op->result_opr();
+
+  switch(code) {
+    case Bytecodes::_i2l: {
+      __ extsw(dst->as_register_lo(), src->as_register());
+      break;
+    }
+    case Bytecodes::_l2i: {
+      __ mr_if_needed(dst->as_register(), src->as_register_lo()); // high bits are garbage
+      break;
+    }
+    case Bytecodes::_i2b: {
+      __ extsb(dst->as_register(), src->as_register());
+      break;
+    }
+    case Bytecodes::_i2c: {
+      __ clrldi(dst->as_register(), src->as_register(), 64-16); // Keep the low 16 bits only.
+      break;
+    }
+    case Bytecodes::_i2s: {
+      __ extsh(dst->as_register(), src->as_register());
+      break;
+    }
+    case Bytecodes::_i2d:
+    case Bytecodes::_l2d: {
+      __ fcfid(dst->as_double_reg(), src->as_double_reg()); // via mem (the source operand is a double register here)
+      break;
+    }
+    case Bytecodes::_i2f: {
+      FloatRegister rdst = dst->as_float_reg();
+      FloatRegister rsrc = src->as_double_reg(); // via mem
+      if (VM_Version::has_fcfids()) {
+        __ fcfids(rdst, rsrc);
+      } else { // No fcfids available: convert with fcfid, then round with frsp.
+        __ fcfid(rdst, rsrc);
+        __ frsp(rdst, rdst);
+      }
+      break;
+    }
+    case Bytecodes::_l2f: { // >= Power7
+      assert(VM_Version::has_fcfids(), "fcfid+frsp needs fixup code to avoid rounding incompatibility");
+      __ fcfids(dst->as_float_reg(), src->as_double_reg()); // via mem
+      break;
+    }
+    case Bytecodes::_f2d: {
+      __ fmr_if_needed(dst->as_double_reg(), src->as_float_reg());
+      break;
+    }
+    case Bytecodes::_d2f: {
+      __ frsp(dst->as_float_reg(), src->as_double_reg());
+      break;
+    }
+    case Bytecodes::_d2i:
+    case Bytecodes::_f2i: { // The result is written to the stack slot of dst, not to a register.
+      FloatRegister rsrc = (code == Bytecodes::_d2i) ? src->as_double_reg() : src->as_float_reg();
+      Address       addr = frame_map()->address_for_slot(dst->double_stack_ix());
+      Label L;
+      // Result must be 0 if value is NaN; test by comparing value to itself.
+      __ fcmpu(CCR0, rsrc, rsrc);
+      __ li(R0, 0); // 0 in case of NAN
+      __ std(R0, addr.disp(), addr.base());
+      __ bso(CCR0, L); // NaN: keep the 0 stored above.
+      __ fctiwz(rsrc, rsrc); // USE_KILL
+      __ stfd(rsrc, addr.disp(), addr.base());
+      __ bind(L);
+      break;
+    }
+    case Bytecodes::_d2l:
+    case Bytecodes::_f2l: { // The result is written to the stack slot of dst, not to a register.
+      FloatRegister rsrc = (code == Bytecodes::_d2l) ? src->as_double_reg() : src->as_float_reg();
+      Address       addr = frame_map()->address_for_slot(dst->double_stack_ix());
+      Label L;
+      // Result must be 0 if value is NaN; test by comparing value to itself.
+      __ fcmpu(CCR0, rsrc, rsrc);
+      __ li(R0, 0); // 0 in case of NAN
+      __ std(R0, addr.disp(), addr.base());
+      __ bso(CCR0, L); // NaN: keep the 0 stored above.
+      __ fctidz(rsrc, rsrc); // USE_KILL
+      __ stfd(rsrc, addr.disp(), addr.base());
+      __ bind(L);
+      break;
+    }
+
+    default: ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::align_call(LIR_Code) { // Intentionally empty; the LIR_Code argument is ignored.
+  // Nothing to do: all instructions are word aligned on ppc.
+}
+
+
+bool LIR_Assembler::emit_trampoline_stub_for_call(address target, Register Rtoc) {
+  // Prepares a call to target: constant pool entry plus trampoline stub.
+  // Returns false (after recording a bailout) if either one could not be emitted.
+  const int call_start_offset = __ offset();
+  const address target_const_addr = __ address_constant(target, RelocationHolder::none);
+  if (target_const_addr == NULL) {
+    bailout("const section overflow");
+    return false;
+  }
+  // The stub is handed the constant's method-TOC offset and the code
+  // offset captured before the constant was requested.
+  const int target_toc_offset = __ offset_to_method_toc(target_const_addr);
+  if (__ emit_trampoline_stub(target_toc_offset, call_start_offset, Rtoc) == NULL) {
+    bailout("no space for trampoline stub");
+    return false;
+  }
+  return true;
+}
+
+
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { // Emits a static or opt-virtual Java call.
+  assert(rtype==relocInfo::opt_virtual_call_type || rtype==relocInfo::static_call_type, "unexpected rtype");
+
+  bool success = emit_trampoline_stub_for_call(op->addr());
+  if (!success) { return; } // The helper has already recorded the bailout.
+
+  __ relocate(rtype);
+  // Note: At this point we do not have the address of the trampoline
+  // stub, and the entry point might be too far away for bl, so __ pc()
+  // serves as dummy and the bl will be patched later.
+  __ code()->set_insts_mark();
+  __ bl(__ pc());
+  add_call_info(code_offset(), op->info()); // Recorded at the offset right after the bl.
+}
+
+
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { // Emits an inline-cache call: IC load followed by a patchable bl.
+  __ calculate_address_from_global_toc(R2_TOC, __ method_toc()); // R2_TOC = address of the method's TOC.
+
+  // Virtual call relocation will point to ic load.
+  address virtual_call_meta_addr = __ pc();
+  // Load a clear inline cache.
+  AddressLiteral empty_ic((address) Universe::non_oop_word());
+  bool success = __ load_const_from_method_toc(R19_inline_cache_reg, empty_ic, R2_TOC);
+  if (!success) {
+    bailout("const section overflow");
+    return;
+  }
+  // Call to fixup routine. Fixup routine uses ScopeDesc info
+  // to determine who we intended to call.
+  __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
+
+  success = emit_trampoline_stub_for_call(op->addr(), R2_TOC);
+  if (!success) { return; } // The helper has already recorded the bailout.
+
+  // Note: At this point we do not have the address of the trampoline
+  // stub, and the entry point might be too far away for bl, so __ pc()
+  // serves as dummy and the bl will be patched later.
+  __ bl(__ pc());
+  add_call_info(code_offset(), op->info()); // Recorded at the offset right after the bl.
+}
+
+
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { // Must never be called on this platform.
+  ShouldNotReachHere(); // ic_call is used instead.
+}
+
+
+void LIR_Assembler::explicit_null_check(Register addr, CodeEmitInfo* info) { // Emits a null check of addr that uses a code stub.
+  ImplicitNullCheckStub* stub = new ImplicitNullCheckStub(code_offset(), info); // Created with the current code offset.
+  __ null_check(addr, stub->entry());
+  append_code_stub(stub); // Register the stub so its code gets emitted.
+}
+
+
+// Attention: caller must encode oop if needed
+int LIR_Assembler::store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned) { // Stores from_reg to [base + offset]; returns the code offset of the store.
+  int store_offset;
+  if (!Assembler::is_simm16(offset)) {
+    // For offsets larger than a simm16 we setup the offset in R0 and use the indexed store.
+    assert(wide && !from_reg->is_same_register(FrameMap::R0_opr), "large offset only supported in special case");
+    __ load_const_optimized(R0, offset);
+    store_offset = store(from_reg, base, R0, type, wide);
+  } else {
+    store_offset = code_offset();
+    switch (type) {
+      case T_BOOLEAN: // fall through
+      case T_BYTE  : __ stb(from_reg->as_register(), offset, base); break;
+      case T_CHAR  :
+      case T_SHORT : __ sth(from_reg->as_register(), offset, base); break;
+      case T_INT   : __ stw(from_reg->as_register(), offset, base); break;
+      case T_LONG  : __ std(from_reg->as_register_lo(), offset, base); break;
+      case T_ADDRESS:
+      case T_METADATA: __ std(from_reg->as_register(), offset, base); break;
+      case T_ARRAY : // fall through
+      case T_OBJECT:
+        {
+          if (UseCompressedOops && !wide) {
+            // Encoding done in caller, so only 32 bits are stored.
+            __ stw(from_reg->as_register(), offset, base);
+          } else {
+            __ std(from_reg->as_register(), offset, base);
+          }
+          __ verify_oop(from_reg->as_register());
+          break;
+        }
+      case T_FLOAT : __ stfs(from_reg->as_float_reg(), offset, base); break;
+      case T_DOUBLE: __ stfd(from_reg->as_double_reg(), offset, base); break;
+      default      : ShouldNotReachHere();
+    }
+  }
+  return store_offset;
+}
+
+
+// Attention: caller must encode oop if needed
+int LIR_Assembler::store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide) { // Indexed store to [base + disp]; returns the code offset of the store.
+  int store_offset = code_offset();
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE  : __ stbx(from_reg->as_register(), base, disp); break;
+    case T_CHAR  :
+    case T_SHORT : __ sthx(from_reg->as_register(), base, disp); break;
+    case T_INT   : __ stwx(from_reg->as_register(), base, disp); break;
+    case T_LONG  :
+#ifdef _LP64
+      __ stdx(from_reg->as_register_lo(), base, disp);
+#else
+      Unimplemented();
+#endif
+      break;
+    case T_ADDRESS: // NOTE(review): unlike the immediate-offset store, there is no T_METADATA case here.
+      __ stdx(from_reg->as_register(), base, disp);
+      break;
+    case T_ARRAY : // fall through
+    case T_OBJECT:
+      {
+        if (UseCompressedOops && !wide) {
+          // Encoding done in caller, so only 32 bits are stored.
+          __ stwx(from_reg->as_register(), base, disp);
+        } else {
+          __ stdx(from_reg->as_register(), base, disp);
+        }
+        __ verify_oop(from_reg->as_register()); // kills R0
+        break;
+      }
+    case T_FLOAT : __ stfsx(from_reg->as_float_reg(), base, disp); break;
+    case T_DOUBLE: __ stfdx(from_reg->as_double_reg(), base, disp); break;
+    default      : ShouldNotReachHere();
+  }
+  return store_offset;
+}
+
+
+int LIR_Assembler::load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned) { // Loads [base + offset] into to_reg; returns the code offset of the load.
+  int load_offset;
+  if (!Assembler::is_simm16(offset)) {
+    // For offsets larger than a simm16 we setup the offset in R0 and use the indexed load.
+    __ load_const_optimized(R0, offset);
+    load_offset = load(base, R0, to_reg, type, wide);
+  } else {
+    load_offset = code_offset();
+    switch(type) {
+      case T_BOOLEAN: // fall through
+      case T_BYTE  :   __ lbz(to_reg->as_register(), offset, base);
+                       __ extsb(to_reg->as_register(), to_reg->as_register()); break; // Sign-extend the loaded byte.
+      case T_CHAR  :   __ lhz(to_reg->as_register(), offset, base); break;
+      case T_SHORT :   __ lha(to_reg->as_register(), offset, base); break;
+      case T_INT   :   __ lwa(to_reg->as_register(), offset, base); break;
+      case T_LONG  :   __ ld(to_reg->as_register_lo(), offset, base); break;
+      case T_METADATA: __ ld(to_reg->as_register(), offset, base); break;
+      case T_ADDRESS:
+        if (offset == oopDesc::klass_offset_in_bytes() && UseCompressedClassPointers) { // Loading at the klass offset: 32-bit load + decode.
+          __ lwz(to_reg->as_register(), offset, base);
+          __ decode_klass_not_null(to_reg->as_register());
+        } else {
+          __ ld(to_reg->as_register(), offset, base);
+        }
+        break;
+      case T_ARRAY : // fall through
+      case T_OBJECT:
+        {
+          if (UseCompressedOops && !wide) { // Narrow oop in memory: 32-bit load + decode.
+            __ lwz(to_reg->as_register(), offset, base);
+            __ decode_heap_oop(to_reg->as_register());
+          } else {
+            __ ld(to_reg->as_register(), offset, base);
+          }
+          __ verify_oop(to_reg->as_register());
+          break;
+        }
+      case T_FLOAT:  __ lfs(to_reg->as_float_reg(), offset, base); break;
+      case T_DOUBLE: __ lfd(to_reg->as_double_reg(), offset, base); break;
+      default      : ShouldNotReachHere();
+    }
+  }
+  return load_offset;
+}
+
+
+int LIR_Assembler::load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide) { // Indexed load of [base + disp] into to_reg; returns the code offset of the load.
+  int load_offset = code_offset();
+  switch(type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE  :  __ lbzx(to_reg->as_register(), base, disp);
+                    __ extsb(to_reg->as_register(), to_reg->as_register()); break; // Sign-extend the loaded byte.
+    case T_CHAR  :  __ lhzx(to_reg->as_register(), base, disp); break;
+    case T_SHORT :  __ lhax(to_reg->as_register(), base, disp); break;
+    case T_INT   :  __ lwax(to_reg->as_register(), base, disp); break;
+    case T_ADDRESS: __ ldx(to_reg->as_register(), base, disp); break; // NOTE(review): no T_METADATA and no compressed-klass handling here, unlike the immediate-offset load.
+    case T_ARRAY : // fall through
+    case T_OBJECT:
+      {
+        if (UseCompressedOops && !wide) { // Narrow oop in memory: 32-bit load + decode.
+          __ lwzx(to_reg->as_register(), base, disp);
+          __ decode_heap_oop(to_reg->as_register());
+        } else {
+          __ ldx(to_reg->as_register(), base, disp);
+        }
+        __ verify_oop(to_reg->as_register());
+        break;
+      }
+    case T_FLOAT:  __ lfsx(to_reg->as_float_reg() , base, disp); break;
+    case T_DOUBLE: __ lfdx(to_reg->as_double_reg(), base, disp); break;
+    case T_LONG  :
+#ifdef _LP64
+      __ ldx(to_reg->as_register_lo(), base, disp);
+#else
+      Unimplemented();
+#endif
+      break;
+    default      : ShouldNotReachHere();
+  }
+  return load_offset;
+}
+
+
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { // Stores the constant src into the stack slot dest, using R0 as scratch.
+  LIR_Const* c = src->as_constant_ptr();
+  Register src_reg = R0;
+  switch (c->type()) {
+    case T_INT:
+    case T_FLOAT: { // 32-bit payload: stored with stw.
+      int value = c->as_jint_bits();
+      __ load_const_optimized(src_reg, value);
+      Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      __ stw(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    case T_ADDRESS: { // Read as jint bits, but stored as a full 64-bit value.
+      int value = c->as_jint_bits();
+      __ load_const_optimized(src_reg, value);
+      Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      __ std(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    case T_OBJECT: {
+      jobject2reg(c->as_jobject(), src_reg);
+      Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      __ std(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    case T_LONG:
+    case T_DOUBLE: {
+      jlong value = c->as_jlong_bits(); // Must stay 64 bits wide: an int would drop the upper half of the constant.
+      __ load_const_optimized(src_reg, value);
+      Address addr = frame_map()->address_for_double_slot(dest->double_stack_ix());
+      __ std(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    default:
+      Unimplemented();
+  }
+}
+
+
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { // Stores the constant src to the memory address dest, via R0.
+  LIR_Const* c = src->as_constant_ptr();
+  LIR_Address* addr = dest->as_address_ptr();
+  Register base = addr->base()->as_pointer_register();
+  LIR_Opr tmp = LIR_OprFact::illegalOpr;
+  int offset = -1;
+  // Null check for large offsets in LIRGenerator::do_StoreField.
+  bool needs_explicit_null_check = !ImplicitNullChecks;
+
+  if (info != NULL && needs_explicit_null_check) {
+    explicit_null_check(base, info);
+  }
+
+  switch (c->type()) {
+    case T_FLOAT: type = T_INT; // Store the float's bits as an int; intentional fall through.
+    case T_INT:
+    case T_ADDRESS: {
+      tmp = FrameMap::R0_opr;
+      __ load_const_optimized(tmp->as_register(), c->as_jint_bits());
+      break;
+    }
+    case T_DOUBLE: type = T_LONG; // Store the double's bits as a long; intentional fall through.
+    case T_LONG: {
+      tmp = FrameMap::R0_long_opr;
+      __ load_const_optimized(tmp->as_register_lo(), c->as_jlong_bits());
+      break;
+    }
+    case T_OBJECT: {
+      tmp = FrameMap::R0_opr;
+      if (UseCompressedOops && !wide && c->as_jobject() != NULL) { // Non-NULL narrow oop: relocated lis/ori pair.
+        AddressLiteral oop_addr = __ constant_oop_address(c->as_jobject());
+        __ lis(R0, oop_addr.value() >> 16); // Don't care about sign extend (will use stw).
+        __ relocate(oop_addr.rspec(), /*compressed format*/ 1);
+        __ ori(R0, R0, oop_addr.value() & 0xffff);
+      } else {
+        jobject2reg(c->as_jobject(), R0);
+      }
+      break;
+    }
+    default:
+      Unimplemented();
+  }
+
+  // Handle either reg+reg or reg+disp address.
+  if (addr->index()->is_valid()) {
+    assert(addr->disp() == 0, "must be zero");
+    offset = store(tmp, base, addr->index()->as_pointer_register(), type, wide);
+  } else {
+    assert(Assembler::is_simm16(addr->disp()), "can't handle larger addresses");
+    offset = store(tmp, base, addr->disp(), type, wide, false);
+  }
+
+  if (info != NULL) {
+    assert(offset != -1, "offset should've been set");
+    if (!needs_explicit_null_check) {
+      add_debug_info_for_null_check(offset, info); // Implicit null check: attach the debug info to the store's offset.
+    }
+  }
+}
+
+
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { // Loads the constant src into the register dest.
+  LIR_Const* c = src->as_constant_ptr();
+  LIR_Opr to_reg = dest;
+
+  switch (c->type()) {
+    case T_INT: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register(), c->as_jint(), R0);
+      break;
+    }
+    case T_ADDRESS: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register(), c->as_jint(), R0);  // Yes, as_jint ...
+      break;
+    }
+    case T_LONG: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register_lo(), c->as_jlong(), R0);
+      break;
+    }
+
+    case T_OBJECT: { // Patching variant is used when patch_code requests it.
+      if (patch_code == lir_patch_none) {
+        jobject2reg(c->as_jobject(), to_reg->as_register());
+      } else {
+        jobject2reg_with_patching(to_reg->as_register(), info);
+      }
+      break;
+    }
+
+    case T_METADATA: // Patching variant is used when patch_code requests it.
+      {
+        if (patch_code == lir_patch_none) {
+          metadata2reg(c->as_metadata(), to_reg->as_register());
+        } else {
+          klass2reg_with_patching(to_reg->as_register(), info);
+        }
+      }
+      break;
+
+    case T_FLOAT:
+      {
+        if (to_reg->is_single_fpu()) { // FPU destination: load the value from a float constant in memory.
+          address const_addr = __ float_constant(c->as_jfloat());
+          if (const_addr == NULL) {
+            bailout("const section overflow");
+            break;
+          }
+          RelocationHolder rspec = internal_word_Relocation::spec(const_addr);
+          __ relocate(rspec);
+          __ load_const(R0, const_addr);
+          __ lfsx(to_reg->as_float_reg(), R0);
+        } else { // CPU destination: load the float's bit pattern as an int.
+          assert(to_reg->is_single_cpu(), "Must be a cpu register.");
+          __ load_const_optimized(to_reg->as_register(), jint_cast(c->as_jfloat()), R0);
+        }
+      }
+      break;
+
+    case T_DOUBLE:
+      {
+        if (to_reg->is_double_fpu()) { // FPU destination: load the value from a double constant in memory.
+          address const_addr = __ double_constant(c->as_jdouble());
+          if (const_addr == NULL) {
+            bailout("const section overflow");
+            break;
+          }
+          RelocationHolder rspec = internal_word_Relocation::spec(const_addr);
+          __ relocate(rspec);
+          __ load_const(R0, const_addr);
+          __ lfdx(to_reg->as_double_reg(), R0);
+        } else { // CPU destination: load the double's bit pattern as a long.
+          assert(to_reg->is_double_cpu(), "Must be a long register.");
+          __ load_const_optimized(to_reg->as_register_lo(), jlong_cast(c->as_jdouble()), R0);
+        }
+      }
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+
+Address LIR_Assembler::as_Address(LIR_Address* addr) { // Not implemented on this platform.
+  Unimplemented(); return Address(); // The return only satisfies the signature.
+}
+
+
+inline RegisterOrConstant index_or_disp(LIR_Address* addr) {
+  // Yields the constant displacement when the address has no legal index
+  // operand, and the index register otherwise.
+  LIR_Opr index = addr->index();
+  return index->is_illegal() ? (RegisterOrConstant)(addr->disp())
+                             : (RegisterOrConstant)(index->as_pointer_register());
+}
+
+
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  // Copies one stack slot to another through R0: the switch picks the slot
+  // addresses and the access width, the load/store pair is emitted after it.
+  const Register scratch = R0;
+  Address src_addr, dst_addr;
+  bool is_32bit = false;
+  switch (type) {
+    case T_INT:
+    case T_FLOAT:
+      is_32bit = true; // Copied with lwz/stw.
+      // fallthru: same single-slot addressing as T_ADDRESS/T_OBJECT
+    case T_ADDRESS:
+    case T_OBJECT:
+      src_addr = frame_map()->address_for_slot(src->single_stack_ix());
+      dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      break;
+    case T_LONG:
+    case T_DOUBLE:
+      src_addr = frame_map()->address_for_double_slot(src->double_stack_ix());
+      dst_addr = frame_map()->address_for_double_slot(dest->double_stack_ix());
+      break;
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+  if (is_32bit) {
+    __ lwz(scratch, src_addr.disp(), src_addr.base());
+    __ stw(scratch, dst_addr.disp(), dst_addr.base());
+  } else {
+    __ ld(scratch, src_addr.disp(), src_addr.base());
+    __ std(scratch, dst_addr.disp(), dst_addr.base());
+  }
+}
+
+
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { // Not implemented on this platform.
+  Unimplemented(); return Address(); // The return only satisfies the signature.
+}
+
+
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { // Not implemented on this platform.
+  Unimplemented(); return Address(); // The return only satisfies the signature.
+}
+
+
+void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, // Loads from the memory address src_opr into dest.
+                            LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool unaligned) {
+
+  assert(type != T_METADATA, "load of metadata ptr not supported");
+  LIR_Address* addr = src_opr->as_address_ptr();
+  LIR_Opr to_reg = dest;
+
+  Register src = addr->base()->as_pointer_register();
+  Register disp_reg = noreg;
+  int disp_value = addr->disp();
+  bool needs_patching = (patch_code != lir_patch_none);
+  // null check for large offsets in LIRGenerator::do_LoadField
+  bool needs_explicit_null_check = !os::zero_page_read_protected() || !ImplicitNullChecks;
+
+  if (info != NULL && needs_explicit_null_check) {
+    explicit_null_check(src, info);
+  }
+
+  if (addr->base()->type() == T_OBJECT) {
+    __ verify_oop(src);
+  }
+
+  PatchingStub* patch = NULL;
+  if (needs_patching) {
+    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!to_reg->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  if (addr->index()->is_illegal()) { // No index register: base + displacement.
+    if (!Assembler::is_simm16(disp_value)) { // Displacement does not fit a simm16: put it in R0.
+      if (needs_patching) {
+        __ load_const32(R0, 0); // patchable int
+      } else {
+        __ load_const_optimized(R0, disp_value);
+      }
+      disp_reg = R0;
+    }
+  } else { // Base + index register.
+    disp_reg = addr->index()->as_pointer_register();
+    assert(disp_value == 0, "can't handle 3 operand addresses");
+  }
+
+  // Remember the offset of the load. The patching_epilog must be done
+  // before the call to add_debug_info, otherwise the PcDescs don't get
+  // entered in increasing order.
+  int offset;
+
+  if (disp_reg == noreg) {
+    assert(Assembler::is_simm16(disp_value), "should have set this up");
+    offset = load(src, disp_value, to_reg, type, wide, unaligned);
+  } else {
+    assert(!unaligned, "unexpected");
+    offset = load(src, disp_reg, to_reg, type, wide);
+  }
+
+  if (patch != NULL) {
+    patching_epilog(patch, patch_code, src, info);
+  }
+  if (info != NULL && !needs_explicit_null_check) {
+    add_debug_info_for_null_check(offset, info); // Implicit null check: attach the debug info to the load's offset.
+  }
+}
+
+
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { // Loads the stack slot src into dest; the type parameter is not used (dest->type() is).
+  Address addr; // Stays default-constructed if src is neither single nor double word.
+  if (src->is_single_word()) {
+    addr = frame_map()->address_for_slot(src->single_stack_ix());
+  } else if (src->is_double_word())  {
+    addr = frame_map()->address_for_double_slot(src->double_stack_ix());
+  }
+
+  bool unaligned = (addr.disp() - STACK_BIAS) % 8 != 0; // Slot is not at a multiple of 8 bytes once STACK_BIAS is subtracted.
+  load(addr.base(), addr.disp(), dest, dest->type(), true /*wide*/, unaligned);
+}
+
+
+void LIR_Assembler::reg2stack(LIR_Opr from_reg, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { // Stores from_reg to the stack slot dest; type and pop_fpu_stack are not used.
+  Address addr; // Stays default-constructed if dest is neither single nor double word.
+  if (dest->is_single_word()) {
+    addr = frame_map()->address_for_slot(dest->single_stack_ix());
+  } else if (dest->is_double_word())  {
+    addr = frame_map()->address_for_slot(dest->double_stack_ix()); // NOTE(review): stack2reg uses address_for_double_slot here - confirm the two are equivalent.
+  }
+  bool unaligned = (addr.disp() - STACK_BIAS) % 8 != 0; // Slot is not at a multiple of 8 bytes once STACK_BIAS is subtracted.
+  store(from_reg, addr.base(), addr.disp(), from_reg->type(), true /*wide*/, unaligned);
+}
+
+
+void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) { // Register-to-register move; both operands must be float kind or both non-float.
+  if (from_reg->is_float_kind() && to_reg->is_float_kind()) {
+    if (from_reg->is_double_fpu()) {
+      // double to double moves
+      assert(to_reg->is_double_fpu(), "should match");
+      __ fmr_if_needed(to_reg->as_double_reg(), from_reg->as_double_reg());
+    } else {
+      // float to float moves
+      assert(to_reg->is_single_fpu(), "should match");
+      __ fmr_if_needed(to_reg->as_float_reg(), from_reg->as_float_reg());
+    }
+  } else if (!from_reg->is_float_kind() && !to_reg->is_float_kind()) {
+    if (from_reg->is_double_cpu()) { // Double-cpu source: moved as pointer-sized registers.
+      __ mr_if_needed(to_reg->as_pointer_register(), from_reg->as_pointer_register());
+    } else if (to_reg->is_double_cpu()) {
+      // single-cpu source into the lo register of a double-cpu destination
+      __ mr_if_needed(to_reg->as_register_lo(), from_reg->as_register());
+    } else {
+      // int to int moves
+      __ mr_if_needed(to_reg->as_register(), from_reg->as_register());
+    }
+  } else {
+    ShouldNotReachHere(); // Mixed float / non-float moves are not handled here.
+  }
+  if (to_reg->type() == T_OBJECT || to_reg->type() == T_ARRAY) {
+    __ verify_oop(to_reg->as_register());
+  }
+}
+
+
+// Store the register operand 'from_reg' to the memory location described by
+// the LIR_Address in 'dest'.
+//
+// The address is base + 16-bit displacement, base + index register, or
+// base + a displacement that is first loaded into a register (patchable if
+// 'patch_code' requests patching). Oops are compressed before the store when
+// compressed oops are in use, the store is not 'wide', and the narrow oop
+// mode is not unscaled. If 'info' is given, either an explicit null check is
+// emitted up front or the store itself is recorded as the implicit null
+// check location.
+void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type,
+                            LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack,
+                            bool wide, bool unaligned) {
+  assert(type != T_METADATA, "store of metadata ptr not supported");
+
+  LIR_Address* const mem  = dest->as_address_ptr();
+  const Register     base = mem->base()->as_pointer_register();
+  const int          disp = mem->disp();
+
+  const bool has_index    = !mem->index()->is_illegal();
+  const bool patching     = (patch_code != lir_patch_none);
+  const bool is_oop_store = (type == T_ARRAY || type == T_OBJECT);
+  const bool encode_oop   = is_oop_store && UseCompressedOops && !wide &&
+                            Universe::narrow_oop_mode() != Universe::UnscaledNarrowOop;
+  // Without an index register, a displacement that does not fit a signed
+  // 16-bit immediate has to be loaded into a register first.
+  const bool disp_in_reg  = !has_index && !Assembler::is_simm16(disp);
+  // The compressed oop is produced via R0 below, so the displacement goes to
+  // R29 instead to avoid a register conflict. The null check must then
+  // happen before R29 is killed.
+  const bool borrow_R29   = encode_oop && disp_in_reg;
+  // Null check for large offsets in LIRGenerator::do_StoreField.
+  const bool explicit_check = !ImplicitNullChecks || borrow_R29;
+
+  if (explicit_check && info != NULL) {
+    explicit_null_check(base, info);
+  }
+
+  if (mem->base()->is_oop_register()) {
+    __ verify_oop(base);
+  }
+
+  PatchingStub* patch = NULL;
+  if (patching) {
+    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!from_reg->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  // Decide which register, if any, carries the offset part of the address.
+  Register offset_reg = noreg;
+  if (has_index) {
+    offset_reg = mem->index()->as_pointer_register();
+    assert(disp == 0, "can't handle 3 operand addresses");
+  } else if (disp_in_reg) {
+    offset_reg = borrow_R29 ? R29_TOC : R0;
+    if (patching) {
+      __ load_const32(offset_reg, 0); // patchable int
+    } else {
+      __ load_const_optimized(offset_reg, disp);
+    }
+  }
+
+  if (encode_oop) {
+    Register narrow = __ encode_heap_oop(R0, from_reg->as_register());
+    from_reg = FrameMap::as_opr(narrow);
+  }
+
+  // Remember the code offset of the store. The patching_epilog must be done
+  // before the call to add_debug_info_for_null_check, otherwise the PcDescs
+  // don't get entered in increasing order.
+  int store_offset;
+  if (offset_reg != noreg) {
+    assert(!unaligned, "unexpected");
+    store_offset = store(from_reg, base, offset_reg, type, wide);
+  } else {
+    assert(Assembler::is_simm16(disp), "should have set this up");
+    store_offset = store(from_reg, base, disp, type, wide, unaligned);
+  }
+
+  if (borrow_R29) {
+    // Re-initialize R29 with the global TOC after it was used for the displacement.
+    __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0);
+  }
+
+  if (patch != NULL) {
+    patching_epilog(patch, patch_code, base, info);
+  }
+
+  if (!explicit_check && info != NULL) {
+    add_debug_info_for_null_check(store_offset, info);
+  }
+}
+
+
+// Emit the method return sequence: remove the frame, load the polling page
+// address, restore the return pc into LR, perform the return safepoint poll
+// and return. The 'result' operand is not used here.
+void LIR_Assembler::return_op(LIR_Opr result) {
+  const Register poll_page_reg = R12;
+  const Register ret_pc_reg    = R11;
+
+  // The stack is popped before the safepoint code.
+  const int frame_bytes = initial_frame_size_in_bytes();
+  if (!Assembler::is_simm(frame_bytes, 16)) {
+    __ pop_frame();
+  } else {
+    // Frame size fits a 16-bit immediate: adjust SP directly.
+    __ addi(R1_SP, R1_SP, frame_bytes);
+  }
+
+  if (!LoadPollAddressFromThread) {
+    __ load_const_optimized(poll_page_reg, (long)(address) os::get_polling_page(), R0); // TODO: PPC port: get_standard_polling_page()
+  } else {
+    // TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread);
+    Unimplemented();
+  }
+
+  // The return pc is restored relative to the caller's sp and moved to LR.
+  __ ld(ret_pc_reg, _abi(lr), R1_SP);
+  __ mtlr(ret_pc_reg);
+
+  // The code position of the load from the safepoint polling page has to be
+  // marked as relocInfo::poll_return_type.
+  __ relocate(relocInfo::poll_return_type);
+  __ load_from_polling_page(poll_page_reg);
+
+  __ blr();
+}
+
+
+// Emit a safepoint poll using 'tmp' as the register holding the polling
+// address. Returns the code offset captured just before the polling load is
+// emitted. Debug info is recorded via add_debug_info_for_branch when 'info'
+// is available ('info' is mandatory on the LoadPollAddressFromThread path,
+// which is not implemented yet).
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  const Register poll_reg = tmp->as_register();
+  int poll_offset;
+
+  if (LoadPollAddressFromThread) {
+    // TODO: PPC port __ ld(poll_addr, in_bytes(JavaThread::poll_address_offset()), R16_thread);
+    Unimplemented();
+    __ relocate(relocInfo::poll_type); // XXX
+    guarantee(info != NULL, "Shouldn't be NULL");
+    poll_offset = __ offset();
+    add_debug_info_for_branch(info);
+    __ load_from_polling_page(poll_reg);
+  } else {
+    __ load_const_optimized(poll_reg, (intptr_t)os::get_polling_page(), R0); // TODO: PPC port: get_standard_polling_page()
+    if (info != NULL) {
+      add_debug_info_for_branch(info);
+    }
+    poll_offset = __ offset();
+    __ relocate(relocInfo::poll_type);
+    __ load_from_polling_page(poll_reg);
+  }
+
+  return poll_offset;
+}
+
+
+// Emit the stub belonging to the static call at the current pc. The
+// compilation bails out if the stub section or the constant section
+// overflows.
+void LIR_Assembler::emit_static_call_stub() {
+  // Capture the call site before switching to the stub section.
+  const address call_site = __ pc();
+  if (__ start_a_stub(max_static_call_stub_size) == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  // java_to_interp stubs use R11_scratch1 as scratch register while call
+  // trampoline stubs use R12_scratch2, which makes them distinguishable
+  // (see is_NativeCallTrampolineStub_at()).
+  const Register scratch = R11_scratch1;
+
+  // The static stub relocation relates this stub to the call instruction at
+  // insts_call_instruction_offset in the instructions code-section.
+  const int stub_begin = __ offset();
+  __ relocate(static_stub_Relocation::spec(call_site));
+
+  // Stub code:
+  // - load the TOC
+  // - load the inline cache oop from the constant pool
+  // - load the call target from the constant pool
+  // - call
+  __ calculate_address_from_global_toc(scratch, __ method_toc());
+  AddressLiteral ic_holder = __ allocate_metadata_address((Metadata *)NULL);
+  bool ok = __ load_const_from_method_toc(R19_inline_cache_reg, ic_holder, scratch, /*fixed_size*/ true);
+
+  if (!ReoptimizeCallSequences) {
+    AddressLiteral target((address)-1);
+    // Only attempt the second constant load if the first one succeeded.
+    if (ok) {
+      ok = __ load_const_from_method_toc(scratch, target, scratch, /*fixed_size*/ true);
+    }
+    __ mtctr(scratch);
+    __ bctr();
+  } else {
+    __ b64_patchable((address)-1, relocInfo::none);
+  }
+
+  if (!ok) {
+    bailout("const section overflow");
+    return;
+  }
+
+  assert(__ offset() - stub_begin <= max_static_call_stub_size, "stub too big");
+  __ end_a_stub();
+}
+
+
+// Emit a compare of 'opr1' with 'opr2' into BOOL_RESULT.
+//
+// lir_cond_belowEqual and lir_cond_aboveEqual select unsigned integer
+// compares; every other condition uses signed compares. Constants that do
+// not fit the 16-bit immediate form are loaded into R0 first. The 'op'
+// argument is not used.
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+  const bool is_unsigned = (condition == lir_cond_belowEqual || condition == lir_cond_aboveEqual);
+
+  // --- floating point operands ---
+  if (opr1->is_single_fpu()) {
+    __ fcmpu(BOOL_RESULT, opr1->as_float_reg(), opr2->as_float_reg());
+    return;
+  }
+  if (opr1->is_double_fpu()) {
+    __ fcmpu(BOOL_RESULT, opr1->as_double_reg(), opr2->as_double_reg());
+    return;
+  }
+
+  // --- single-word cpu register on the left ---
+  if (opr1->is_single_cpu()) {
+    const Register lhs = opr1->as_register();
+
+    if (opr2->is_constant()) {
+      switch (opr2->as_constant_ptr()->type()) {
+        case T_INT: {
+          const jint imm = opr2->as_constant_ptr()->as_jint();
+          if (!is_unsigned) {
+            if (Assembler::is_simm(imm, 16)) {
+              __ cmpwi(BOOL_RESULT, lhs, imm);
+            } else {
+              __ load_const_optimized(R0, imm);
+              __ cmpw(BOOL_RESULT, lhs, R0);
+            }
+          } else {
+            if (Assembler::is_uimm(imm, 16)) {
+              __ cmplwi(BOOL_RESULT, lhs, imm);
+            } else {
+              __ load_const_optimized(R0, imm);
+              __ cmplw(BOOL_RESULT, lhs, R0);
+            }
+          }
+          break;
+        }
+
+        case T_OBJECT: {
+          // Objects are only ever compared for equality / inequality.
+          assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "oops");
+          const jobject obj = opr2->as_constant_ptr()->as_jobject();
+          if (obj != NULL) {
+            jobject2reg(obj, R0);
+            __ cmpd(BOOL_RESULT, lhs, R0);
+          } else {
+            __ cmpdi(BOOL_RESULT, lhs, 0);
+          }
+          break;
+        }
+
+        default:
+          ShouldNotReachHere();
+          break;
+      }
+      return;
+    }
+
+    if (opr2->is_address()) {
+      DEBUG_ONLY( Unimplemented(); ) // Seems to be unused at the moment.
+      LIR_Address* mem = opr2->as_address_ptr();
+      if (mem->type() == T_OBJECT) {
+        __ ld(R0, index_or_disp(mem), mem->base()->as_register());
+      } else {
+        __ lwa(R0, index_or_disp(mem), mem->base()->as_register());
+      }
+      __ cmpd(BOOL_RESULT, lhs, R0);
+      return;
+    }
+
+    // Register against register.
+    if (is_unsigned) {
+      __ cmplw(BOOL_RESULT, lhs, opr2->as_register());
+    } else {
+      __ cmpw(BOOL_RESULT, lhs, opr2->as_register());
+    }
+    return;
+  }
+
+  // --- double-word cpu register on the left ---
+  if (opr1->is_double_cpu()) {
+    const Register lhs = opr1->as_register_lo();
+
+    if (opr2->is_constant()) {
+      const jlong imm = opr2->as_constant_ptr()->as_jlong();
+      if (!is_unsigned) {
+        if (Assembler::is_simm(imm, 16)) {
+          __ cmpdi(BOOL_RESULT, lhs, imm);
+        } else {
+          __ load_const_optimized(R0, imm);
+          __ cmpd(BOOL_RESULT, lhs, R0);
+        }
+      } else {
+        if (Assembler::is_uimm(imm, 16)) {
+          __ cmpldi(BOOL_RESULT, lhs, imm);
+        } else {
+          __ load_const_optimized(R0, imm);
+          __ cmpld(BOOL_RESULT, lhs, R0);
+        }
+      }
+    } else if (opr2->is_register()) {
+      if (is_unsigned) {
+        __ cmpld(BOOL_RESULT, lhs, opr2->as_register_lo());
+      } else {
+        __ cmpd(BOOL_RESULT, lhs, opr2->as_register_lo());
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+    return;
+  }
+
+  // --- memory operand on the left, constant on the right ---
+  if (opr1->is_address()) {
+    DEBUG_ONLY( Unimplemented(); ) // Seems to be unused at the moment.
+    LIR_Address* mem = opr1->as_address_ptr();
+    assert (opr2->is_constant(), "Checking");
+    if (mem->type() == T_OBJECT) {
+      __ ld(R0, index_or_disp(mem), mem->base()->as_register());
+    } else {
+      __ lwa(R0, index_or_disp(mem), mem->base()->as_register());
+    }
+    __ cmpdi(BOOL_RESULT, R0, opr2->as_constant_ptr()->as_jint());
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+
+// Three-way compare of 'left' and 'right' producing -1, 0 or 1 in 'dst'.
+// Handles float/double compares (lir_cmp_fd2i, lir_ucmp_fd2i) and long
+// compares (lir_cmp_l2i). For the floating point variants an unordered
+// result yields -1 for lir_ucmp_fd2i and 1 for lir_cmp_fd2i. The 'op'
+// argument is not used.
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){
+  const Register result = dst->as_register();
+  Label finished;
+
+  switch (code) {
+    case lir_cmp_fd2i:
+    case lir_ucmp_fd2i: {
+      if (left->is_single_fpu()) {
+        __ fcmpu(CCR0, left->as_float_reg(), right->as_float_reg());
+      } else if (left->is_double_fpu()) {
+        __ fcmpu(CCR0, left->as_double_reg(), right->as_double_reg());
+      } else {
+        ShouldNotReachHere();
+      }
+      // Preload the result for the unordered case and skip the generic
+      // computation if the compare was unordered.
+      const bool unordered_is_less = (code == lir_ucmp_fd2i);
+      __ li(result, unordered_is_less ? -1 : 1);
+      __ bso(CCR0, finished);
+      break;
+    }
+    case lir_cmp_l2i:
+      __ cmpd(CCR0, left->as_register_lo(), right->as_register_lo());
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Turn the CCR0 outcome into an integer.
+  __ mfcr(R0); // set bit 32..33 as follows: <: 0b10, =: 0b00, >: 0b01
+  __ srwi(result, R0, 30);
+  __ srawi(R0, R0, 31);
+  __ orr(result, R0, result); // set result as follows: <: -1, =: 0, >: 1
+  __ bind(finished);
+}
+
+
+// Helper: bring 'src' (a constant, a register or a stack slot) into the
+// register operand 'dst' using the matching LIR_Assembler move routine.
+// Any other kind of source operand is a programming error.
+inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) {
+  if (src->is_constant()) {
+    lasm->const2reg(src, dst, lir_patch_none, NULL);
+    return;
+  }
+  if (src->is_register()) {
+    lasm->reg2reg(src, dst);
+    return;
+  }
+  if (src->is_stack()) {
+    lasm->stack2reg(src, dst, dst->type());
+    return;
+  }
+  ShouldNotReachHere();
+}
+
+
+// Conditional move: 'result' receives 'opr1' if 'condition' holds for the
+// compare outcome in BOOL_RESULT, otherwise 'opr2'. Uses isel where the CPU
+// supports it and the operands allow it, and a conditional branch around the
+// second load otherwise. The 'type' argument is not used.
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+  // Identical operands: the condition doesn't matter.
+  if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) {
+    load_to_reg(this, opr1, result);
+    return;
+  }
+
+  // Express the LIR condition as a condition register bit to test ('cr_bit')
+  // plus the bit value for which opr1 is selected ('select_if_set').
+  Assembler::Condition cr_bit = Assembler::equal;
+  bool select_if_set = false;
+  switch (condition) {
+    case lir_cond_equal:
+      cr_bit = Assembler::equal;
+      select_if_set = true;
+      break;
+    case lir_cond_notEqual:
+      cr_bit = Assembler::equal;
+      select_if_set = false;
+      break;
+    case lir_cond_less:
+      cr_bit = Assembler::less;
+      select_if_set = true;
+      break;
+    case lir_cond_belowEqual:
+    case lir_cond_lessEqual:
+      cr_bit = Assembler::greater;
+      select_if_set = false;
+      break;
+    case lir_cond_greater:
+      cr_bit = Assembler::greater;
+      select_if_set = true;
+      break;
+    case lir_cond_aboveEqual:
+    case lir_cond_greaterEqual:
+      cr_bit = Assembler::less;
+      select_if_set = false;
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Try to use isel on >=Power7.
+  if (VM_Version::has_isel() && result->is_cpu_register()) {
+    const bool opr1_in_reg = opr1->is_cpu_register();
+    const bool opr2_in_reg = opr2->is_cpu_register();
+    const Register dst = result->is_single_cpu() ? result->as_register() : result->as_register_lo();
+
+    // An operand that is not yet in a register can be loaded into the
+    // result register. That only works for one of the two operands.
+    Register src1 = dst;
+    if (opr1_in_reg) {
+      src1 = opr1->is_single_cpu() ? opr1->as_register() : opr1->as_register_lo();
+    }
+    Register src2 = dst;
+    if (opr2_in_reg) {
+      src2 = opr2->is_single_cpu() ? opr2->as_register() : opr2->as_register_lo();
+    }
+
+    if (src1 != src2) {
+      if (!opr1_in_reg) {
+        load_to_reg(this, opr1, result);
+      }
+      if (!opr2_in_reg) {
+        load_to_reg(this, opr2, result);
+      }
+      __ isel(dst, BOOL_RESULT, cr_bit, !select_if_set, src1, src2);
+      return;
+    }
+  } // isel
+
+  // Fallback: load opr1, then skip the load of opr2 if the condition holds.
+  load_to_reg(this, opr1, result);
+
+  Label keep_first;
+  const int branch_bo = select_if_set ? Assembler::bcondCRbiIs1 : Assembler::bcondCRbiIs0;
+  const int branch_bi = Assembler::bi0(BOOL_RESULT, cr_bit);
+  __ bc(branch_bo, branch_bi, keep_first);
+
+  load_to_reg(this, opr2, result);
+  __ bind(keep_first);
+}
+
+
+// Emit an arithmetic operation dest = left <code> right.
+//
+// 'left' and 'dest' are registers. 'right' is either a register (add, sub,
+// mul for integers and longs; add, sub, mul, div incl. strictfp variants for
+// floats and doubles) or a constant (add, sub, mul on integers and longs,
+// where the constant must be encodable as a 16-bit immediate). 'info' must
+// be NULL; 'pop_fpu_stack' is not used.
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+                             CodeEmitInfo* info, bool pop_fpu_stack) {
+  assert(info == NULL, "unused on this code path");
+  assert(left->is_register(), "wrong items state");
+  assert(dest->is_register(), "wrong items state");
+
+  // ---- right operand is a constant ----
+  if (!right->is_register()) {
+    assert (right->is_constant(), "must be constant");
+
+    if (dest->is_single_cpu()) {
+      const Register src = left->as_register();
+      const Register dst = dest->as_register();
+      int imm = right->as_constant_ptr()->as_jint();
+
+      if (code == lir_add || code == lir_sub) {
+        if (code == lir_sub) {
+          // Subtraction is emitted as addition of the negated constant.
+          assert(Assembler::is_simm16(-imm), "cannot encode"); // see do_ArithmeticOp_Int
+          imm = -imm;
+        }
+        // Adding 0 in place needs no instruction.
+        if (dst != src || imm != 0) {
+          __ addi(dst, src, imm);
+        }
+      } else if (code == lir_mul) {
+        // Multiplying by 1 in place needs no instruction.
+        if (dst != src || imm != 1) {
+          __ mulli(dst, src, imm);
+        }
+      } else {
+        ShouldNotReachHere();
+      }
+    } else {
+      const Register src = left->as_pointer_register();
+      const Register dst = dest->as_register_lo();
+      long imm = right->as_constant_ptr()->as_jlong();
+      assert(Assembler::is_simm16(imm), "must be simm16");
+
+      if (code == lir_add || code == lir_sub) {
+        if (code == lir_sub) {
+          // Subtraction is emitted as addition of the negated constant.
+          assert(Assembler::is_simm16(-imm), "cannot encode");  // see do_ArithmeticOp_Long
+          imm = -imm;
+        }
+        if (dst != src || imm != 0) {
+          __ addi(dst, src, (int)imm);
+        }
+      } else if (code == lir_mul) {
+        if (dst != src || imm != 1) {
+          __ mulli(dst, src, (int)imm);
+        }
+      } else {
+        ShouldNotReachHere();
+      }
+    }
+    return;
+  }
+
+  // ---- right operand is a register ----
+  if (dest->is_float_kind()) {
+    // Single and double precision share the structure and differ only in
+    // the register accessors and the instruction variant.
+    const bool single = right->is_single_fpu();
+    const FloatRegister lhs = single ? left->as_float_reg()  : left->as_double_reg();
+    const FloatRegister rhs = single ? right->as_float_reg() : right->as_double_reg();
+    const FloatRegister dst = single ? dest->as_float_reg()  : dest->as_double_reg();
+
+    switch (code) {
+      case lir_add:
+        if (single) { __ fadds(dst, lhs, rhs); } else { __ fadd(dst, lhs, rhs); }
+        break;
+      case lir_sub:
+        if (single) { __ fsubs(dst, lhs, rhs); } else { __ fsub(dst, lhs, rhs); }
+        break;
+      case lir_mul:
+      case lir_mul_strictfp:
+        if (single) { __ fmuls(dst, lhs, rhs); } else { __ fmul(dst, lhs, rhs); }
+        break;
+      case lir_div:
+      case lir_div_strictfp:
+        if (single) { __ fdivs(dst, lhs, rhs); } else { __ fdiv(dst, lhs, rhs); }
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  } else if (dest->is_double_cpu()) {
+    const Register dst = dest->as_register_lo();
+    const Register lhs = left->as_pointer_register();
+    const Register rhs = right->as_pointer_register();
+
+    if (code == lir_add) {
+      __ add(dst, lhs, rhs);
+    } else if (code == lir_sub) {
+      __ sub(dst, lhs, rhs);
+    } else if (code == lir_mul) {
+      __ mulld(dst, lhs, rhs);
+    } else {
+      ShouldNotReachHere();
+    }
+  } else {
+    assert (right->is_single_cpu(), "Just Checking");
+
+    const Register lhs = left->as_register();
+    const Register dst = dest->as_register();
+    const Register rhs = right->as_register();
+
+    if (code == lir_add) {
+      __ add(dst, lhs, rhs);
+    } else if (code == lir_sub) {
+      __ sub(dst, lhs, rhs);
+    } else if (code == lir_mul) {
+      __ mullw(dst, lhs, rhs);
+    } else {
+      ShouldNotReachHere();
+    }
+  }
+}
+
+
+void LIR_Assembler::fpop() {
+  Unimplemented();
+  // do nothing
+}
+
+
+// Emit a math intrinsic on double registers: dest = sqrt(value) for lir_sqrt,
+// dest = abs(value) for lir_abs. Other codes are not supported. The 'thread'
+// and 'op' arguments are not used.
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
+  if (code == lir_sqrt) {
+    __ fsqrt(dest->as_double_reg(), value->as_double_reg());
+  } else if (code == lir_abs) {
+    __ fabs(dest->as_double_reg(), value->as_double_reg());
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+// Emit a bitwise and/or/xor: dest = left <code> right, where 'right' is
+// either a register or a constant. For constants the instruction form is
+// picked from the bits that are set in the immediate (see do_LogicOp for
+// the constants that are passed in here).
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
+  // ---- register operand ----
+  if (!right->is_constant()) {
+    assert(right->is_register(), "right should be in register");
+
+    Register dst, lhs, rhs;
+    if (dest->is_single_cpu()) {
+      dst = dest->as_register();
+      lhs = left->as_register();
+      rhs = right->as_register();
+    } else {
+      // Operands that are single-word oop registers are read through
+      // as_register(), everything else through as_register_lo().
+      lhs = (left->is_single_cpu() && left->is_oop_register()) ? left->as_register()
+                                                               : left->as_register_lo();
+      rhs = (right->is_single_cpu() && right->is_oop_register()) ? right->as_register()
+                                                                 : right->as_register_lo();
+      dst = dest->as_register_lo();
+    }
+
+    switch (code) {
+      case lir_logic_and: __ andr(dst, lhs, rhs); break;
+      case lir_logic_or:  __ orr (dst, lhs, rhs); break;
+      case lir_logic_xor: __ xorr(dst, lhs, rhs); break;
+      default: ShouldNotReachHere();
+    }
+    return;
+  }
+
+  // ---- constant operand (see do_LogicOp) ----
+  long imm;
+  Register dst, src;
+  if (dest->is_single_cpu()) {
+    imm = right->as_constant_ptr()->as_jint();
+    dst = dest->as_register();
+    src = left->as_register();
+  } else {
+    imm = right->as_constant_ptr()->as_jlong();
+    dst = dest->as_register_lo();
+    src = left->as_register_lo();
+  }
+  // The immediate with its low 16 resp. low 32 bits shifted out (unsigned).
+  const long above_16 = (unsigned long)imm >> 16;
+  const long above_32 = (unsigned long)imm >> 32;
+
+  if (code == lir_logic_and) {
+    if (above_32 != 0 || (above_16 != 0 && (imm & 0xFFFF) != 0) || is_power_of_2_long(imm)) {
+      __ andi(dst, src, imm); // special cases
+    } else if (above_16 != 0) {
+      __ andis_(dst, src, above_16);
+    } else {
+      __ andi_(dst, src, imm);
+    }
+  } else if (code == lir_logic_or) {
+    if (above_16 != 0) {
+      assert((imm & 0xFFFF) == 0, "sanity");
+      __ oris(dst, src, above_16);
+    } else {
+      __ ori(dst, src, imm);
+    }
+  } else if (code == lir_logic_xor) {
+    if (imm == -1) {
+      __ nand(dst, src, src); // special case
+    } else if (above_16 != 0) {
+      assert((imm & 0xFFFF) == 0, "sanity");
+      __ xoris(dst, src, above_16);
+    } else {
+      __ xori(dst, src, imm);
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+// Returns log2 of the array element size of 't' in bytes (0 for 1 byte up to
+// 3 for 8 bytes). Any other element size is a programming error; -1 is
+// returned after ShouldNotReachHere() in that case.
+int LIR_Assembler::shift_amount(BasicType t) {
+  const int elem_bytes = type2aelembytes(t);
+  for (int log2_bytes = 0; log2_bytes <= 3; log2_bytes++) {
+    if (elem_bytes == (1 << log2_bytes)) {
+      return log2_bytes;
+    }
+  }
+  ShouldNotReachHere();
+  return -1;
+}
+
+
+// Emit an athrow: load the address of the throwing instruction into
+// 'exceptionPC', record call info for the exception handler at that
+// position, and jump to the Runtime1 exception handling stub.
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  info->add_register_oop(exceptionOop);
+
+  // Reuse the debug info from the safepoint poll for the throw op itself.
+  const int     throw_offset = __ offset();
+  const address throw_pc     = __ pc();
+
+  // The pc is computed relative to the global TOC (with relocation). The
+  // alternative of an internal_word relocation followed by
+  //   __ load_const(exceptionPC->as_register(), pc_for_athrow, R0);
+  // is not used.
+  __ calculate_address_from_global_toc(exceptionPC->as_register(), throw_pc, true, true, /*add_relocation*/ true);
+  add_call_info(throw_offset, info); // for exception handler
+
+  // Pick the handler variant depending on whether the compilation has FPU code.
+  address handler;
+  if (compilation()->has_fpu_code()) {
+    handler = Runtime1::entry_for(Runtime1::handle_exception_id);
+  } else {
+    handler = Runtime1::entry_for(Runtime1::handle_exception_nofpu_id);
+  }
+
+  // The handler address is formed as an offset from the global TOC in R29
+  // (instead of __ load_const_optimized(R0, stub)).
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(handler));
+  __ mtctr(R0);
+  __ bctr();
+}
+
+
+// Unwind: branch to the unwind handler entry of this method. The exception
+// oop is expected in R3.
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  // Note: Not used with EnableDebuggingOnDemand.
+  const Register exception_reg = exceptionOop->as_register();
+  assert(exception_reg == R3, "should match");
+  __ b(_unwind_handler_entry);
+}
+
+
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+  Register src = op->src()->as_register();
+  Register dst = op->dst()->as_register();
+  Register src_pos = op->src_pos()->as_register();
+  Register dst_pos = op->dst_pos()->as_register();
+  Register length  = op->length()->as_register();
+  Register tmp = op->tmp()->as_register();
+  Register tmp2 = R0;
+
+  int flags = op->flags();
+  ciArrayKlass* default_type = op->expected_type();
+  BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+  if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+  // Set up the arraycopy stub information.
+  ArrayCopyStub* stub = op->stub();
+  const int frame_resize = frame::abi_reg_args_size - sizeof(frame::jit_abi); // C calls need larger frame.
+
+  // Always do stub if no type information is available. It's ok if
+  // the known type isn't loaded since the code sanity checks
+  // in debug mode and the type isn't required when we know the exact type
+  // also check that the type is an array type.
+  if (op->expected_type() == NULL) {
+    assert(src->is_nonvolatile() && src_pos->is_nonvolatile() && dst->is_nonvolatile() && dst_pos->is_nonvolatile() &&
+           length->is_nonvolatile(), "must preserve");
+    // 3 parms are int. Convert to long.
+    __ mr(R3_ARG1, src);
+    __ extsw(R4_ARG2, src_pos);
+    __ mr(R5_ARG3, dst);
+    __ extsw(R6_ARG4, dst_pos);
+    __ extsw(R7_ARG5, length);
+    address copyfunc_addr = StubRoutines::generic_arraycopy();
+
+    if (copyfunc_addr == NULL) { // Use C version if stub was not generated.
+      address entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
+      __ call_c_with_frame_resize(entry, frame_resize);
+    } else {
+#ifndef PRODUCT
+      if (PrintC1Statistics) {
+        address counter = (address)&Runtime1::_generic_arraycopystub_cnt;
+        int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+        __ lwz(R11_scratch1, simm16_offs, tmp);
+        __ addi(R11_scratch1, R11_scratch1, 1);
+        __ stw(R11_scratch1, simm16_offs, tmp);
+      }
+#endif
+      __ call_c_with_frame_resize(copyfunc_addr, /*stub does not need resized frame*/ 0);
+
+      __ nand(tmp, R3_RET, R3_RET);
+      __ subf(length, tmp, length);
+      __ add(src_pos, tmp, src_pos);
+      __ add(dst_pos, tmp, dst_pos);
+    }
+
+    __ cmpwi(CCR0, R3_RET, 0);
+    __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::less), *stub->entry());
+    __ bind(*stub->continuation());
+    return;
+  }
+
+  assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point");
+  Label cont, slow, copyfunc;
+
+  bool simple_check_flag_set = flags & (LIR_OpArrayCopy::src_null_check |
+                                        LIR_OpArrayCopy::dst_null_check |
+                                        LIR_OpArrayCopy::src_pos_positive_check |
+                                        LIR_OpArrayCopy::dst_pos_positive_check |
+                                        LIR_OpArrayCopy::length_positive_check);
+
+  // Use only one conditional branch for simple checks.
+  if (simple_check_flag_set) {
+    ConditionRegister combined_check = CCR1, tmp_check = CCR1;
+
+    // Make sure src and dst are non-null.
+    if (flags & LIR_OpArrayCopy::src_null_check) {
+      __ cmpdi(combined_check, src, 0);
+      tmp_check = CCR0;
+    }
+
+    if (flags & LIR_OpArrayCopy::dst_null_check) {
+      __ cmpdi(tmp_check, dst, 0);
+      if (tmp_check != combined_check) {
+        __ cror(combined_check, Assembler::equal, tmp_check, Assembler::equal);
+      }
+      tmp_check = CCR0;
+    }
+
+    // Clear combined_check.eq if not already used.
+    if (tmp_check == combined_check) {
+      __ crandc(combined_check, Assembler::equal, combined_check, Assembler::equal);
+      tmp_check = CCR0;
+    }
+
+    if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+      // Test src_pos register.
+      __ cmpwi(tmp_check, src_pos, 0);
+      __ cror(combined_check, Assembler::equal, tmp_check, Assembler::less);
+    }
+
+    if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+      // Test dst_pos register.
+      __ cmpwi(tmp_check, dst_pos, 0);
+      __ cror(combined_check, Assembler::equal, tmp_check, Assembler::less);
+    }
+
+    if (flags & LIR_OpArrayCopy::length_positive_check) {
+      // Make sure length isn't negative.
+      __ cmpwi(tmp_check, length, 0);
+      __ cror(combined_check, Assembler::equal, tmp_check, Assembler::less);
+    }
+
+    __ beq(combined_check, slow);
+  }
+
+  // Higher 32bits must be null.
+  __ extsw(length, length);
+
+  __ extsw(src_pos, src_pos);
+  if (flags & LIR_OpArrayCopy::src_range_check) {
+    __ lwz(tmp2, arrayOopDesc::length_offset_in_bytes(), src);
+    __ add(tmp, length, src_pos);
+    __ cmpld(CCR0, tmp2, tmp);
+    __ ble(CCR0, slow);
+  }
+
+  __ extsw(dst_pos, dst_pos);
+  if (flags & LIR_OpArrayCopy::dst_range_check) {
+    __ lwz(tmp2, arrayOopDesc::length_offset_in_bytes(), dst);
+    __ add(tmp, length, dst_pos);
+    __ cmpld(CCR0, tmp2, tmp);
+    __ ble(CCR0, slow);
+  }
+
+  int shift = shift_amount(basic_type);
+
+  if (!(flags & LIR_OpArrayCopy::type_check)) {
+    __ b(cont);
+  } else {
+    // We don't know the array types are compatible.
+    if (basic_type != T_OBJECT) {
+      // Simple test for basic type arrays.
+      if (UseCompressedClassPointers) {
+        // We don't need decode because we just need to compare.
+        __ lwz(tmp, oopDesc::klass_offset_in_bytes(), src);
+        __ lwz(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+        __ cmpw(CCR0, tmp, tmp2);
+      } else {
+        __ ld(tmp, oopDesc::klass_offset_in_bytes(), src);
+        __ ld(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+        __ cmpd(CCR0, tmp, tmp2);
+      }
+      __ beq(CCR0, cont);
+    } else {
+      // For object arrays, if src is a sub class of dst then we can
+      // safely do the copy.
+      address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+
+      const Register sub_klass = R5, super_klass = R4; // like CheckCast/InstanceOf
+      assert_different_registers(tmp, tmp2, sub_klass, super_klass);
+
+      __ load_klass(sub_klass, src);
+      __ load_klass(super_klass, dst);
+
+      __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp, tmp2,
+                                       &cont, copyfunc_addr != NULL ? &copyfunc : &slow, NULL);
+
+      address slow_stc = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+      //__ load_const_optimized(tmp, slow_stc, tmp2);
+      __ calculate_address_from_global_toc(tmp, slow_stc, true, true, false);
+      __ mtctr(tmp);
+      __ bctrl(); // sets CR0
+      __ beq(CCR0, cont);
+
+      if (copyfunc_addr != NULL) { // Use stub if available.
+        __ bind(copyfunc);
+        // Src is not a sub class of dst so we have to do a
+        // per-element check.
+        int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
+        if ((flags & mask) != mask) {
+          assert(flags & mask, "one of the two should be known to be an object array");
+
+          if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+            __ load_klass(tmp, src);
+          } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+            __ load_klass(tmp, dst);
+          }
+
+          __ lwz(tmp2, in_bytes(Klass::layout_helper_offset()), tmp);
+
+          jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+          __ load_const_optimized(tmp, objArray_lh);
+          __ cmpw(CCR0, tmp, tmp2);
+          __ bne(CCR0, slow);
+        }
+
+        Register src_ptr = R3_ARG1;
+        Register dst_ptr = R4_ARG2;
+        Register len     = R5_ARG3;
+        Register chk_off = R6_ARG4;
+        Register super_k = R7_ARG5;
+
+        __ addi(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type));
+        __ addi(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type));
+        if (shift == 0) {
+          __ add(src_ptr, src_pos, src_ptr);
+          __ add(dst_ptr, dst_pos, dst_ptr);
+        } else {
+          __ sldi(tmp, src_pos, shift);
+          __ sldi(tmp2, dst_pos, shift);
+          __ add(src_ptr, tmp, src_ptr);
+          __ add(dst_ptr, tmp2, dst_ptr);
+        }
+
+        __ load_klass(tmp, dst);
+        __ mr(len, length);
+
+        int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+        __ ld(super_k, ek_offset, tmp);
+
+        int sco_offset = in_bytes(Klass::super_check_offset_offset());
+        __ lwz(chk_off, sco_offset, super_k);
+
+        __ call_c_with_frame_resize(copyfunc_addr, /*stub does not need resized frame*/ 0);
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          Label failed;
+          __ cmpwi(CCR0, R3_RET, 0);
+          __ bne(CCR0, failed);
+          address counter = (address)&Runtime1::_arraycopy_checkcast_cnt;
+          int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+          __ lwz(R11_scratch1, simm16_offs, tmp);
+          __ addi(R11_scratch1, R11_scratch1, 1);
+          __ stw(R11_scratch1, simm16_offs, tmp);
+          __ bind(failed);
+        }
+#endif
+
+        __ nand(tmp, R3_RET, R3_RET);
+        __ cmpwi(CCR0, R3_RET, 0);
+        __ beq(CCR0, *stub->continuation());
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          address counter = (address)&Runtime1::_arraycopy_checkcast_attempt_cnt;
+          int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+          __ lwz(R11_scratch1, simm16_offs, tmp);
+          __ addi(R11_scratch1, R11_scratch1, 1);
+          __ stw(R11_scratch1, simm16_offs, tmp);
+        }
+#endif
+
+        __ subf(length, tmp, length);
+        __ add(src_pos, tmp, src_pos);
+        __ add(dst_pos, tmp, dst_pos);
+      }
+    }
+  }
+  __ bind(slow);
+  __ b(*stub->entry());
+  __ bind(cont);
+
+#ifdef ASSERT
+  if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+    // Sanity check the known type with the incoming class. For the
+    // primitive case the types must match exactly with src.klass and
+    // dst.klass each exactly matching the default type. For the
+    // object array case, if no type check is needed then either the
+    // dst type is exactly the expected type and the src type is a
+    // subtype which we can't check or src is the same array as dst
+    // but not necessarily exactly of type default_type.
+    Label known_ok, halt;
+    metadata2reg(op->expected_type()->constant_encoding(), tmp);
+    if (UseCompressedClassPointers) {
+      // Tmp holds the default type. It currently comes uncompressed after the
+      // load of a constant, so encode it.
+      __ encode_klass_not_null(tmp);
+      // Load the raw value of the dst klass, since we will be comparing
+      // uncompressed values directly.
+      __ lwz(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+      __ cmpw(CCR0, tmp, tmp2);
+      if (basic_type != T_OBJECT) {
+        __ bne(CCR0, halt);
+        // Load the raw value of the src klass.
+        __ lwz(tmp2, oopDesc::klass_offset_in_bytes(), src);
+        __ cmpw(CCR0, tmp, tmp2);
+        __ beq(CCR0, known_ok);
+      } else {
+        __ beq(CCR0, known_ok);
+        __ cmpw(CCR0, src, dst);
+        __ beq(CCR0, known_ok);
+      }
+    } else {
+      __ ld(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+      __ cmpd(CCR0, tmp, tmp2);
+      if (basic_type != T_OBJECT) {
+        __ bne(CCR0, halt);
+        // Load the raw value of the src klass.
+        __ ld(tmp2, oopDesc::klass_offset_in_bytes(), src);
+        __ cmpd(CCR0, tmp, tmp2);
+        __ beq(CCR0, known_ok);
+      } else {
+        __ beq(CCR0, known_ok);
+        __ cmpd(CCR0, src, dst);
+        __ beq(CCR0, known_ok);
+      }
+    }
+    __ bind(halt);
+    __ stop("incorrect type information in arraycopy");
+    __ bind(known_ok);
+  }
+#endif
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    address counter = Runtime1::arraycopy_count_address(basic_type);
+    int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+    __ lwz(R11_scratch1, simm16_offs, tmp);
+    __ addi(R11_scratch1, R11_scratch1, 1);
+    __ stw(R11_scratch1, simm16_offs, tmp);
+  }
+#endif
+
+  Register src_ptr = R3_ARG1;
+  Register dst_ptr = R4_ARG2;
+  Register len     = R5_ARG3;
+
+  __ addi(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type));
+  __ addi(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type));
+  if (shift == 0) {
+    __ add(src_ptr, src_pos, src_ptr);
+    __ add(dst_ptr, dst_pos, dst_ptr);
+  } else {
+    __ sldi(tmp, src_pos, shift);
+    __ sldi(tmp2, dst_pos, shift);
+    __ add(src_ptr, tmp, src_ptr);
+    __ add(dst_ptr, tmp2, dst_ptr);
+  }
+
+  bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+  bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+  const char *name;
+  address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+
+  // Arraycopy stubs takes a length in number of elements, so don't scale it.
+  __ mr(len, length);
+  __ call_c_with_frame_resize(entry, /*stub does not need resized frame*/ 0);
+
+  __ bind(*stub->continuation());
+}
+
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  const Register amount = tmp->as_register(); // Receives the masked shift count.
+  const bool narrow = dest->is_single_cpu();  // Single-word result: keep 5 count bits, otherwise 6.
+  __ rldicl(amount, count->as_register(), 0, narrow ? 64-5 : 64-6);
+  if (!narrow) {
+    switch (code) {
+      case lir_shl:  __ sld(dest->as_register_lo(), left->as_register_lo(), amount); break;
+      case lir_shr:  __ srad(dest->as_register_lo(), left->as_register_lo(), amount); break;
+      case lir_ushr: __ srd(dest->as_register_lo(), left->as_register_lo(), amount); break;
+      default: ShouldNotReachHere();
+    }
+#ifdef _LP64
+  } else if (left->type() == T_OBJECT) { // Object operands use the doubleword shifts.
+    switch (code) {
+      case lir_shl:  __ sld(dest->as_register(), left->as_register(), amount); break;
+      case lir_shr:  __ srad(dest->as_register(), left->as_register(), amount); break;
+      case lir_ushr: __ srd(dest->as_register(), left->as_register(), amount); break;
+      default: ShouldNotReachHere();
+    }
+#endif
+  } else { // Plain single-word operand: word shifts.
+    switch (code) {
+      case lir_shl:  __ slw(dest->as_register(), left->as_register(), amount); break;
+      case lir_shr:  __ sraw(dest->as_register(), left->as_register(), amount); break;
+      case lir_ushr: __ srw(dest->as_register(), left->as_register(), amount); break;
+      default: ShouldNotReachHere();
+    }
+  }
+}
+
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+#ifdef _LP64
+  if (left->type() == T_OBJECT) {
+    // Object operand: doubleword shift from left's register into the low register of dest.
+    count &= 63;  // Shouldn't shift by more than sizeof(intptr_t).
+    if (count != 0) {
+      switch (code) {
+        case lir_shl:  __ sldi(dest->as_register_lo(), left->as_register(), count); break;
+        case lir_shr:  __ sradi(dest->as_register_lo(), left->as_register(), count); break;
+        case lir_ushr: __ srdi(dest->as_register_lo(), left->as_register(), count); break;
+        default: ShouldNotReachHere();
+      }
+    } else { __ mr_if_needed(dest->as_register_lo(), left->as_register()); }
+    return;
+  }
+#endif
+
+  if (dest->is_single_cpu()) {
+    // Single-word shift: only the low 5 bits of the constant count are used.
+    count &= 0x1F; // Java spec
+    if (count != 0) {
+      switch (code) {
+        case lir_shl: __ slwi(dest->as_register(), left->as_register(), count); break;
+        case lir_shr:  __ srawi(dest->as_register(), left->as_register(), count); break;
+        case lir_ushr: __ srwi(dest->as_register(), left->as_register(), count); break;
+        default: ShouldNotReachHere();
+      }
+    } else { __ mr_if_needed(dest->as_register(), left->as_register()); }
+  } else if (dest->is_double_cpu()) {
+    // Doubleword shift: only the low 6 bits of the constant count are used.
+    count &= 63; // Java spec
+    if (count != 0) {
+      switch (code) {
+        case lir_shl:  __ sldi(dest->as_pointer_register(), left->as_pointer_register(), count); break;
+        case lir_shr:  __ sradi(dest->as_pointer_register(), left->as_pointer_register(), count); break;
+        case lir_ushr: __ srdi(dest->as_pointer_register(), left->as_pointer_register(), count); break;
+        default: ShouldNotReachHere();
+      }
+    } else { __ mr_if_needed(dest->as_pointer_register(), left->as_pointer_register()); }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  const Register klass_reg = op->klass()->as_register();
+  const Register tmp1_reg  = op->tmp1()->as_register();
+  const Register obj_reg   = op->obj()->as_register();
+  CodeStub* const slow_case = op->stub();
+  if (op->init_check()) {
+    // Either emit an explicit null check, or only record null-check debug info at the load below.
+    if (os::zero_page_read_protected() && ImplicitNullChecks) {
+      add_debug_info_for_null_check_here(slow_case->info());
+    } else {
+      explicit_null_check(klass_reg, slow_case->info());
+    }
+    __ lbz(tmp1_reg, in_bytes(InstanceKlass::init_state_offset()), klass_reg);
+    __ cmpwi(CCR0, tmp1_reg, InstanceKlass::fully_initialized);
+    // Branch to the stub when the "equal" bit is clear, i.e. the state is not fully_initialized.
+    __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *slow_case->entry());
+  }
+  // Allocation attempt; the stub entry is handed to allocate_object as its slow-case label.
+  __ allocate_object(obj_reg, tmp1_reg, op->tmp2()->as_register(), op->tmp3()->as_register(),
+                     op->header_size(), op->object_size(), klass_reg, *slow_case->entry());
+
+  __ bind(*slow_case->continuation());
+  __ verify_oop(obj_reg);
+}
+
+
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  const Register len_reg = op->len()->as_register();
+  LP64_ONLY( __ extsw(len_reg, len_reg); )
+  const BasicType elem_type = op->type();
+  const bool is_oop_array = (elem_type == T_OBJECT || elem_type == T_ARRAY);
+  // Allocate inline only if UseSlowPath is off and the flag for this kind of array is on;
+  // otherwise branch straight to the stub.
+  const bool inline_alloc = !UseSlowPath && (is_oop_array ? UseFastNewObjectArray : UseFastNewTypeArray);
+  if (!inline_alloc) {
+    __ b(*op->stub()->entry());
+  } else {
+    __ allocate_array(op->obj()->as_register(), len_reg,
+                      op->tmp1()->as_register(), op->tmp2()->as_register(), op->tmp3()->as_register(),
+                      arrayOopDesc::header_size(elem_type), type2aelembytes(elem_type),
+                      op->klass()->as_register(),
+                      *op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+
+void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias,
+                                        ciMethodData *md, ciProfileData *data,
+                                        Register recv, Register tmp1, Label* update_done) {
+  // First pass: if recv matches a recorded receiver row, bump that row's count and branch to update_done.
+  for (uint row = 0; row < VirtualCallData::row_limit(); row++) {
+    const int recv_slot  = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(row)) - mdo_offset_bias;
+    const int count_slot = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(row)) - mdo_offset_bias;
+    Label try_next_row;
+    __ ld(tmp1, recv_slot, mdo);
+    __ verify_klass_ptr(tmp1);
+    __ cmpd(CCR0, recv, tmp1);
+    __ bne(CCR0, try_next_row);
+    __ ld(tmp1, count_slot, mdo);
+    __ addi(tmp1, tmp1, DataLayout::counter_increment);
+    __ std(tmp1, count_slot, mdo);
+    __ b(*update_done);
+
+    __ bind(try_next_row);
+  }
+
+  // Second pass: store recv into the first row whose receiver is zero, with a count of one increment.
+  for (uint row = 0; row < VirtualCallData::row_limit(); row++) {
+    const int recv_slot  = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(row)) - mdo_offset_bias;
+    const int count_slot = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(row)) - mdo_offset_bias;
+    Label row_in_use;
+    __ ld(tmp1, recv_slot, mdo);
+    __ cmpdi(CCR0, tmp1, 0);
+    __ bne(CCR0, row_in_use);
+    __ li(tmp1, DataLayout::counter_increment);
+    __ std(recv, recv_slot, mdo);
+    __ std(tmp1, count_slot, mdo);
+    __ b(*update_done);
+    __ bind(row_in_use);
+  }
+}
+
+
+void LIR_Assembler::setup_md_access(ciMethod* method, int bci,
+                                    ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) {
+  md = method->method_data_or_null();
+  assert(md != NULL, "Sanity");
+  data = md->bci_to_data(bci);
+  assert(data != NULL, "need data for checkcast");
+  assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+  const int slot_base = md->byte_offset_of_slot(data, DataLayout::header_offset());
+  // If the end of the slot is out of simm16 reach, hand back the slot base as bias so that
+  // the slots of the data can be referenced with simm16 offsets. Otherwise the bias is left untouched.
+  if (Assembler::is_simm16(slot_base + data->size_in_bytes())) { return; }
+  mdo_offset_bias = slot_base;
+}
+
+
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { // Emits the subtype check of op->object() against op->klass(); branches to *success / *obj_is_null, binds *failure at the end.
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register(); // Receives the klass to check against; reused as MDO pointer when profiling.
+  Register klass_RInfo = op->tmp2()->as_register(); // Receives the klass of obj.
+  Register Rtmp1 = op->tmp3()->as_register();
+  Register dst = op->result_opr()->as_register();
+  ciKlass* k = op->klass();
+  bool should_profile = op->should_profile();
+  bool move_obj_to_dst = (op->code() == lir_checkcast); // For checkcast the object is copied into dst below.
+  // Attention: do_temp(opTypeCheck->_object) is not used, i.e. obj may be same as one of the temps.
+  bool reg_conflict = (obj == k_RInfo || obj == klass_RInfo || obj == Rtmp1);
+  bool restore_obj = move_obj_to_dst && reg_conflict; // If set, the object register is reloaded from dst after *failure.
+
+  __ cmpdi(CCR0, obj, 0); // Null test; the CR0 result is consumed by the bne/beq further down.
+  if (move_obj_to_dst || reg_conflict) {
+    __ mr_if_needed(dst, obj);
+    if (reg_conflict) { obj = dst; } // From here on read the object from dst, since the temps get overwritten.
+  }
+
+  ciMethodData* md;
+  ciProfileData* data;
+  int mdo_offset_bias = 0;
+  if (should_profile) {
+    ciMethod* method = op->profiled_method();
+    assert(method != NULL, "Should have method");
+    setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
+
+    Register mdo      = k_RInfo;
+    Register data_val = Rtmp1;
+    Label not_null;
+    __ bne(CCR0, not_null);
+    metadata2reg(md->constant_encoding(), mdo); // Null object: set the null_seen bit in the profile flags byte.
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+    __ lbz(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+    __ ori(data_val, data_val, BitData::null_seen_byte_constant());
+    __ stb(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+    __ b(*obj_is_null);
+    __ bind(not_null);
+  } else {
+    __ beq(CCR0, *obj_is_null);
+  }
+
+  // Get the class of the (non-null) object.
+  __ load_klass(klass_RInfo, obj);
+
+  if (k->is_loaded()) {
+    metadata2reg(k->constant_encoding(), k_RInfo);
+  } else {
+    klass2reg_with_patching(k_RInfo, op->info_for_patch()); // Klass not loaded yet: emitted with patching info.
+  }
+
+  Label profile_cast_failure, failure_restore_obj, profile_cast_success;
+  Label *failure_target = should_profile ? &profile_cast_failure : failure; // When profiling, detour through the profile update code.
+  Label *success_target = should_profile ? &profile_cast_success : success;
+
+  if (op->fast_check()) {
+    assert_different_registers(klass_RInfo, k_RInfo);
+    __ cmpd(CCR0, k_RInfo, klass_RInfo); // Fast check: the two klass pointers are compared for equality only.
+    if (should_profile) {
+      __ bne(CCR0, *failure_target);
+      // Fall through to success case.
+    } else {
+      __ beq(CCR0, *success);
+      // Fall through to failure case.
+    }
+  } else {
+    bool need_slow_path = true;
+    if (k->is_loaded()) {
+      if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset())) {
+        need_slow_path = false; // The fast path alone decides in this case; no slow-path call is emitted.
+      }
+      // Perform the fast part of the checking logic.
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, R0, (need_slow_path ? success_target : NULL),
+                                       failure_target, NULL, RegisterOrConstant(k->super_check_offset()));
+    } else {
+      // Perform the fast part of the checking logic.
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, R0, success_target, failure_target);
+    }
+    if (!need_slow_path) {
+      if (!should_profile) { __ b(*success); } // When profiling, fall through into the success profile code instead.
+    } else {
+      // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+      address entry = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+      //__ load_const_optimized(Rtmp1, entry, R0);
+      __ calculate_address_from_global_toc(Rtmp1, entry, true, true, false);
+      __ mtctr(Rtmp1);
+      __ bctrl(); // sets CR0
+      if (should_profile) {
+        __ bne(CCR0, *failure_target);
+        // Fall through to success case.
+      } else {
+        __ beq(CCR0, *success);
+        // Fall through to failure case.
+      }
+    }
+  }
+
+  if (should_profile) {
+    Register mdo = k_RInfo, recv = klass_RInfo;
+    assert_different_registers(mdo, recv, Rtmp1);
+    __ bind(profile_cast_success);
+    metadata2reg(md->constant_encoding(), mdo);
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1, success); // Records the receiver klass; branches to *success once a row is updated.
+    __ b(*success);
+
+    // Cast failure case.
+    __ bind(profile_cast_failure);
+    metadata2reg(md->constant_encoding(), mdo);
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+    __ ld(Rtmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+    __ addi(Rtmp1, Rtmp1, -DataLayout::counter_increment); // The counter is decremented by one increment on failure.
+    __ std(Rtmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+  }
+
+  __ bind(*failure); // Failure is the fall-through exit; the caller emits what follows.
+
+  if (restore_obj) {
+    __ mr(op->object()->as_register(), dst);
+    // Fall through to failure case.
+  }
+}
+
+
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { // Emits code for lir_store_check, lir_checkcast or lir_instanceof.
+  LIR_Code code = op->code();
+  if (code == lir_store_check) {
+    Register value = op->object()->as_register();
+    Register array = op->array()->as_register();
+    Register k_RInfo = op->tmp1()->as_register();
+    Register klass_RInfo = op->tmp2()->as_register();
+    Register Rtmp1 = op->tmp3()->as_register();
+    bool should_profile = op->should_profile();
+
+    __ verify_oop(value);
+    CodeStub* stub = op->stub();
+    // Check if it needs to be profiled.
+    ciMethodData* md;
+    ciProfileData* data;
+    int mdo_offset_bias = 0;
+    if (should_profile) {
+      ciMethod* method = op->profiled_method();
+      assert(method != NULL, "Should have method");
+      setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
+    }
+    Label profile_cast_success, failure, done;
+    Label *success_target = should_profile ? &profile_cast_success : &done;
+
+    __ cmpdi(CCR0, value, 0); // A null value needs no subtype check.
+    if (should_profile) {
+      Label not_null;
+      __ bne(CCR0, not_null);
+      Register mdo      = k_RInfo;
+      Register data_val = Rtmp1;
+      metadata2reg(md->constant_encoding(), mdo); // Null value: set the null_seen bit in the profile flags byte.
+      __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+      __ lbz(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+      __ ori(data_val, data_val, BitData::null_seen_byte_constant());
+      __ stb(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+      __ b(done);
+      __ bind(not_null);
+    } else {
+      __ beq(CCR0, done);
+    }
+    if (!os::zero_page_read_protected() || !ImplicitNullChecks) {
+      explicit_null_check(array, op->info_for_exception());
+    } else {
+      add_debug_info_for_null_check_here(op->info_for_exception()); // Debug info is recorded right before the klass load of array.
+    }
+    __ load_klass(k_RInfo, array);
+    __ load_klass(klass_RInfo, value);
+
+    // Get instance klass.
+    __ ld(k_RInfo, in_bytes(ObjArrayKlass::element_klass_offset()), k_RInfo);
+    // Perform the fast part of the checking logic.
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, R0, success_target, &failure, NULL);
+
+    // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    const address slow_path = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+    //__ load_const_optimized(R0, slow_path);
+    __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(slow_path));
+    __ mtctr(R0);
+    __ bctrl(); // sets CR0
+    if (!should_profile) {
+      __ beq(CCR0, done);
+      __ bind(failure);
+    } else {
+      __ bne(CCR0, failure);
+      // Fall through to the success case.
+
+      Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1;
+      assert_different_registers(value, mdo, recv, tmp1);
+      __ bind(profile_cast_success);
+      metadata2reg(md->constant_encoding(), mdo);
+      __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+      __ load_klass(recv, value); // Reload the value's klass, since its register (mdo) was just overwritten.
+      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &done);
+      __ b(done);
+
+      // Cast failure case.
+      __ bind(failure);
+      metadata2reg(md->constant_encoding(), mdo);
+      __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+      // Decrement the counter of this profile entry by one increment.
+      __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+      __ addi(tmp1, tmp1, -DataLayout::counter_increment);
+      __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+    }
+    __ b(*stub->entry()); // Failed store check: continue in the stub.
+    __ bind(done);
+
+  } else if (code == lir_checkcast) {
+    Label success, failure;
+    emit_typecheck_helper(op, &success, /*fallthru*/&failure, &success); // Moves obj to dst. A null object counts as success.
+    __ b(*op->stub()->entry());
+    __ align(32, 12);
+    __ bind(success);
+  } else if (code == lir_instanceof) {
+    Register dst = op->result_opr()->as_register();
+    Label success, failure, done;
+    emit_typecheck_helper(op, &success, /*fallthru*/&failure, &failure); // A null object takes the failure path, so the result is 0.
+    __ li(dst, 0);
+    __ b(done);
+    __ align(32, 12);
+    __ bind(success);
+    __ li(dst, 1);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  const Register addr = op->addr()->as_pointer_register();
+  Register expected = noreg;
+  Register desired  = noreg;
+  bool wide = false; // Selects the doubleword form (cmpxchgd) over the word form (cmpxchgw).
+
+  switch (op->code()) {
+    case lir_cas_long:
+      expected = op->cmp_value()->as_register_lo();
+      desired  = op->new_value()->as_register_lo();
+      wide = true;
+      break;
+    case lir_cas_int:
+    case lir_cas_obj:
+      expected = op->cmp_value()->as_register();
+      desired  = op->new_value()->as_register();
+      if (op->code() != lir_cas_obj) { break; }
+      if (!UseCompressedOops) { wide = true; break; }
+      // Compressed oops: continue with the registers returned by encode_heap_oop (word form).
+      expected = __ encode_heap_oop(op->tmp1()->as_register(), expected);
+      desired  = __ encode_heap_oop(op->tmp2()->as_register(), desired);
+      break;
+    default:
+      Unimplemented();
+  }
+
+  if (wide) {
+    __ cmpxchgd(BOOL_RESULT, /*current_value=*/R0, expected, desired, addr,
+                MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, /*check without ldarx first*/true);
+  } else {
+    __ cmpxchgw(BOOL_RESULT, /*current_value=*/R0, expected, desired, addr,
+                MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, /*check without ldarx first*/true);
+  }
+}
+
+
+void LIR_Assembler::set_24bit_FPU() {
+  Unimplemented(); // No implementation here; the body only reports Unimplemented().
+}
+
+void LIR_Assembler::reset_FPU() {
+  Unimplemented(); // No implementation here; the body only reports Unimplemented().
+}
+
+
+void LIR_Assembler::breakpoint() {
+  __ illtrap(); // Breakpoint is emitted via the assembler's illtrap() (presumably an instruction that traps).
+}
+
+
+void LIR_Assembler::push(LIR_Opr opr) {
+  Unimplemented(); // No implementation here; opr is ignored.
+}
+
+void LIR_Assembler::pop(LIR_Opr opr) {
+  Unimplemented(); // No implementation here; opr is ignored.
+}
+
+
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) {
+  // Look up where the frame map keeps the lock of monitor number monitor_no
+  // (a base register plus a displacement).
+  Address lock_slot = frame_map()->address_for_monitor_lock(monitor_no);
+  const int displacement = lock_slot.disp();
+  // Compute pointer to BasicLock: dst = base + displacement.
+  __ add_const_optimized(dst_opr->as_register(), lock_slot.base(), displacement);
+}
+
+
+void LIR_Assembler::emit_lock(LIR_OpLock* op) { // Emits monitor enter (lir_lock) or monitor exit (lir_unlock).
+  Register obj = op->obj_opr()->as_register();
+  Register hdr = op->hdr_opr()->as_register();
+  Register lock = op->lock_opr()->as_register();
+
+  // Obj may not be an oop.
+  if (op->code() == lir_lock) {
+    // The slow case is reached through op->stub()->entry(); no typed stub pointer is needed here.
+    if (UseFastLocking) {
+      assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+      // Add debug info for NullPointerException only if one is possible.
+      if (op->info() != NULL) {
+        if (!os::zero_page_read_protected() || !ImplicitNullChecks) {
+          explicit_null_check(obj, op->info());
+        } else {
+          add_debug_info_for_null_check_here(op->info());
+        }
+      }
+      __ lock_object(hdr, obj, lock, op->scratch_opr()->as_register(), *op->stub()->entry());
+    } else {
+      // always do slow locking
+      // note: The slow locking code could be inlined here, however if we use
+      //       slow locking, speed doesn't matter anyway and this solution is
+      //       simpler and requires less duplicated code - additionally, the
+      //       slow locking code is the same in either case which simplifies
+      //       debugging.
+      __ b(*op->stub()->entry());
+    }
+  } else {
+    assert (op->code() == lir_unlock, "Invalid code, expected lir_unlock");
+    if (UseFastLocking) {
+      assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+      __ unlock_object(hdr, obj, lock, *op->stub()->entry());
+    } else {
+      // always do slow unlocking
+      // note: The slow unlocking code could be inlined here, however if we use
+      //       slow unlocking, speed doesn't matter anyway and this solution is
+      //       simpler and requires less duplicated code - additionally, the
+      //       slow unlocking code is the same in either case which simplifies
+      //       debugging.
+      __ b(*op->stub()->entry());
+    }
+  }
+  __ bind(*op->stub()->continuation()); // Fast and slow paths continue here.
+}
+
+
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { // Emits the MethodData counter updates for a profiled call site.
+  ciMethod* method = op->profiled_method();
+  int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
+
+  // Update counter for all call types.
+  ciMethodData* md = method->method_data_or_null();
+  assert(md != NULL, "Sanity");
+  ciProfileData* data = md->bci_to_data(bci);
+  assert(data != NULL && data->is_CounterData(), "need CounterData for calls");
+  assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+  Register mdo = op->mdo()->as_register();
+#ifdef _LP64
+  assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated");
+  Register tmp1 = op->tmp1()->as_register_lo();
+#else
+  assert(op->tmp1()->is_single_cpu(), "tmp1 must be allocated");
+  Register tmp1 = op->tmp1()->as_register();
+#endif
+  metadata2reg(md->constant_encoding(), mdo);
+  int mdo_offset_bias = 0;
+  if (!Assembler::is_simm16(md->byte_offset_of_slot(data, CounterData::count_offset()) +
+                            data->size_in_bytes())) {
+    // The offset is large so bias the mdo by the base of the slot so
+    // that the ld can use simm16s to reference the slots of the data.
+    mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset());
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+  }
+
+  Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes.
+  if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // Required for optimized MH invokes.
+      C1ProfileVirtualCalls) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, tmp1, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type.
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations.
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          __ addi(tmp1, tmp1, DataLayout::counter_increment);
+          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot.
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time.
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          metadata2reg(known_klass->constant_encoding(), tmp1);
+          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) - mdo_offset_bias, mdo);
+
+          __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          __ addi(tmp1, tmp1, DataLayout::counter_increment);
+          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          return;
+        }
+      }
+    } else { // If neither loop above returned, no counter update is emitted for the known-klass case.
+      __ load_klass(recv, recv); // recv now holds the receiver's klass instead of the receiver.
+      Label update_done;
+      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+      __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+      __ addi(tmp1, tmp1, DataLayout::counter_increment);
+      __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+
+      __ bind(update_done);
+    }
+  } else {
+    // Static call
+    __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+    __ addi(tmp1, tmp1, DataLayout::counter_increment);
+    __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+  }
+}
+
+
+void LIR_Assembler::align_backward_branch_target() {
+  __ align(32, 12); // Insert up to 3 nops to align with 32 byte boundary (12 is presumably the padding limit in bytes).
+}
+
+
+void LIR_Assembler::emit_delay(LIR_OpDelay* op) {
+  Unimplemented(); // No implementation here; op is ignored.
+}
+
+
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+  assert(left->is_register(), "can only handle registers");
+
+  // Dispatch on the kind of register operand; each case emits a single
+  // negate instruction into the matching register of dest.
+  if (left->is_single_cpu()) { __ neg(dest->as_register(), left->as_register()); return; }
+  if (left->is_single_fpu()) { __ fneg(dest->as_float_reg(), left->as_float_reg()); return; }
+  if (left->is_double_fpu()) { __ fneg(dest->as_double_reg(), left->as_double_reg()); return; }
+
+  // Whatever is left has to be a long; it is negated through the low
+  // register of the operand.
+  assert (left->is_double_cpu(), "Must be a long");
+  __ neg(dest->as_register_lo(), left->as_register_lo());
+}
+
+
+void LIR_Assembler::fxch(int i) {
+  Unimplemented(); // No implementation here; i is ignored.
+}
+
+void LIR_Assembler::fld(int i) {
+  Unimplemented(); // No implementation here; i is ignored.
+}
+
+void LIR_Assembler::ffree(int i) {
+  Unimplemented(); // No implementation here; i is ignored.
+}
+
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest,
+                            const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  // Stubs: Called via rt_call, but dest is a stub address (no function descriptor).
+  const bool dest_is_stub = dest == Runtime1::entry_for(Runtime1::register_finalizer_id) ||
+                            dest == Runtime1::entry_for(Runtime1::new_multi_array_id);
+  if (!dest_is_stub) {
+    // Any other destination goes through call_c_with_frame_resize; call info is optional here.
+    __ call_c_with_frame_resize(dest, /*no resizing*/ 0);
+    if (info != NULL) { add_call_info_here(info); }
+    return;
+  }
+
+  // Stub destination: form the address relative to the global TOC and call it through CTR.
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(dest));
+  __ mtctr(R0);
+  __ bctrl();
+  assert(info != NULL, "sanity");
+  add_call_info_here(info);
+}
+
+
+// Must never be called on this port: the body is ShouldNotReachHere().
+// None of the parameters are used.
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+  ShouldNotReachHere(); // Not needed on _LP64.
+}
+
+// LIR membar: emits the macro assembler's fence() sequence.
+void LIR_Assembler::membar() {
+  __ fence();
+}
+
+// LIR membar_acquire: emits the macro assembler's acquire() sequence.
+void LIR_Assembler::membar_acquire() {
+  __ acquire();
+}
+
+// LIR membar_release: emits the macro assembler's release() sequence.
+void LIR_Assembler::membar_release() {
+  __ release();
+}
+
+// LIR membar_loadload: emits a membar with the LoadLoad ordering constraint.
+void LIR_Assembler::membar_loadload() {
+  __ membar(Assembler::LoadLoad);
+}
+
+// LIR membar_storestore: emits a membar with the StoreStore ordering constraint.
+void LIR_Assembler::membar_storestore() {
+  __ membar(Assembler::StoreStore);
+}
+
+// LIR membar_loadstore: emits a membar with the LoadStore ordering constraint.
+void LIR_Assembler::membar_loadstore() {
+  __ membar(Assembler::LoadStore);
+}
+
+// LIR membar_storeload: emits a membar with the StoreLoad ordering constraint.
+void LIR_Assembler::membar_storeload() {
+  __ membar(Assembler::StoreLoad);
+}
+
+
+// Materializes the effective address described by addr_opr in dest.
+// The address is either base + index (displacement must be 0) or
+// base + displacement; scaled addresses are not accepted.
+void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
+  LIR_Address* ea = addr_opr->as_address_ptr();
+  assert(ea->scale() == LIR_Address::times_1, "no scaling on this platform");
+
+  if (!ea->index()->is_illegal()) {
+    // base + index form.
+    assert(ea->disp() == 0, "can't have both: index and disp");
+    __ add(dest->as_pointer_register(), ea->index()->as_pointer_register(), ea->base()->as_pointer_register());
+    return;
+  }
+
+  // base + displacement form.
+  __ add_const_optimized(dest->as_pointer_register(), ea->base()->as_pointer_register(), ea->disp());
+}
+
+
+// Must never be called on this port: the body is ShouldNotReachHere().
+// 'result_reg' is not used.
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+  ShouldNotReachHere();
+}
+
+
+#ifdef ASSERT
+// Emit run-time assertion.
+// Only compiled when ASSERT is defined. Not implemented by this port: the
+// body calls Unimplemented() and 'op' is not inspected.
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  Unimplemented();
+}
+#endif
+
+
+// Peephole pass over a LIR list, run before code emission.
+// Currently it handles one pattern: a register-to-register move that directly
+// follows the reverse move (X -> Y, then Y -> X). The second move is removed.
+void LIR_Assembler::peephole(LIR_List* lir) {
+  LIR_OpList* ops = lir->instructions_list();
+
+  for (int idx = 1; idx < ops->length(); idx++) {
+    LIR_Op* cur = ops->at(idx);
+    if (cur->code() != lir_move) {
+      continue;
+    }
+
+    // Only register-register moves are candidates.
+    LIR_Opr cur_src = ((LIR_Op1*)cur)->in_opr();
+    LIR_Opr cur_dst = ((LIR_Op1*)cur)->result_opr();
+    if (!cur_src->is_register() || !cur_dst->is_register()) {
+      continue;
+    }
+
+    // The predecessor has to be a move as well.
+    LIR_Op* before = ops->at(idx - 1);
+    if (!before || before->code() != lir_move) {
+      continue;
+    }
+
+    LIR_Opr before_src = ((LIR_Op1*)before)->in_opr();
+    LIR_Opr before_dst = ((LIR_Op1*)before)->result_opr();
+    const bool undoes_previous = before_src->is_same_register(cur_dst) &&
+                                 cur_src->is_same_register(before_dst);
+    if (undoes_previous) {
+      ops->remove_at(idx);
+    }
+  }
+}
+
+
+// Emits an atomic read-modify-write on the memory location addressed by 'src',
+// implemented as a load-reserve / store-conditional retry loop.
+//  - code: for lir_xadd the stored value is old + data; otherwise data itself
+//    is stored (the oop path asserts lir_xchg).
+//  - dest: receives the value that was in memory before the update.
+//  - tmp:  holds the sum for xadd, or the compressed oop when
+//    UseCompressedOops is on and data is an oop.
+// Supported data kinds: T_INT, oops and T_LONG; anything else hits
+// ShouldNotReachHere(). No memory barriers are emitted by this function.
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
+  const Register Rptr = src->as_pointer_register(),
+                 Rtmp = tmp->as_register();
+  Register Rco = noreg;
+  // Compress the new oop once, before the retry loop starts.
+  if (UseCompressedOops && data->is_oop()) {
+    Rco = __ encode_heap_oop(Rtmp, data->as_register());
+  }
+
+  // Start of the retry loop; re-entered when the store-conditional fails.
+  Label Lretry;
+  __ bind(Lretry);
+
+  if (data->type() == T_INT) {
+    // 32-bit integer: lwarx/stwcx_ pair.
+    const Register Rold = dest->as_register(),
+                   Rsrc = data->as_register();
+    assert_different_registers(Rptr, Rtmp, Rold, Rsrc);
+    __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
+    if (code == lir_xadd) {
+      __ add(Rtmp, Rsrc, Rold);
+      __ stwcx_(Rtmp, Rptr);
+    } else {
+      __ stwcx_(Rsrc, Rptr);
+    }
+  } else if (data->is_oop()) {
+    assert(code == lir_xchg, "xadd for oops");
+    const Register Rold = dest->as_register();
+    if (UseCompressedOops) {
+      // Compressed oops are exchanged as 32-bit words.
+      assert_different_registers(Rptr, Rold, Rco);
+      __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
+      __ stwcx_(Rco, Rptr);
+    } else {
+      // Uncompressed oops are exchanged as 64-bit doublewords.
+      const Register Robj = data->as_register();
+      assert_different_registers(Rptr, Rold, Robj);
+      __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
+      __ stdcx_(Robj, Rptr);
+    }
+  } else if (data->type() == T_LONG) {
+    // 64-bit integer: ldarx/stdcx_ pair.
+    const Register Rold = dest->as_register_lo(),
+                   Rsrc = data->as_register_lo();
+    assert_different_registers(Rptr, Rtmp, Rold, Rsrc);
+    __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
+    if (code == lir_xadd) {
+      __ add(Rtmp, Rsrc, Rold);
+      __ stdcx_(Rtmp, Rptr);
+    } else {
+      __ stdcx_(Rsrc, Rptr);
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+
+  // Loop back while CCR0 reports "not equal" after the store-conditional.
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    __ bne_predict_not_taken(CCR0, Lretry);
+  } else {
+    __ bne(                  CCR0, Lretry);
+  }
+
+  // The old value was loaded in compressed form; decode it in place.
+  if (UseCompressedOops && data->is_oop()) {
+    __ decode_heap_oop(dest->as_register());
+  }
+}
+
+
+// Emits the type-profile update for one profile cell (LIR_OpProfileType).
+//
+// The cell addressed by op->mdp() holds a klass value combined with the
+// TypeEntries::null_seen and TypeEntries::type_unknown flag bits. Depending on
+// what the compiler already knows (current_klass, exact_klass, not_null,
+// no_conflict) the emitted code
+//  - sets null_seen when obj is null,
+//  - records the klass the first time a type is seen, or
+//  - sets type_unknown when a klass different from the recorded one shows up.
+//
+// R0 is used as scratch. R29_TOC is temporarily reused as the klass register
+// and is re-materialized before leaving (tracked by klass_reg_used).
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+  Register obj = op->obj()->as_register();
+  Register tmp = op->tmp()->as_pointer_register();
+  LIR_Address* mdo_addr = op->mdp()->as_address_ptr();
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label Lupdate, Ldo_update, Ldone;
+
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+
+  __ verify_oop(obj);
+
+  if (do_null) {
+    if (!TypeEntries::was_null_seen(current_klass)) {
+      // Null not recorded yet: on a null obj set the null_seen bit in the cell.
+      __ cmpdi(CCR0, obj, 0);
+      __ bne(CCR0, Lupdate);
+      __ ld(R0, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+      __ ori(R0, R0, TypeEntries::null_seen);
+      if (do_update) {
+        // Share the store at Ldo_update.
+        __ b(Ldo_update);
+      } else {
+        __ std(R0, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+      }
+    } else {
+      if (do_update) {
+        // Null already recorded: a null obj needs no further work.
+        __ cmpdi(CCR0, obj, 0);
+        __ beq(CCR0, Ldone);
+      }
+    }
+#ifdef ASSERT
+  } else {
+    __ cmpdi(CCR0, obj, 0);
+    __ bne(CCR0, Lupdate);
+    __ stop("unexpect null obj", 0x9652);
+#endif
+  }
+
+  __ bind(Lupdate);
+  if (do_update) {
+    Label Lnext;
+    const Register klass = R29_TOC; // kill and reload
+    bool klass_reg_used = false;
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      Label ok;
+      klass_reg_used = true;
+      __ load_klass(klass, obj);
+      metadata2reg(exact_klass->constant_encoding(), R0);
+      __ cmpd(CCR0, klass, R0);
+      __ beq(CCR0, ok);
+      __ stop("exact klass and actual klass differ", 0x8564);
+      __ bind(ok);
+    }
+#endif
+
+    if (!no_conflict) {
+      if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+        klass_reg_used = true;
+        if (exact_klass != NULL) {
+          __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+          metadata2reg(exact_klass->constant_encoding(), klass);
+        } else {
+          __ load_klass(klass, obj);
+          __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register()); // may kill obj
+        }
+
+        // Like InterpreterMacroAssembler::profile_obj_type
+        __ clrrdi(R0, tmp, exact_log2(-TypeEntries::type_klass_mask));
+        // Basically same as andi(R0, tmp, TypeEntries::type_klass_mask);
+        __ cmpd(CCR1, R0, klass);
+        // Klass seen before, nothing to do (regardless of unknown bit).
+        //beq(CCR1, do_nothing);
+
+        // Test the unknown bit of the profile cell (tmp). Testing 'klass' here
+        // would inspect the klass value rather than the recorded profile state,
+        // so the "already unknown" exit below could not trigger.
+        __ andi_(R0, tmp, TypeEntries::type_unknown);
+        // Already unknown. Nothing to do anymore.
+        //bne(CCR0, do_nothing);
+        __ crorc(CCR0, Assembler::equal, CCR1, Assembler::equal); // cr0 eq = cr1 eq or cr0 ne
+        __ beq(CCR0, Lnext);
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          __ clrrdi_(R0, tmp, exact_log2(-TypeEntries::type_mask));
+          __ orr(R0, klass, tmp); // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+          __ beq(CCR0, Ldo_update); // First time here. Set profile type.
+        }
+
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+        __ andi_(R0, tmp, TypeEntries::type_unknown);
+        // Already unknown. Nothing to do anymore.
+        __ bne(CCR0, Lnext);
+      }
+
+      // Different than before. Cannot keep accurate profile.
+      __ ori(R0, tmp, TypeEntries::type_unknown);
+    } else {
+      // There's a single possible klass at this profile point
+      assert(exact_klass != NULL, "should be");
+      __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+
+      if (TypeEntries::is_type_none(current_klass)) {
+        klass_reg_used = true;
+        metadata2reg(exact_klass->constant_encoding(), klass);
+
+        __ clrrdi(R0, tmp, exact_log2(-TypeEntries::type_klass_mask));
+        // Basically same as andi(R0, tmp, TypeEntries::type_klass_mask);
+        __ cmpd(CCR1, R0, klass);
+        // Klass seen before, nothing to do (regardless of unknown bit).
+        __ beq(CCR1, Lnext);
+#ifdef ASSERT
+        {
+          Label ok;
+          __ clrrdi_(R0, tmp, exact_log2(-TypeEntries::type_mask));
+          __ beq(CCR0, ok); // First time here.
+
+          __ stop("unexpected profiling mismatch", 0x7865);
+          __ bind(ok);
+        }
+#endif
+        // First time here. Set profile type.
+        __ orr(R0, klass, tmp); // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        // Already unknown. Nothing to do anymore.
+        __ andi_(R0, tmp, TypeEntries::type_unknown);
+        __ bne(CCR0, Lnext);
+
+        // Different than before. Cannot keep accurate profile.
+        __ ori(R0, tmp, TypeEntries::type_unknown);
+      }
+    }
+
+    // Common store of the new cell value prepared in R0.
+    __ bind(Ldo_update);
+    __ std(R0, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+
+    __ bind(Lnext);
+    if (klass_reg_used) { __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0); } // reinit
+  }
+  __ bind(Ldone);
+}
+
+
+// Emits code that folds one byte (op->val()) into a CRC32 value (op->crc())
+// and leaves the new CRC in op->result_opr(). All three operands must be
+// distinct single CPU registers. The crc register is overwritten on the way,
+// and R0 is passed as scratch when loading the table address.
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+  assert(op->crc()->is_single_cpu(), "crc must be register");
+  assert(op->val()->is_single_cpu(), "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+  Register crc = op->crc()->as_register();
+  Register val = op->val()->as_register();
+  Register res = op->result_opr()->as_register();
+
+  assert_different_registers(val, crc, res);
+
+  // res temporarily holds the CRC table address for update_byte_crc32.
+  __ load_const_optimized(res, StubRoutines::crc_table_addr(), R0);
+  __ nand(crc, crc, crc); // ~crc
+  __ update_byte_crc32(crc, val, res);
+  __ nand(res, crc, crc); // ~crc
+}
+
+#undef __

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
+#define CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
+
+ private:
+
+  //////////////////////////////////////////////////////////////////////////////
+  // PPC64 load/store emission
+  //
+  // The PPC ld/st instructions cannot accommodate displacements > 16 bits long.
+  // The following "pseudo" instructions (load/store) make it easier to
+  // use the indexed addressing mode by allowing 32 bit displacements:
+  //
+
+  // Explicit null check of 'addr', associated with 'info'.
+  void explicit_null_check(Register addr, CodeEmitInfo* info);
+
+  // Store 'from_reg' at base + offset (immediate form) or base + disp
+  // (register form).
+  // NOTE(review): the int result is presumably the code offset of the emitted
+  // access -- confirm against the definitions in the .cpp file.
+  int store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned);
+  int store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide);
+
+  // Load 'to_reg' from base + offset (immediate form) or base + disp
+  // (register form). Same return value convention as store().
+  int load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned);
+  int load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide);
+
+  // Shift amount associated with BasicType 't'.
+  // NOTE(review): presumably log2 of the element size -- confirm.
+  int shift_amount(BasicType t);
+
+  // Record the type of the receiver in ReceiverTypeData.
+  void type_profile_helper(Register mdo, int mdo_offset_bias,
+                           ciMethodData *md, ciProfileData *data,
+                           Register recv, Register tmp1, Label* update_done);
+  // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
+  void setup_md_access(ciMethod* method, int bci,
+                       ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
+ public:
+  // Condition register constant shared by this assembler; defined elsewhere.
+  static const ConditionRegister BOOL_RESULT;
+
+  // Emit trampoline stub for call. Call bailout() if failed. Return true on success.
+  bool emit_trampoline_stub_for_call(address target, Register Rtoc = noreg);
+
+// Size constants for the call stubs and handlers of this port, expressed in
+// terms of BytesPerInstWord and MacroAssembler size constants.
+enum {
+  max_static_call_stub_size = 4 * BytesPerInstWord + MacroAssembler::b64_patchable_size,
+  call_stub_size = max_static_call_stub_size + MacroAssembler::trampoline_stub_size, // or smaller
+  exception_handler_size = MacroAssembler::b64_patchable_size, // or smaller
+  deopt_handler_size = MacroAssembler::bl64_patchable_size
+};
+
+#endif // CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,1429 @@
+/*
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_ppc.inline.hpp"
+
+// Shorthand used throughout this file to append to the generator's LIR list.
+// With ASSERT defined, the current source file and line are passed to lir().
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+// Loads a byte-typed item. This port has no special byte registers, so it
+// simply delegates to load_item().
+void LIRItem::load_byte_item() {
+  // Byte loads use same registers as other loads.
+  load_item();
+}
+
+
+// Makes the item usable as an operand without forcing inlinable constants
+// into a register: a value the generator can inline stays a constant operand,
+// anything else is loaded via load_item().
+void LIRItem::load_nonconstant() {
+  LIR_Opr opr = value()->operand();
+
+  if (!_gen->can_inline_as_constant(value())) {
+    load_item();
+    return;
+  }
+
+  // Use the existing operand if it is already a constant, otherwise build a
+  // constant operand from the value's type.
+  _result = opr->is_constant() ? opr : LIR_OprFact::value_type(value()->type());
+}
+
+
+// Appends to 'll' the LIR that brings the int item 'li' into 'dst' as a long.
+// An item whose operand is a register gets an explicit i2l conversion into the
+// long view of dst; anything else is handled by a plain move.
+inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) {
+  const bool in_register = li.value()->operand()->is_register();
+
+  if (!in_register) {
+    // Constants or memory get loaded with sign extend on this platform.
+    ll->move(li.result(), dst);
+    return;
+  }
+
+  LIR_Opr long_dst = FrameMap::as_long_opr(dst->as_register());
+  ll->convert(Bytecodes::_i2l, li.result(), long_dst); // Convert.
+}
+
+
+//--------------------------------------------------------------
+//               LIRGenerator
+//--------------------------------------------------------------
+
+// Fixed operands used by the shared LIR generator code on this port.
+// getThreadTemp() returns the illegal operand because no temp is needed.
+LIR_Opr LIRGenerator::exceptionOopOpr()              { return FrameMap::R3_oop_opr; }
+LIR_Opr LIRGenerator::exceptionPcOpr()               { return FrameMap::R4_opr; }
+LIR_Opr LIRGenerator::syncLockOpr()                  { return FrameMap::R5_opr; }     // Need temp effect for MonitorEnterStub.
+LIR_Opr LIRGenerator::syncTempOpr()                  { return FrameMap::R4_oop_opr; } // Need temp effect for MonitorEnterStub.
+LIR_Opr LIRGenerator::getThreadTemp()                { return LIR_OprFact::illegalOpr; } // not needed
+
+// Returns the fixed operand in which a result of the given value type is
+// delivered: R3 for int/object/long, F1 for float/double. Any other tag is a
+// programming error. 'callee' is not consulted.
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
+  LIR_Opr result_reg;
+
+  switch (type->tag()) {
+  case intTag:
+    result_reg = FrameMap::R3_opr;
+    break;
+  case objectTag:
+    result_reg = FrameMap::R3_oop_opr;
+    break;
+  case longTag:
+    result_reg = FrameMap::R3_long_opr;
+    break;
+  case floatTag:
+    result_reg = FrameMap::F1_opr;
+    break;
+  case doubleTag:
+    result_reg = FrameMap::F1_double_opr;
+    break;
+
+  case addressTag: // No result register for addresses; handled like any unknown tag.
+  default:
+    ShouldNotReachHere();
+    return LIR_OprFact::illegalOpr;
+  }
+
+  assert(result_reg->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return result_reg;
+}
+
+// Must never be called on this port: hits ShouldNotReachHere() and returns
+// the illegal operand. 'type' is not used.
+LIR_Opr LIRGenerator::rlock_callee_saved(BasicType type) {
+  ShouldNotReachHere();
+  return LIR_OprFact::illegalOpr;
+}
+
+
+// Returns a fresh T_INT virtual register for a byte value. 'type' is not
+// consulted.
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  return new_register(T_INT);
+}
+
+
+//--------- loading items into registers --------------------------------
+
+// PPC cannot inline all constants.
+// Accepts int and long constants that fit a signed 16-bit immediate, and the
+// null object constant. Everything else is rejected. 'type' is not consulted.
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  ValueType* vt = v->type();
+
+  if (vt->as_IntConstant() != NULL) {
+    return Assembler::is_simm16(vt->as_IntConstant()->value());
+  }
+  if (vt->as_LongConstant() != NULL) {
+    return Assembler::is_simm16(vt->as_LongConstant()->value());
+  }
+  if (vt->as_ObjectConstant() != NULL) {
+    return vt->as_ObjectConstant()->value()->is_null_object();
+  }
+  return false;
+}
+
+
+// Only simm16 constants can be inlined.
+// Delegates to can_store_as_constant() with the basic type of the value.
+bool LIRGenerator::can_inline_as_constant(Value i) const {
+  return can_store_as_constant(i, as_BasicType(i->type()));
+}
+
+
+// LIR constant variant: T_INT and T_LONG constants must fit a signed 16-bit
+// immediate, T_OBJECT constants must be NULL. All other types are rejected.
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+  switch (c->type()) {
+  case T_INT:
+    return Assembler::is_simm16(c->as_jint());
+  case T_LONG:
+    return Assembler::is_simm16(c->as_jlong());
+  case T_OBJECT:
+    return c->as_jobject() == NULL;
+  default:
+    return false;
+  }
+}
+
+
+// Returns a fresh T_INT virtual register for use by the safepoint poll.
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return new_register(T_INT);
+}
+
+
+// Builds a LIR_Address for base + (index << shift) + disp.
+// The result uses either base + index register or base + simm16 displacement,
+// so shift and displacement are folded into the index register as needed.
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+
+  // A constant index becomes part of the displacement.
+  if (index->is_constant()) {
+    disp += index->as_constant_ptr()->as_jint() << shift;
+    index = LIR_OprFact::illegalOpr;
+  }
+
+  if (!index->is_register()) {
+    if (!Assembler::is_simm16(disp)) {
+      // Index is illegal so replace it with the displacement loaded into a register.
+      index = new_pointer_register();
+      __ move(LIR_OprFact::intptrConst(disp), index);
+      disp = 0;
+    }
+  } else {
+    // Scale the index first, then add the displacement to it.
+    if (shift > 0) {
+      LIR_Opr scaled = new_pointer_register();
+      __ shift_left(index, shift, scaled);
+      index = scaled;
+    }
+    if (disp != 0) {
+      LIR_Opr sum = new_pointer_register();
+      if (Assembler::is_simm16(disp)) {
+        __ add(index, LIR_OprFact::intptrConst(disp), sum);
+      } else {
+        __ move(LIR_OprFact::intptrConst(disp), sum);
+        __ add(sum, index, sum);
+      }
+      index = sum;
+      disp = 0;
+    }
+  }
+
+  // At this point we either have base + index or base + displacement.
+  if (disp != 0) {
+    assert(Assembler::is_simm16(disp), "must be");
+    return new LIR_Address(base, disp, type);
+  }
+  return new LIR_Address(base, index, type);
+}
+
+
+// Builds the address of element 'index_opr' of array 'array_opr' for elements
+// of the given type. With needs_card_mark the complete address is computed
+// into one register and returned without displacement; otherwise the array
+// header offset stays in the displacement of the returned address.
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type, bool needs_card_mark) {
+  const int elem_bytes = type2aelembytes(type);
+  const int shift = exact_log2(elem_bytes);
+
+  LIR_Opr base_opr;
+  int offset = arrayOopDesc::base_offset_in_bytes(type);
+
+  if (index_opr->is_constant()) {
+    // Constant index: fold it into the displacement if the sum still fits.
+    const int array_offset = index_opr->as_constant_ptr()->as_jint() * elem_bytes;
+    const int total_offset = array_offset + offset;
+    if (Assembler::is_simm16(total_offset)) {
+      base_opr = array_opr;
+      offset = total_offset;
+    } else {
+      base_opr = new_pointer_register();
+      if (Assembler::is_simm16(array_offset)) {
+        __ add(array_opr, LIR_OprFact::intptrConst(array_offset), base_opr);
+      } else {
+        __ move(LIR_OprFact::intptrConst(array_offset), base_opr);
+        __ add(base_opr, array_opr, base_opr);
+      }
+    }
+  } else {
+#ifdef _LP64
+    // Widen an int index before it takes part in pointer arithmetic.
+    if (index_opr->type() == T_INT) {
+      LIR_Opr wide_index = new_register(T_LONG);
+      __ convert(Bytecodes::_i2l, index_opr, wide_index);
+      index_opr = wide_index;
+    }
+#endif
+
+    base_opr = new_pointer_register();
+    assert (index_opr->is_register(), "Must be register");
+    if (shift > 0) {
+      __ shift_left(index_opr, shift, base_opr);
+      __ add(base_opr, array_opr, base_opr);
+    } else {
+      __ add(index_opr, array_opr, base_opr);
+    }
+  }
+
+  if (!needs_card_mark) {
+    return new LIR_Address(base_opr, offset, type);
+  }
+
+  // Card marking: provide the full element address in a single register.
+  LIR_Opr ptr = new_pointer_register();
+  __ add(base_opr, LIR_OprFact::intptrConst(offset), ptr);
+  return new LIR_Address(ptr, type);
+}
+
+
+// Returns an operand for the immediate 'x' of type T_INT or T_LONG.
+// A value that fits a signed 16-bit immediate is returned as a constant
+// operand; larger values are moved into a new register first.
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  LIR_Opr imm = NULL;
+  switch (type) {
+  case T_LONG:
+    imm = LIR_OprFact::longConst(x);
+    break;
+  case T_INT:
+    imm = LIR_OprFact::intConst(x);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  if (Assembler::is_simm16(x)) {
+    return imm;
+  }
+
+  LIR_Opr reg = new_register(type);
+  __ move(imm, reg);
+  return reg;
+}
+
+
+// Increments the counter at the absolute address 'counter' by 'step':
+// loads the address into a pointer register and delegates to the
+// LIR_Address overload.
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr counter_ptr = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), counter_ptr);
+  increment_counter(new LIR_Address(counter_ptr, type), step);
+}
+
+
+// Emits a plain load / add / store sequence that adds 'step' to the memory
+// location 'addr'.
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr value = new_register(addr->type());
+  __ move(addr, value);
+  // The step is materialized after the load, as a constant or a register.
+  LIR_Opr delta = load_immediate(step, addr->type());
+  __ add(value, delta, value);
+  __ move(value, addr);
+}
+
+
+// Compares the int at [base + disp] with the constant 'c'. The memory operand
+// is loaded into R0 first; 'info' is attached to that load.
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr scratch = FrameMap::R0_opr;
+  LIR_Address* mem = new LIR_Address(base, disp, T_INT);
+  __ load(mem, scratch, info);
+  __ cmp(condition, scratch, c);
+}
+
+
+// Compares 'reg' with the value at [base + disp] (immediate displacement).
+// The memory operand is loaded into R0 first; 'info' is attached to that load.
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base,
+                               int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr scratch = FrameMap::R0_opr;
+  LIR_Address* mem = new LIR_Address(base, disp, type);
+  __ load(mem, scratch, info);
+  __ cmp(condition, reg, scratch);
+}
+
+
+// Compares 'reg' with the value at [base + disp] (displacement in an operand).
+// The memory operand is loaded into R0 first; 'info' is attached to that load.
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base,
+                               LIR_Opr disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr scratch = FrameMap::R0_opr;
+  LIR_Address* mem = new LIR_Address(base, disp, type);
+  __ load(mem, scratch, info);
+  __ cmp(condition, reg, scratch);
+}
+
+
+// Tries to replace left * c by a shift plus one add/sub:
+//   c == 2^n - 1  ->  (left << n) - left
+//   c == 2^n + 1  ->  (left << n) + left
+// Returns true if code was emitted, false if 'c' has neither form.
+// 'tmp' is not used.
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+  assert(left != result, "should be different registers");
+
+  const int above = c + 1;
+  if (is_power_of_2(above)) {
+    __ shift_left(left, log2_intptr(above), result);
+    __ sub(result, left, result);
+    return true;
+  }
+
+  const int below = c - 1;
+  if (is_power_of_2(below)) {
+    __ shift_left(left, log2_intptr(below), result);
+    __ add(result, left, result);
+    return true;
+  }
+
+  return false;
+}
+
+
+// Stores 'item' into the outgoing stack slot at SP + offset_from_sp.
+// A long or double whose unbiased offset is not a multiple of 8 is written
+// with unaligned_move; everything else uses a normal move.
+void LIRGenerator::store_stack_parameter(LIR_Opr item, ByteSize offset_from_sp) {
+  const BasicType t = item->type();
+  const int sp_offset = in_bytes(offset_from_sp);
+
+  const bool is_8_byte_value = (t == T_LONG || t == T_DOUBLE);
+  const bool misaligned = is_8_byte_value && ((sp_offset - STACK_BIAS) % 8 != 0);
+
+  LIR_Address* slot = new LIR_Address(FrameMap::SP_opr, sp_offset, t);
+  if (misaligned) {
+    __ unaligned_move(item, slot);
+  } else {
+    __ move(item, slot);
+  }
+}
+
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+
+// Generates LIR for an array element store (StoreIndexed): loads the
+// operands, emits the optional range check and array store check, and then
+// performs the store. Object stores are wrapped in GC pre/post barriers.
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
+  assert(x->is_pinned(),"");
+  bool needs_range_check = x->compute_needs_range_check();
+  bool use_length = x->length() != NULL;
+  bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+  // The store check is only skipped for a constant null value that is not profiled.
+  bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+                                         !get_jobject_constant(x->value())->is_null_object() ||
+                                         x->should_profile());
+
+  LIRItem array(x->array(), this);
+  LIRItem index(x->index(), this);
+  LIRItem value(x->value(), this);
+  LIRItem length(this);
+
+  array.load_item();
+  index.load_nonconstant();
+
+  // The length item is only set up when it is available and actually needed.
+  if (use_length && needs_range_check) {
+    length.set_instruction(x->length());
+    length.load_item();
+  }
+  if (needs_store_check) {
+    value.load_item();
+  } else {
+    value.load_for_store(x->elt_type());
+  }
+
+  set_no_result(x);
+
+  // The CodeEmitInfo must be duplicated for each different
+  // LIR-instruction because spilling can occur anywhere between two
+  // instructions and so the debug information must be different.
+  CodeEmitInfo* range_check_info = state_for(x);
+  CodeEmitInfo* null_check_info = NULL;
+  if (x->needs_null_check()) {
+    null_check_info = new CodeEmitInfo(range_check_info);
+  }
+
+  // Emit array address setup early so it schedules better.
+  LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+
+  if (GenerateRangeChecks && needs_range_check) {
+    if (use_length) {
+      // Length is in a register: branch to the stub if length <= index (belowEqual).
+      __ cmp(lir_cond_belowEqual, length.result(), index.result());
+      __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+    } else {
+      array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+      // Range_check also does the null check.
+      null_check_info = NULL;
+    }
+  }
+
+  if (GenerateArrayStoreCheck && needs_store_check) {
+    // Following registers are used by slow_subtype_check:
+    LIR_Opr tmp1 = FrameMap::R4_opr; // super_klass
+    LIR_Opr tmp2 = FrameMap::R5_opr; // sub_klass
+    LIR_Opr tmp3 = FrameMap::R6_opr; // temp
+
+    CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+    __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3,
+                   store_check_info, x->profiled_method(), x->profiled_bci());
+  }
+
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+  }
+  // The store itself; carries the null check info unless a check was already emitted.
+  __ move(value.result(), array_addr, null_check_info);
+  if (obj_store) {
+    // Precise card mark.
+    post_barrier(LIR_OprFact::address(array_addr), value.result());
+  }
+}
+
+
+// Generates LIR for monitorenter on x->obj(). The lock, scratch and header
+// temps are the fixed registers R5, R4 and R6.
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // We use R4+R5 in order to get a temp effect. These regs are used in slow path (MonitorEnterStub).
+  LIR_Opr lock    = FrameMap::R5_opr;
+  LIR_Opr scratch = FrameMap::R4_opr;
+  LIR_Opr hdr     = FrameMap::R6_opr;
+
+  // Exception info is only needed when the object may be null.
+  CodeEmitInfo* info_for_exception = NULL;
+  if (x->needs_null_check()) {
+    info_for_exception = state_for(x);
+  }
+
+  // This CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expects object to be unlocked).
+  CodeEmitInfo* info = state_for(x, x->state(), true);
+  monitor_enter(obj.result(), lock, hdr, scratch, x->monitor_no(), info_for_exception, info);
+}
+
+
+// Generates LIR for monitorexit. The object item is not loaded here;
+// monitor_exit() is given the fixed temp R6 for the object instead, together
+// with R5 (lock) and R4 (header).
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+
+  set_no_result(x);
+  LIR_Opr lock     = FrameMap::R5_opr;
+  LIR_Opr hdr      = FrameMap::R4_opr; // Used for slow path (MonitorExitStub).
+  LIR_Opr obj_temp = FrameMap::R6_opr;
+  monitor_exit(obj_temp, lock, hdr, LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+
+// _ineg, _lneg, _fneg, _dneg
+// Loads the operand into a register, allocates the result register and
+// appends a LIR negate.
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+  LIRItem value(x->x(), this);
+  value.load_item();
+  LIR_Opr reg = rlock_result(x);
+  __ negate(value.result(), reg);
+}
+
+
+// for  _fadd, _fmul, _fsub, _fdiv, _frem
+//      _dadd, _dmul, _dsub, _ddiv, _drem
+// Add/sub/mul/div are emitted as FPU arithmetic on two loaded operands.
+// The remainder operations are delegated to SharedRuntime::frem/drem.
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+  switch (x->op()) {
+  case Bytecodes::_fadd:
+  case Bytecodes::_fmul:
+  case Bytecodes::_fsub:
+  case Bytecodes::_fdiv:
+  case Bytecodes::_dadd:
+  case Bytecodes::_dmul:
+  case Bytecodes::_dsub:
+  case Bytecodes::_ddiv: {
+    LIRItem lhs(x->x(), this);
+    LIRItem rhs(x->y(), this);
+    lhs.load_item();
+    rhs.load_item();
+    rlock_result(x);
+    arithmetic_op_fpu(x->op(), x->operand(), lhs.result(), rhs.result(), x->is_strictfp());
+    break;
+  }
+
+  case Bytecodes::_frem:
+  case Bytecodes::_drem: {
+    // Only _frem and _drem reach this case, so a two-way choice is sufficient.
+    address entry = (x->op() == Bytecodes::_frem)
+        ? CAST_FROM_FN_PTR(address, SharedRuntime::frem)
+        : CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+    set_result(x, call_runtime(x->x(), x->y(), entry, x->type(), NULL));
+    break;
+  }
+
+  default:
+    ShouldNotReachHere();
+  }
+}
+
+
+// for  _ladd, _lmul, _lsub, _ldiv, _lrem
+// Generates LIR for long arithmetic. The right operand stays a constant when
+// it can be inlined, with two exceptions that force it into a register:
+//  - _lsub with the minimal simm16 value, and
+//  - _lrem by a constant that is neither a power of 2 nor -1.
+// Division and remainder get an explicit division-by-zero check (or an
+// unconditional jump to the stub for a constant zero divisor).
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+  bool is_div_rem = x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem;
+
+  LIRItem right(x->y(), this);
+  // Missing test if instr is commutative and if we should swap.
+  // Note: the bound is written as -(1 << 15) because left-shifting a negative
+  // value, as in (-1) << 15, is undefined behavior before C++20.
+  if (right.value()->type()->as_LongConstant() &&
+      (x->op() == Bytecodes::_lsub && right.value()->type()->as_LongConstant()->value() == -(1 << 15)) ) {
+    // Sub is implemented by addi and can't support min_simm16 as constant..
+    right.load_item();
+  } else {
+    right.load_nonconstant();
+  }
+  assert(right.is_constant() || right.is_register(), "wrong state of right");
+
+  if (is_div_rem) {
+    LIR_Opr divisor = right.result();
+    if (divisor->is_register()) {
+      // Divisor only known at run time: compare against 0 and branch to the stub.
+      CodeEmitInfo* null_check_info = state_for(x);
+      __ cmp(lir_cond_equal, divisor, LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(null_check_info));
+    } else {
+      jlong const_divisor = divisor->as_constant_ptr()->as_jlong();
+      if (const_divisor == 0) {
+        // Constant zero divisor: always jump to the stub; the result is a dummy.
+        CodeEmitInfo* null_check_info = state_for(x);
+        __ jump(new DivByZeroStub(null_check_info));
+        rlock_result(x);
+        __ move(LIR_OprFact::longConst(0), x->operand()); // dummy
+        return;
+      }
+      if (x->op() == Bytecodes::_lrem && !is_power_of_2(const_divisor) && const_divisor != -1) {
+        // Remainder computation would need additional tmp != R0.
+        right.load_item();
+      }
+    }
+  }
+
+  LIRItem left(x->x(), this);
+  left.load_item();
+  rlock_result(x);
+  if (is_div_rem) {
+    CodeEmitInfo* info = NULL; // Null check already done above.
+    LIR_Opr tmp = FrameMap::R0_opr;
+    if (x->op() == Bytecodes::_lrem) {
+      __ irem(left.result(), right.result(), x->operand(), tmp, info);
+    } else if (x->op() == Bytecodes::_ldiv) {
+      __ idiv(left.result(), right.result(), x->operand(), tmp, info);
+    }
+  } else {
+    arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+  }
+}
+
+
+// Generates LIR for the int arithmetic bytecodes _iadd, _imul, _isub, _idiv and _irem.
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+  bool is_div_rem = x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem;
+
+  LIRItem right(x->y(), this);
+  // Missing test if instr is commutative and if we should swap.
+  if (right.value()->type()->as_IntConstant() &&
+      (x->op() == Bytecodes::_isub && right.value()->type()->as_IntConstant()->value() == -(1 << 15)) ) {
+    // Sub is implemented by addi and can't support min_simm16 (== -(1 << 15)) as constant.
+    right.load_item();
+  } else {
+    right.load_nonconstant();
+  }
+  assert(right.is_constant() || right.is_register(), "wrong state of right");
+
+  if (is_div_rem) {
+    LIR_Opr divisor = right.result();
+    if (divisor->is_register()) {  // Divisor is not a constant: emit a compare against zero.
+      CodeEmitInfo* null_check_info = state_for(x);
+      __ cmp(lir_cond_equal, divisor, LIR_OprFact::intConst(0));
+      __ branch(lir_cond_equal, T_INT, new DivByZeroStub(null_check_info));
+    } else {
+      jint const_divisor = divisor->as_constant_ptr()->as_jint();
+      if (const_divisor == 0) {  // Constant zero divisor: always jump to the DivByZeroStub.
+        CodeEmitInfo* null_check_info = state_for(x);
+        __ jump(new DivByZeroStub(null_check_info));
+        rlock_result(x);
+        __ move(LIR_OprFact::intConst(0), x->operand()); // dummy
+        return;
+      }
+      if (x->op() == Bytecodes::_irem && !is_power_of_2(const_divisor) && const_divisor != -1) {
+        // Remainder computation would need additional tmp != R0.
+        right.load_item();
+      }
+    }
+  }
+
+  LIRItem left(x->x(), this);
+  left.load_item();
+  rlock_result(x);
+  if (is_div_rem) {
+    CodeEmitInfo* info = NULL; // Zero check of the divisor already done above.
+    LIR_Opr tmp = FrameMap::R0_opr;
+    if (x->op() == Bytecodes::_irem) {
+      __ irem(left.result(), right.result(), x->operand(), tmp, info);
+    } else if (x->op() == Bytecodes::_idiv) {
+      __ idiv(left.result(), right.result(), x->operand(), tmp, info);
+    }
+  } else {
+    arithmetic_op_int(x->op(), x->operand(), left.result(), right.result(), FrameMap::R0_opr);
+  }
+}
+
+// Dispatches x to the FPU, long or int arithmetic generator based on the value tag of its type.
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {
+    case floatTag:
+    case doubleTag: do_ArithmeticOp_FPU(x);  return;
+    case longTag:   do_ArithmeticOp_Long(x); return;
+    case intTag:    do_ArithmeticOp_Int(x);  return;
+  }
+  ShouldNotReachHere();  // Any other tag is unexpected here.
+}
+
+
+// Generates LIR for the shift bytecodes _ishl, _lshl, _ishr, _lshr, _iushr and _lushr.
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  LIRItem value(x->x(), this);
+  LIRItem count(x->y(), this);
+  value.load_item();
+  LIR_Opr reg = rlock_result(x);
+  LIR_Opr mcount;
+  if (count.result()->is_register()) {
+    mcount = FrameMap::R0_opr;  // Count is in a register: hand R0 to shift_op as its last operand.
+  } else {
+    mcount = LIR_OprFact::illegalOpr;  // Otherwise no extra operand is passed.
+  }
+  shift_op(x->op(), reg, value.result(), count.result(), mcount);
+}
+
+// Decides whether do_LogicOp may keep the constant described by 'type' as a constant operand of 'bc'.
+inline bool can_handle_logic_op_as_uimm(ValueType *type, Bytecodes::Code bc) {
+  jlong int_or_long_const;
+  if (type->as_IntConstant()) {
+    int_or_long_const = type->as_IntConstant()->value();
+  } else if (type->as_LongConstant()) {
+    int_or_long_const = type->as_LongConstant()->value();
+  } else if (type->as_ObjectConstant()) {
+    return type->as_ObjectConstant()->value()->is_null_object();  // Object constants qualify only if null.
+  } else {
+    return false;  // Neither an int, long nor object constant.
+  }
+
+  if (Assembler::is_uimm(int_or_long_const, 16)) return true;
+  if ((int_or_long_const & 0xFFFF) == 0 &&
+      Assembler::is_uimm((jlong)((julong)int_or_long_const >> 16), 16)) return true;  // Low 16 bits clear.
+
+  // see Assembler::andi
+  if (bc == Bytecodes::_iand &&
+      (is_power_of_2_long(int_or_long_const+1) ||
+       is_power_of_2_long(int_or_long_const) ||
+       is_power_of_2_long(-int_or_long_const))) return true;
+  if (bc == Bytecodes::_land &&
+      (is_power_of_2_long(int_or_long_const+1) ||
+       (Assembler::is_uimm(int_or_long_const, 32) && is_power_of_2_long(int_or_long_const)) ||
+       (int_or_long_const != min_jlong && is_power_of_2_long(-int_or_long_const)))) return true;
+
+  // special case: xor -1
+  if ((bc == Bytecodes::_ixor || bc == Bytecodes::_lxor) &&
+      int_or_long_const == -1) return true;
+  return false;
+}
+
+
+// Generates LIR for the logic bytecodes _iand, _land, _ior, _lor, _ixor and _lxor.
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+
+  Value rval = right.value();
+  LIR_Opr r = rval->operand();
+  ValueType *type = rval->type();
+  // Logic instructions use unsigned immediate values.
+  if (can_handle_logic_op_as_uimm(type, x->op())) {
+    if (!r->is_constant()) {  // Give the value a constant operand if it does not have one yet.
+      r = LIR_OprFact::value_type(type);
+      rval->set_operand(r);
+    }
+    right.set_result(r);
+  } else {
+    right.load_item();  // Not usable as a constant operand: load it.
+  }
+
+  LIR_Opr reg = rlock_result(x);
+
+  logic_op(x->op(), reg, left.result(), right.result());
+}
+
+
+// Generates LIR for the compare bytecodes _lcmp, _fcmpl, _fcmpg, _dcmpl and _dcmpg.
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  left.load_item();
+  right.load_item();
+  LIR_Opr reg = rlock_result(x);
+  if (x->x()->type()->is_float_kind()) {
+    Bytecodes::Code code = x->op();
+    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+  } else if (x->x()->type()->tag() == longTag) {
+    __ lcmp2int(left.result(), right.result(), reg);
+  } else {
+    Unimplemented();  // Only float-kind and long operands are handled.
+  }
+}
+
+// Generates LIR for a compare-and-swap intrinsic on an object, int or long field selected by 'type'.
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
+  assert(x->number_of_arguments() == 4, "wrong type");
+  LIRItem obj   (x->argument_at(0), this);  // object
+  LIRItem offset(x->argument_at(1), this);  // offset of field
+  LIRItem cmp   (x->argument_at(2), this);  // Value to compare with field.
+  LIRItem val   (x->argument_at(3), this);  // Replace field with val if matches cmp.
+
+  LIR_Opr t1 = LIR_OprFact::illegalOpr;
+  LIR_Opr t2 = LIR_OprFact::illegalOpr;
+  LIR_Opr addr = new_pointer_register();
+
+  // Get address of field.
+  obj.load_item();
+  offset.load_item();
+  cmp.load_item();
+  val.load_item();
+
+  __ add(obj.result(), offset.result(), addr);
+
+  // Volatile load may be followed by Unsafe CAS.
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    __ membar(); // To be safe. Unsafe semantics are unclear.
+  } else {
+    __ membar_release();
+  }
+
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Only cmp value can get overwritten, no do_load required.
+    pre_barrier(LIR_OprFact::illegalOpr /* addr */, cmp.result() /* pre_val */,
+                false /* do_load */, false /* patch */, NULL);
+  }
+
+  if (type == objectType) {
+    if (UseCompressedOops) {  // Temps are only allocated for compressed oops; otherwise they stay illegalOpr.
+      t1 = new_register(T_OBJECT);
+      t2 = new_register(T_OBJECT);
+    }
+    __ cas_obj(addr, cmp.result(), val.result(), t1, t2);
+  } else if (type == intType) {
+    __ cas_int(addr, cmp.result(), val.result(), t1, t2);
+  } else if (type == longType) {
+    __ cas_long(addr, cmp.result(), val.result(), t1, t2);
+  } else {
+    ShouldNotReachHere();
+  }
+  // Generate conditional move of boolean result.
+  LIR_Opr result = rlock_result(x);
+  __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0),
+           result, as_BasicType(type));
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Precise card mark since could either be object or array.
+    post_barrier(addr, val.result());
+  }
+}
+
+// Generates LIR for math intrinsics: _dabs (and _dsqrt if has_fsqrt()) inline, the rest via SharedRuntime calls.
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+  switch (x->id()) {
+    case vmIntrinsics::_dabs: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+      LIRItem value(x->argument_at(0), this);
+      value.load_item();
+      LIR_Opr dst = rlock_result(x);
+      __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+      break;
+    }
+    case vmIntrinsics::_dsqrt: {
+      if (VM_Version::has_fsqrt()) {
+        assert(x->number_of_arguments() == 1, "wrong type");
+        LIRItem value(x->argument_at(0), this);
+        value.load_item();
+        LIR_Opr dst = rlock_result(x);
+        __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      } // else fallthru
+    }
+    case vmIntrinsics::_dlog10: // fall through
+    case vmIntrinsics::_dlog: // fall through
+    case vmIntrinsics::_dsin: // fall through
+    case vmIntrinsics::_dtan: // fall through
+    case vmIntrinsics::_dcos: // fall through
+    case vmIntrinsics::_dexp: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+
+      address runtime_entry = NULL;
+      switch (x->id()) {
+      case vmIntrinsics::_dsqrt:  // Reached only via the fallthrough above, when has_fsqrt() is false.
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt);
+        break;
+      case vmIntrinsics::_dsin:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+        break;
+      case vmIntrinsics::_dcos:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+        break;
+      case vmIntrinsics::_dtan:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+        break;
+      case vmIntrinsics::_dlog:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+        break;
+      case vmIntrinsics::_dlog10:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+        break;
+      case vmIntrinsics::_dexp:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+
+      LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+    case vmIntrinsics::_dpow: {
+      assert(x->number_of_arguments() == 2, "wrong type");
+      address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+  }  // NOTE(review): no default case; any other intrinsic id leaves this switch without emitting anything.
+}
+
+// Generates the LIR arraycopy operation for the five-argument intrinsic (src, src_pos, dst, dst_pos, length).
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
+  assert(x->number_of_arguments() == 5, "wrong type");
+
+  // Make all state_for calls early since they can emit code.
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem src     (x->argument_at(0), this);
+  LIRItem src_pos (x->argument_at(1), this);
+  LIRItem dst     (x->argument_at(2), this);
+  LIRItem dst_pos (x->argument_at(3), this);
+  LIRItem length  (x->argument_at(4), this);
+
+  // Load all values in callee_save_registers (C calling convention),
+  // as this makes the parameter passing to the fast case simpler.
+  src.load_item_force     (FrameMap::R14_oop_opr);
+  src_pos.load_item_force (FrameMap::R15_opr);
+  dst.load_item_force     (FrameMap::R17_oop_opr);
+  dst_pos.load_item_force (FrameMap::R18_opr);
+  length.load_item_force  (FrameMap::R19_opr);
+  LIR_Opr tmp =            FrameMap::R20_opr;
+
+  int flags;
+  ciArrayKlass* expected_type;
+  arraycopy_helper(x, &flags, &expected_type);  // Fills in 'flags' and 'expected_type' for the arraycopy op below.
+
+  __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(),
+               length.result(), tmp,
+               expected_type, flags, info);
+  set_no_result(x);
+}
+
+
+// Generates LIR for the conversions _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f,
+// _i2b, _i2c and _i2s.
+void LIRGenerator::do_Convert(Convert* x) {
+  switch (x->op()) {
+
+    // long/int -> float/double: force spill
+    case Bytecodes::_l2f: {
+      if (!VM_Version::has_fcfids()) { // fcfids is >= Power7 only
+        // fcfid+frsp needs fixup code to avoid rounding incompatibility.
+        address entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f);
+        LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL);
+        set_result(x, result);
+        break;
+      } // else fallthru
+    }
+    case Bytecodes::_l2d: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.load_item();
+      LIR_Opr tmp = force_to_spill(value.result(), T_DOUBLE);
+      __ convert(x->op(), tmp, reg);
+      break;
+    }
+    case Bytecodes::_i2f:
+    case Bytecodes::_i2d: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.load_item();
+      // Convert i2l first.
+      LIR_Opr tmp1 = new_register(T_LONG);
+      __ convert(Bytecodes::_i2l, value.result(), tmp1);
+      LIR_Opr tmp2 = force_to_spill(tmp1, T_DOUBLE);
+      __ convert(x->op(), tmp2, reg);
+      break;
+    }
+
+    // float -> int: result will be stored
+    case Bytecodes::_f2l:
+    case Bytecodes::_d2l: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.set_destroys_register(); // USE_KILL
+      value.load_item();
+      set_vreg_flag(reg, must_start_in_memory);
+      __ convert(x->op(), value.result(), reg);
+      break;
+    }
+    case Bytecodes::_f2i:
+    case Bytecodes::_d2i: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.set_destroys_register(); // USE_KILL
+      value.load_item();
+      // Convert l2i afterwards.
+      LIR_Opr tmp1 = new_register(T_LONG);
+      set_vreg_flag(tmp1, must_start_in_memory);
+      __ convert(x->op(), value.result(), tmp1);
+      __ convert(Bytecodes::_l2i, tmp1, reg);
+      break;
+    }
+
+    // Within same category: just register conversions.
+    case Bytecodes::_i2b:
+    case Bytecodes::_i2c:
+    case Bytecodes::_i2s:
+    case Bytecodes::_i2l:
+    case Bytecodes::_l2i:
+    case Bytecodes::_f2d:
+    case Bytecodes::_d2f: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.load_item();
+      __ convert(x->op(), value.result(), reg);
+      break;
+    }
+
+    default: ShouldNotReachHere();
+  }
+}
+
+// Generates LIR that allocates a new instance of x->klass() and moves it into the result operand of x.
+void LIRGenerator::do_NewInstance(NewInstance* x) {
+  // This instruction can be deoptimized in the slow path.
+  const LIR_Opr reg = result_register_for(x->type());
+#ifndef PRODUCT
+  if (PrintNotLoaded && !x->klass()->is_loaded()) {
+    tty->print_cr("   ###class not loaded at new bci %d", x->printable_bci());
+  }
+#endif
+  CodeEmitInfo* info = state_for(x, x->state());
+  LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path (NewInstanceStub).
+  LIR_Opr tmp1 = FrameMap::R5_oop_opr;
+  LIR_Opr tmp2 = FrameMap::R6_oop_opr;
+  LIR_Opr tmp3 = FrameMap::R7_oop_opr;
+  LIR_Opr tmp4 = FrameMap::R8_oop_opr;
+  new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+// Generates LIR that allocates an array of x->elt_type() elements with the given length, with a NewTypeArrayStub slow path.
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem length(x->length(), this);
+  length.load_item();
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path (NewTypeArrayStub).
+  // We use R5 in order to get a temp effect. This reg is used in slow path (NewTypeArrayStub).
+  LIR_Opr tmp1 = FrameMap::R5_oop_opr;
+  LIR_Opr tmp2 = FrameMap::R6_oop_opr;
+  LIR_Opr tmp3 = FrameMap::R7_oop_opr;
+  LIR_Opr tmp4 = FrameMap::R8_oop_opr;
+  LIR_Opr len = length.result();
+  BasicType elem_type = x->elt_type();
+
+  __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
+
+  CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+// Generates LIR that allocates an object array of x->klass() elements; the klass load may need patching.
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* info = state_for(x, x->state());
+  // In case of patching (i.e., object class is not yet loaded),
+  // we need to reexecute the instruction and therefore provide
+  // the state before the parameters have been consumed.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for(x, x->state_before());
+  }
+
+  LIRItem length(x->length(), this);
+  length.load_item();
+
+  const LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path (NewObjectArrayStub).
+  // We use R5 in order to get a temp effect. This reg is used in slow path (NewObjectArrayStub).
+  LIR_Opr tmp1 = FrameMap::R5_oop_opr;
+  LIR_Opr tmp2 = FrameMap::R6_oop_opr;
+  LIR_Opr tmp3 = FrameMap::R7_oop_opr;
+  LIR_Opr tmp4 = FrameMap::R8_oop_opr;
+  LIR_Opr len = length.result();
+
+  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+  ciMetadata* obj = ciObjArrayKlass::make(x->klass());
+  if (obj == ciEnv::unloaded_ciobjarrayklass()) {  // No usable array klass: give up on this compilation.
+    BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+  }
+  klass2reg_with_patching(klass_reg, obj, patching_info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+// Generates LIR that stores the dimension sizes on the stack and calls the Runtime1 new_multi_array entry.
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
+  Values* dims = x->dims();
+  int i = dims->length();
+  LIRItemList* items = new LIRItemList(dims->length(), NULL);
+  while (i-- > 0) {  // Wrap every dimension value in a LIRItem, last dimension first.
+    LIRItem* size = new LIRItem(dims->at(i), this);
+    items->at_put(i, size);
+  }
+
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for(x, x->state_before());
+
+    // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+    // clone all handlers (NOTE: Usually this is handled transparently
+    // by the CodeEmitInfo cloning logic in CodeStub constructors but
+    // is done explicitly here because a stub isn't being used).
+    x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+  }
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  i = dims->length();
+  while (i-- > 0) {
+    LIRItem* size = items->at(i);
+    size->load_nonconstant();
+    // FrameMap::_reserved_argument_area_size includes the dimensions
+    // varargs, because it's initialized to hir()->max_stack() when the
+    // FrameMap is created.
+    store_stack_parameter(size->result(), in_ByteSize(i*sizeof(jint) + FrameMap::first_available_sp_in_frame));
+  }
+
+  const LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path.
+  klass2reg_with_patching(klass_reg, x->klass(), patching_info);
+
+  LIR_Opr rank = FrameMap::R5_opr; // Used by slow path.
+  __ move(LIR_OprFact::intConst(x->rank()), rank);
+
+  LIR_Opr varargs = FrameMap::as_pointer_opr(R6); // Used by slow path.
+  __ leal(LIR_OprFact::address(new LIR_Address(FrameMap::SP_opr, FrameMap::first_available_sp_in_frame, T_INT)),
+          varargs);
+
+  // Note: This instruction can be deoptimized in the slow path.
+  LIR_OprList* args = new LIR_OprList(3);
+  args->append(klass_reg);
+  args->append(rank);
+  args->append(varargs);
+  const LIR_Opr reg = result_register_for(x->type());
+  __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+                  LIR_OprFact::illegalOpr,
+                  reg, args, info);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+// No LIR is generated for a BlockBegin here; the body is intentionally empty.
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // nothing to do for now
+}
+
+// Generates the LIR checkcast operation for x, with an exception stub chosen by the kind of check.
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+  LIRItem obj(x->obj(), this);
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) {
+    // Must do this before locking the destination register as
+    // an oop register, and before the obj is loaded (so x->obj()->item()
+    // is valid for creating a debug info location).
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+  LIR_Opr out_reg = rlock_result(x);
+  CodeStub* stub;
+  CodeEmitInfo* info_for_exception = state_for(x);
+
+  if (x->is_incompatible_class_change_check()) {
+    assert(patching_info == NULL, "can't patch this");
+    stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id,
+                                   LIR_OprFact::illegalOpr, info_for_exception);
+  } else {  // Ordinary checkcast: the stub throws a class cast exception and receives the object.
+    stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+  }
+  // Following registers are used by slow_subtype_check:
+  LIR_Opr tmp1 = FrameMap::R4_oop_opr; // super_klass
+  LIR_Opr tmp2 = FrameMap::R5_oop_opr; // sub_klass
+  LIR_Opr tmp3 = FrameMap::R6_oop_opr; // temp
+  __ checkcast(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3,
+               x->direct_compare(), info_for_exception, patching_info, stub,
+               x->profiled_method(), x->profiled_bci());
+}
+
+// Generates the LIR instanceof operation testing x->obj() against x->klass().
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
+  LIRItem obj(x->obj(), this);
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {  // Patching info uses the state before the instruction.
+    patching_info = state_for(x, x->state_before());
+  }
+  // Ensure the result register is not the input register because the
+  // result is initialized before the patching safepoint.
+  obj.load_item();
+  LIR_Opr out_reg = rlock_result(x);
+  // Following registers are used by slow_subtype_check:
+  LIR_Opr tmp1 = FrameMap::R4_oop_opr; // super_klass
+  LIR_Opr tmp2 = FrameMap::R5_oop_opr; // sub_klass
+  LIR_Opr tmp3 = FrameMap::R6_oop_opr; // temp
+  __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3,
+                x->direct_compare(), patching_info,
+                x->profiled_method(), x->profiled_bci());
+}
+
+// Generates LIR for a two-way branch: compares x->x() with x->y(), branches on cond, else jumps to the default successor.
+void LIRGenerator::do_If(If* x) {
+  assert(x->number_of_sux() == 2, "inconsistency");
+  ValueTag tag = x->x()->type()->tag();
+  LIRItem xitem(x->x(), this);
+  LIRItem yitem(x->y(), this);
+  LIRItem* xin = &xitem;
+  LIRItem* yin = &yitem;
+  If::Condition cond = x->cond();
+
+  LIR_Opr left = LIR_OprFact::illegalOpr;
+  LIR_Opr right = LIR_OprFact::illegalOpr;
+
+  xin->load_item();
+  left = xin->result();
+
+  if (yin->result()->is_constant() && yin->result()->type() == T_INT &&
+      Assembler::is_simm16(yin->result()->as_constant_ptr()->as_jint())) {
+    // Inline int constants which are small enough to be immediate operands.
+    right = LIR_OprFact::value_type(yin->value()->type());
+  } else if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 &&
+             (cond == If::eql || cond == If::neq)) {
+    // Inline long zero.
+    right = LIR_OprFact::value_type(yin->value()->type());
+  } else if (tag == objectTag && yin->is_constant() && (yin->get_jobject_constant()->is_null_object())) {
+    right = LIR_OprFact::value_type(yin->value()->type());  // Inline the null object constant.
+  } else {
+    yin->load_item();
+    right = yin->result();
+  }
+  set_no_result(x);
+
+  // Add safepoint before generating condition code so it can be recomputed.
+  if (x->is_safepoint()) {
+    // Increment backedge counter if needed.
+    increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
+    __ safepoint(safepoint_poll_register(), state_for(x, x->state_before()));
+  }
+
+  __ cmp(lir_cond(cond), left, right);
+  // Generate branch profiling. Profiling code doesn't kill flags.
+  profile_branch(x, cond);
+  move_to_phi(x->state());
+  if (x->x()->type()->is_float_kind()) {  // Float compares additionally pass the usux() successor.
+    __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());
+  } else {
+    __ branch(lir_cond(cond), right->type(), x->tsux());
+  }
+  assert(x->default_sux() == x->fsux(), "wrong destination above");
+  __ jump(x->default_sux());
+}
+
+// Returns R16_thread wrapped as a pointer operand.
+LIR_Opr LIRGenerator::getThreadPointer() {
+  return FrameMap::as_pointer_opr(R16_thread);
+}
+
+// Emits a leaf runtime call to Runtime1::trace_block_entry, passing the id of 'block' in R3.
+void LIRGenerator::trace_block_entry(BlockBegin* block) {
+  LIR_Opr arg1 = FrameMap::R3_opr; // ARG1
+  __ move(LIR_OprFact::intConst(block->block_id()), arg1);
+  LIR_OprList* args = new LIR_OprList(1);
+  args->append(arg1);
+  address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry);
+  __ call_runtime_leaf(func, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, args);
+}
+
+// Stores 'value' to a volatile field at 'address': a plain store on _LP64, unimplemented otherwise.
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
+                                        CodeEmitInfo* info) {
+#ifdef _LP64
+  __ store(value, address, info);
+#else
+  Unimplemented();
+//  __ volatile_store_mem_reg(value, address, info);
+#endif
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
+                                       CodeEmitInfo* info) {
+#ifdef _LP64
+  __ load(address, result, info);  // Volatile field load is emitted as a plain load on _LP64.
+#else
+  Unimplemented();  // Builds without _LP64 are not supported by this function.
+//  __ volatile_load_mem_reg(address, result, info);
+#endif
+}
+
+// Emits an unsafe store of 'data' to src + offset, with pre/post barriers when 'type' is T_OBJECT or T_ARRAY.
+void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
+                                     BasicType type, bool is_volatile) {
+  LIR_Opr base_op = src;
+  LIR_Opr index_op = offset;
+
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+#ifndef _LP64
+  if (is_volatile && type == T_LONG) {
+    __ volatile_store_unsafe_reg(data, src, offset, type, NULL, lir_patch_none);
+  } else
+#endif
+  {
+    if (type == T_BOOLEAN) {
+      type = T_BYTE;  // Booleans are stored as bytes.
+    }
+    LIR_Address* addr;
+    if (type == T_ARRAY || type == T_OBJECT) {  // For oops, compute base + index into one register first.
+      LIR_Opr tmp = new_pointer_register();
+      __ add(base_op, index_op, tmp);
+      addr = new LIR_Address(tmp, type);
+    } else {
+      addr = new LIR_Address(base_op, index_op, type);
+    }
+
+    if (is_obj) {
+      pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+          true /* do_load */, false /* patch */, NULL);
+      // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr));
+    }
+    __ move(data, addr);
+    if (is_obj) {
+      // This address is precise.
+      post_barrier(LIR_OprFact::address(addr), data);
+    }
+  }
+}
+
+// Emits an unsafe load of a 'type' value from src + offset into 'dst'.
+void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset,
+                                     BasicType type, bool is_volatile) {
+#ifndef _LP64
+  if (is_volatile && type == T_LONG) {
+    __ volatile_load_unsafe_reg(src, offset, dst, type, NULL, lir_patch_none);
+  } else
+#endif
+    {
+    LIR_Address* addr = new LIR_Address(src, offset, type);
+    __ load(addr, dst);  // On _LP64 this is the only path; is_volatile is not consulted here.
+  }
+}
+
+// Generates LIR for an unsafe get-and-add (x->is_add()) or get-and-set at object + offset, fenced by membars.
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  BasicType type = x->basic_type();
+  LIRItem src(x->object(), this);
+  LIRItem off(x->offset(), this);
+  LIRItem value(x->value(), this);
+
+  src.load_item();
+  value.load_item();
+  off.load_nonconstant();
+
+  LIR_Opr dst = rlock_result(x, type);
+  LIR_Opr data = value.result();
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+
+  LIR_Opr tmp = FrameMap::R0_opr;
+  LIR_Opr ptr = new_pointer_register();
+  __ add(src.result(), off.result(), ptr);
+
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    __ membar();
+  } else {
+    __ membar_release();
+  }
+
+  if (x->is_add()) {
+    __ xadd(ptr, data, dst, tmp);
+  } else {
+    const bool can_move_barrier = true; // TODO: port GraphKit::can_move_pre_barrier() from C2
+    if (!can_move_barrier && is_obj) {  // Never taken while can_move_barrier is hard-wired to true.
+      // Do the pre-write barrier, if any.
+      pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */,
+                  true /* do_load */, false /* patch */, NULL);
+    }
+    __ xchg(ptr, data, dst, tmp);
+    if (is_obj) {
+      // Seems to be a precise address.
+      post_barrier(ptr, data);
+      if (can_move_barrier) {  // Pre-barrier is issued after the exchange, using the old value in dst.
+        pre_barrier(LIR_OprFact::illegalOpr, dst /* pre_val */,
+                    false /* do_load */, false /* patch */, NULL);
+      }
+    }
+  }
+
+  __ membar();
+}
+
+// Generates LIR for the CRC32 intrinsics _updateCRC32, _updateBytesCRC32 and _updateByteBufferCRC32.
+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+  assert(UseCRC32Intrinsics, "or should not be here");
+  LIR_Opr result = rlock_result(x);
+
+  switch (x->id()) {
+    case vmIntrinsics::_updateCRC32: {
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem val(x->argument_at(1), this);
+      // Registers destroyed by update_crc32.
+      crc.set_destroys_register();
+      val.set_destroys_register();
+      crc.load_item();
+      val.load_item();
+      __ update_crc32(crc.result(), val.result(), result);
+      break;
+    }
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32: {
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem len(x->argument_at(3), this);
+      buf.load_item();
+      off.load_nonconstant();
+
+      LIR_Opr index = off.result();
+      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+      if (off.result()->is_constant()) {  // Constant offset: fold it into the displacement, no index register.
+        index = LIR_OprFact::illegalOpr;
+        offset += off.result()->as_jint();
+      }
+      LIR_Opr base_op = buf.result();
+      LIR_Address* a = NULL;
+
+      if (index->is_valid()) {  // Variable offset: widen it to long and add the displacement.
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+        __ add(index, LIR_OprFact::intptrConst(offset), index);
+        a = new LIR_Address(base_op, index, T_BYTE);
+      } else {
+        a = new LIR_Address(base_op, offset, T_BYTE);
+      }
+
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr arg1 = cc->at(0),
+              arg2 = cc->at(1),
+              arg3 = cc->at(2);
+
+      // CCallingConventionRequiresIntsAsLongs
+      crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
+      __ leal(LIR_OprFact::address(a), arg2);
+      load_int_as_long(gen()->lir(), len, arg3);
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args());
+      __ move(result_reg, result);
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_LinearScan_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/bitMap.inline.hpp"
+
+void LinearScan::allocate_fpu_stack() {
+  Unimplemented();
+  // No FPU stack on PPC, so this function is deliberately left unimplemented.
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_LinearScan_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_LINEARSCAN_PPC_HPP
+#define CPU_PPC_VM_C1_LINEARSCAN_PPC_HPP
+
+inline bool LinearScan::is_processed_reg_num(int reg_num) {  // True unless reg_num falls in the gap after last_cpu_reg() checked below.
+  assert(FrameMap::R0_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 1, "wrong assumption below");  // R0, R1, R13, R16 and R29 must be
+  assert(FrameMap::R1_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 2, "wrong assumption below");  // numbered consecutively, directly
+  assert(FrameMap::R13_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 3, "wrong assumption below"); // after FrameMap::last_cpu_reg().
+  assert(FrameMap::R16_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 4, "wrong assumption below");
+  assert(FrameMap::R29_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 5, "wrong assumption below");
+  return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;  // Numbers strictly between the two bounds are not processed.
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) {
+  return 1;  // Always one physical register; the type argument is not consulted.
+}
+
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
+  return false;  // Never required; the type argument is not consulted.
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {
+  return true; // Every register is reported as caller-saved (disabled alternative: assigned_reg < pd_first_callee_saved_reg).
+}
+
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+  // No special-case behaviours yet: nothing is added for any op.
+}
+
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {  // Returns true if _first_reg/_last_reg were set here for cur.
+  if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
+    assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
+    _first_reg = pd_first_callee_saved_reg;
+    _last_reg = pd_last_callee_saved_reg;
+    ShouldNotReachHere(); // Currently no callee saved regs, so this branch is not expected to be taken.
+    return true;
+  } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT ||
+             cur->type() == T_ADDRESS || cur->type() == T_METADATA) {
+    _first_reg = pd_first_cpu_reg;  // These types are restricted to the range pd_first_cpu_reg..pd_last_cpu_reg.
+    _last_reg = pd_last_cpu_reg;
+    return true;
+  }
+  return false;  // Any other type: no range is set here.
+}
+
+#endif // CPU_PPC_VM_C1_LINEARSCAN_PPC_HPP

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_MacroAssembler_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {  // Compares the receiver's klass with iCache.
+  const Register temp_reg = R12_scratch2;
+  verify_oop(receiver);
+  load_klass(temp_reg, receiver);
+  if (TrapBasedICMissChecks) {
+    trap_ic_miss_check(temp_reg, iCache);
+  } else {
+    Label L;
+    cmpd(CCR0, temp_reg, iCache);
+    beq(CCR0, L);  // Klass matches: skip the jump to the miss stub.
+    // Disabled alternative: load_const_optimized(temp_reg, SharedRuntime::get_ic_miss_stub(), R0);
+    calculate_address_from_global_toc(temp_reg, SharedRuntime::get_ic_miss_stub(), true, true, false);
+    mtctr(temp_reg);  // Mismatch: branch to the IC miss stub via CTR.
+    bctr();
+    align(32, 12);
+    bind(L);
+  }
+}
+
+
+void C1_MacroAssembler::explicit_null_check(Register base) {
+  Unimplemented();  // Not implemented in this file; base is unused.
+}
+
+
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  // Make sure there is enough stack space for this method's activation.
+  generate_stack_overflow_check(bang_size_in_bytes);
+
+  // Create the frame.
+  const Register return_pc  = R0;
+
+  mflr(return_pc);
+  // Save the return pc relative to the current SP before the new frame is pushed.
+  std(return_pc, _abi(lr), R1_SP);           // SP->lr = return_pc
+  push_frame(frame_size_in_bytes, R0);       // SP -= frame_size_in_bytes
+}
+
+
+void C1_MacroAssembler::unverified_entry(Register receiver, Register ic_klass) {
+  Unimplemented(); // Currently unused. The lines below are a disabled sketch of an implementation.
+  //if (C1Breakpoint) illtrap();
+  //inline_cache_check(receiver, ic_klass);
+}
+
+
+void C1_MacroAssembler::verified_entry() {
+  if (C1Breakpoint) illtrap();
+  // No frame is built here; build_frame() is a separate function.
+}
+
+
+void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox, Register Rscratch, Label& slow_case) {
+  assert_different_registers(Rmark, Roop, Rbox, Rscratch);
+
+  Label done, cas_failed, slow_int;
+
+  // The following move must be the first instruction emitted since debug
+  // information may be generated for it.
+  // Load object header.
+  ld(Rmark, oopDesc::mark_offset_in_bytes(), Roop);
+
+  verify_oop(Roop);
+
+  // Save object being locked into the BasicObjectLock...
+  std(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
+
+  if (UseBiasedLocking) {
+    biased_locking_enter(CCR0, Roop, Rmark, Rscratch, R0, done, &slow_int);
+  }
+
+  // ... and mark it unlocked.
+  ori(Rmark, Rmark, markOopDesc::unlocked_value);
+
+  // Save unlocked object header into the displaced header location on the stack.
+  std(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+
+  // Compare object markOop with Rmark and, if equal, store Rbox into the object markOop; Rscratch receives the value found.
+  assert(oopDesc::mark_offset_in_bytes() == 0, "cas must take a zero displacement");
+  cmpxchgd(/*flag=*/CCR0,
+           /*current_value=*/Rscratch,
+           /*compare_value=*/Rmark,
+           /*exchange_value=*/Rbox,
+           /*where=*/Roop/*+0==mark_offset_in_bytes*/,
+           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+           MacroAssembler::cmpxchgx_hint_acquire_lock(),
+           noreg,
+           &cas_failed,
+           /*check without membar and ldarx first*/true);
+  // If compare/exchange succeeded we found an unlocked object and we now have locked it
+  // hence we are done.
+  b(done);
+
+  bind(slow_int);  // Local trampoline: nearby branches target slow_int, which then jumps to slow_case.
+  b(slow_case); // far
+
+  bind(cas_failed);
+  // We did not find an unlocked object so see if this is a recursive case.
+  sub(Rscratch, Rscratch, R1_SP);  // Distance of the observed mark word from the current SP.
+  load_const_optimized(R0, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+  and_(R0/*==0?*/, Rscratch, R0);  // Zero iff the distance is below one page and the lock bits are clear.
+  std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+  bne(CCR0, slow_int);  // Non-zero: not a recursive lock, take the slow path.
+
+  bind(done);
+}
+
+
+void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case) {
+  assert_different_registers(Rmark, Roop, Rbox);
+
+  Label slow_int, done;
+
+  Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
+  assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+
+  if (UseBiasedLocking) {
+    // Load the object out of the BasicObjectLock.
+    ld(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
+    verify_oop(Roop);
+    biased_locking_exit(CCR0, Roop, R0, done);
+  }
+  // Test first if it is a fast recursive unlock.
+  ld(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+  cmpdi(CCR0, Rmark, 0);  // A zero displaced header marks the recursive case (lock_object stores zero there).
+  beq(CCR0, done);
+  if (!UseBiasedLocking) {
+    // Load object (with biased locking it was already loaded above).
+    ld(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
+    verify_oop(Roop);
+  }
+
+  // Check if it is still a light weight lock, this is true if we see
+  // the stack address of the basicLock in the markOop of the object.
+  cmpxchgd(/*flag=*/CCR0,
+           /*current_value=*/R0,
+           /*compare_value=*/Rbox,
+           /*exchange_value=*/Rmark,
+           /*where=*/Roop,
+           MacroAssembler::MemBarRel,
+           MacroAssembler::cmpxchgx_hint_release_lock(),
+           noreg,
+           &slow_int);
+  b(done);
+  bind(slow_int);  // Local trampoline to the caller-supplied slow path.
+  b(slow_case); // far
+
+  // Done
+  bind(done);
+}
+
+
+void C1_MacroAssembler::try_allocate(
+  Register obj,                        // result: pointer to object after successful allocation
+  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
+  int      con_size_in_bytes,          // object size in bytes if   known at compile time
+  Register t1,                         // temp register, must be global register for incr_allocated_bytes
+  Register t2,                         // temp register
+  Label&   slow_case                   // continuation point if fast allocation fails
+) {
+  if (UseTLAB) {
+    tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);  // t2 is not used on this path.
+  } else {
+    eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
+    RegisterOrConstant size_in_bytes = var_size_in_bytes->is_valid()  // Use the register size if one was given,
+                                       ? RegisterOrConstant(var_size_in_bytes)
+                                       : RegisterOrConstant(con_size_in_bytes);  // otherwise the compile-time constant.
+    incr_allocated_bytes(size_in_bytes, t1, t2);  // Only done on the non-TLAB path.
+  }
+}
+
+
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {  // Writes mark word, klass and (for arrays) length; t2 is not used in this body.
+  assert_different_registers(obj, klass, len, t1, t2);
+  if (UseBiasedLocking && !len->is_valid()) {
+    ld(t1, in_bytes(Klass::prototype_header_offset()), klass);  // No length given: take the mark word from the klass' prototype header.
+  } else {
+    load_const_optimized(t1, (intx)markOopDesc::prototype());  // Otherwise use the constant prototype mark word.
+  }
+  std(t1, oopDesc::mark_offset_in_bytes(), obj);
+  store_klass(obj, klass);
+  if (len->is_valid()) {
+    stw(len, arrayOopDesc::length_offset_in_bytes(), obj);  // A valid len register means an array: store its length.
+  } else if (UseCompressedClassPointers) {
+    // Otherwise length is in the class gap.
+    store_klass_gap(obj);
+  }
+}
+
+
+void C1_MacroAssembler::initialize_body(Register base, Register index) {  // Clears memory starting at base; index is overwritten.
+  assert_different_registers(base, index);
+  srdi(index, index, LogBytesPerWord);  // Scale index down by the word size (callers in this file pass a byte count).
+  clear_memory_doubleword(base, index);
+}
+
+void C1_MacroAssembler::initialize_body(Register obj, Register tmp1, Register tmp2,
+                                        int obj_size_in_bytes, int hdr_size_in_bytes) {  // Zeroes the body of a constant-size object; R0, tmp1 and tmp2 are clobbered.
+  const int index = (obj_size_in_bytes - hdr_size_in_bytes) / HeapWordSize;  // Number of heap words to clear.
+
+  const int cl_size         = VM_Version::L1_data_cache_line_size(),
+            cl_dwords       = cl_size>>3,  // Doublewords per cache line.
+            cl_dw_addr_bits = exact_log2(cl_dwords);
+
+  const Register tmp = R0,
+                 base_ptr = tmp1,
+                 cnt_dwords = tmp2;
+
+  if (index <= 6) {
+    // Use explicit NULL stores.
+    if (index > 0) { li(tmp, 0); }
+    for (int i = 0; i < index; ++i) { std(tmp, hdr_size_in_bytes + i * HeapWordSize, obj); }
+
+  } else if (index < (2<<cl_dw_addr_bits)-1) {  // Fewer than 2*cl_dwords-1 words: too small for the cache-line path below.
+    // simple loop
+    Label loop;
+
+    li(cnt_dwords, index);
+    addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element.
+    li(tmp, 0);
+    mtctr(cnt_dwords);                      // Load counter.
+  bind(loop);
+    std(tmp, 0, base_ptr);                  // Clear 8byte aligned block.
+    addi(base_ptr, base_ptr, 8);
+    bdnz(loop);
+
+  } else {
+    // like clear_memory_doubleword ("128byte" in the comments below refers to the cache line size cl_size)
+    Label startloop, fast, fastloop, restloop, done;
+
+    addi(base_ptr, obj, hdr_size_in_bytes);           // Compute address of first element.
+    load_const_optimized(cnt_dwords, index);
+    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
+    beq(CCR0, fast);                                  // Already 128byte aligned.
+
+    subfic(tmp, tmp, cl_dwords);
+    mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
+    subf(cnt_dwords, tmp, cnt_dwords); // rest.
+    li(tmp, 0);
+
+  bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
+    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
+    addi(base_ptr, base_ptr, 8);
+    bdnz(startloop);
+
+  bind(fast);                                  // Clear 128byte blocks.
+    srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
+    andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
+    mtctr(tmp);                                // Load counter.
+
+  bind(fastloop);
+    dcbz(base_ptr);                    // Clear 128byte aligned block.
+    addi(base_ptr, base_ptr, cl_size);
+    bdnz(fastloop);
+
+    cmpdi(CCR0, cnt_dwords, 0);        // size 0?
+    beq(CCR0, done);                   // rest == 0
+    li(tmp, 0);
+    mtctr(cnt_dwords);                 // Load counter.
+
+  bind(restloop);                      // Clear rest.
+    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
+    addi(base_ptr, base_ptr, 8);
+    bdnz(restloop);
+
+  bind(done);
+  }
+}
+
+void C1_MacroAssembler::allocate_object(
+  Register obj,                        // result: pointer to object after successful allocation
+  Register t1,                         // temp register
+  Register t2,                         // temp register
+  Register t3,                         // temp register
+  int      hdr_size,                   // object header size in words (not used in this body)
+  int      obj_size,                   // object size in words
+  Register klass,                      // object klass
+  Label&   slow_case                   // continuation point if fast allocation fails
+) {
+  assert_different_registers(obj, t1, t2, t3, klass);
+
+  // allocate space & initialize header
+  if (!is_simm16(obj_size * wordSize)) {
+    // Would need to use extra register to load
+    // object size => go the slow case for now.
+    b(slow_case);
+    return;
+  }
+  try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case);  // Constant-size allocation: no size register.
+
+  initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);  // Writes the header and clears the body.
+}
+
+void C1_MacroAssembler::initialize_object(
+  Register obj,                        // result: pointer to object after successful allocation
+  Register klass,                      // object klass
+  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
+  int      con_size_in_bytes,          // object size in bytes if   known at compile time
+  Register t1,                         // temp register
+  Register t2                          // temp register
+  ) {
+  const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
+
+  initialize_header(obj, klass, noreg, t1, t2);  // noreg as length: this is not an array.
+
+#ifdef ASSERT
+  {
+    lwz(t1, in_bytes(Klass::layout_helper_offset()), klass);  // Debug check: the klass' layout helper must equal the given size.
+    if (var_size_in_bytes != noreg) {
+      cmpw(CCR0, t1, var_size_in_bytes);
+    } else {
+      cmpwi(CCR0, t1, con_size_in_bytes);
+    }
+    asm_assert_eq("bad size in initialize_object", 0x753);
+  }
+#endif
+
+  // Initialize body.
+  if (var_size_in_bytes != noreg) {
+    // Use a loop.
+    addi(t1, obj, hdr_size_in_bytes);                // Compute address of first element.
+    addi(t2, var_size_in_bytes, -hdr_size_in_bytes); // Compute size of body.
+    initialize_body(t1, t2);
+  } else if (con_size_in_bytes > hdr_size_in_bytes) {
+    // Constant size with a non-empty body: let the constant-size overload pick the clearing strategy.
+    initialize_body(obj, t1, t2, con_size_in_bytes, hdr_size_in_bytes);
+  }
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    Unimplemented();  // DTrace allocation probes are not supported here; the lines below are disabled code.
+//    assert(obj == O0, "must be");
+//    call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)),
+//         relocInfo::runtime_call_type);
+  }
+
+  verify_oop(obj);
+}
+
+
+void C1_MacroAssembler::allocate_array(
+  Register obj,                        // result: pointer to array after successful allocation
+  Register len,                        // array length
+  Register t1,                         // temp register
+  Register t2,                         // temp register
+  Register t3,                         // temp register
+  int      hdr_size,                   // object header size in words
+  int      elt_size,                   // element size in bytes
+  Register klass,                      // object klass
+  Label&   slow_case                   // continuation point if fast allocation fails
+) {
+  assert_different_registers(obj, len, t1, t2, t3, klass);
+
+  // Determine alignment mask.
+  assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
+  int log2_elt_size = exact_log2(elt_size);
+
+  // Check for negative or excessive length.
+  size_t max_length = max_array_allocation_length >> log2_elt_size;
+  if (UseTLAB) {
+    size_t max_tlab = align_size_up(ThreadLocalAllocBuffer::max_size() >> log2_elt_size, 64*K);
+    if (max_tlab < max_length) { max_length = max_tlab; }
+  }
+  load_const_optimized(t1, max_length);
+  cmpld(CCR0, len, t1);  // cmpld is used so that one compare covers both the negative and the too-large case.
+  bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_case);
+
+  // compute array size
+  // note: If 0 <= len <= max_length, len*elt_size + header + alignment is
+  //       smaller or equal to the largest integer; also, since top is always
+  //       aligned, we can do the alignment here instead of at the end address
+  //       computation.
+  const Register arr_size = t1;
+  Register arr_len_in_bytes = len;
+  if (elt_size != 1) {
+    sldi(t1, len, log2_elt_size);
+    arr_len_in_bytes = t1;
+  }
+  addi(arr_size, arr_len_in_bytes, hdr_size * wordSize + MinObjAlignmentInBytesMask); // Add space for header & alignment.
+  clrrdi(arr_size, arr_size, LogMinObjAlignmentInBytes);                              // Align array size.
+
+  // Allocate space & initialize header.
+  if (UseTLAB) {
+    tlab_allocate(obj, arr_size, 0, t2, slow_case);
+  } else {
+    eden_allocate(obj, arr_size, 0, t2, t3, slow_case);  // NOTE(review): unlike try_allocate(), no incr_allocated_bytes() here - confirm this is intended.
+  }
+  initialize_header(obj, klass, len, t2, t3);
+
+  // Initialize body.
+  const Register base  = t2;
+  const Register index = t3;
+  addi(base, obj, hdr_size * wordSize);               // compute address of first element
+  addi(index, arr_size, -(hdr_size * wordSize));      // compute index = number of bytes to clear
+  initialize_body(base, index);
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    Unimplemented();  // DTrace allocation probes are not supported here; the lines below are disabled code.
+    //assert(obj == O0, "must be");
+    //call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)),
+    //     relocInfo::runtime_call_type);
+  }
+
+  verify_oop(obj);
+}
+
+
+#ifndef PRODUCT
+
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+  verify_oop_addr((RegisterOrConstant)(stack_offset + STACK_BIAS), R1_SP, "broken oop in stack slot");  // Slot address is R1_SP + stack_offset + STACK_BIAS.
+}
+
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+  Label not_null;
+  cmpdi(CCR0, r, 0);
+  bne(CCR0, not_null);
+  stop("non-null oop required");  // Emitted on the r == 0 path only.
+  bind(not_null);
+  if (!VerifyOops) return;  // The null test above is emitted regardless of VerifyOops; the oop check only when it is set.
+  verify_oop(r);
+}
+
+#endif // PRODUCT
+
+void C1_MacroAssembler::null_check(Register r, Label* Lnull) {
+  if (TrapBasedNullChecks) { // SIGTRAP based; Lnull is not used on this path.
+    trap_null_check(r);
+  } else { // explicit
+    // Disabled: const address exception_entry = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+    assert(Lnull != NULL, "must have Label for explicit check");
+    cmpdi(CCR0, r, 0);
+    bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::equal), *Lnull);  // Branch to *Lnull when r == 0.
+  }
+}
+
+address C1_MacroAssembler::call_c_with_frame_resize(address dest, int frame_resize) {  // Returns the address delivered by call_c().
+  if (frame_resize) { resize_frame(-frame_resize, R0); }  // Adjust the frame by -frame_resize around the call (skipped when 0).
+#if defined(ABI_ELFv2)
+  address return_pc = call_c(dest, relocInfo::runtime_call_type);
+#else
+  address return_pc = call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, dest), relocInfo::runtime_call_type);  // Without ABI_ELFv2, dest is passed as a FunctionDescriptor*.
+#endif
+  if (frame_resize) { resize_frame(frame_resize, R0); }  // Undo the adjustment.
+  return return_pc;
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_MacroAssembler_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_MACROASSEMBLER_PPC_HPP
+#define CPU_PPC_VM_C1_MACROASSEMBLER_PPC_HPP
+
+  void pd_init() { /* nothing to do: no platform-dependent initialization */ }
+
+ public:
+   void try_allocate(                 // Allocates via tlab_allocate if UseTLAB, otherwise via eden_allocate.
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register (only used on the non-TLAB path)
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+
+  void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);  // Stores mark word, klass and, if len is valid, the array length.
+  void initialize_body(Register base, Register index);  // Clears memory from base; index is scaled by LogBytesPerWord and overwritten.
+  void initialize_body(Register obj, Register tmp1, Register tmp2, int obj_size_in_bytes, int hdr_size_in_bytes);  // Clears the body of a constant-size object.
+
+  // locking/unlocking: both branch to slow_case when the inline fast path does not succeed.
+  void lock_object  (Register Rmark, Register Roop, Register Rbox, Register Rscratch, Label& slow_case);
+  void unlock_object(Register Rmark, Register Roop, Register Rbox,                    Label& slow_case);
+
+  void initialize_object(              // Writes the header of a non-array object and clears its body.
+    Register obj,                      // result: pointer to object after successful allocation
+    Register klass,                    // object klass
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2                        // temp register
+  );
+
+  // Allocation of fixed-size objects
+  // (Can also be used to allocate fixed-size arrays, by setting
+  // hdr_size correctly and storing the array length afterwards.)
+  void allocate_object(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Register t3,                       // temp register
+    int      hdr_size,                 // object header size in words
+    int      obj_size,                 // object size in words; goes to slow_case if the byte size is not a simm16
+    Register klass,                    // object klass
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+
+  enum {
+    max_array_allocation_length = 0x40000000 // ppc friendly value, requires lis only; allocate_array shifts it right by log2(elt_size) to get its length limit
+  };
+
+  // Allocation of arrays (len is compared unsigned against a maximum; larger values go to slow_case)
+  void allocate_array(
+    Register obj,                      // result: pointer to array after successful allocation
+    Register len,                      // array length
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Register t3,                       // temp register
+    int      hdr_size,                 // object header size in words
+    int      elt_size,                 // element size in bytes
+    Register klass,                    // object klass
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+
+  void null_check(Register r, Label *Lnull = NULL);  // Lnull must be non-NULL unless TrapBasedNullChecks is set.
+
+  address call_c_with_frame_resize(address dest, int frame_resize);  // Calls dest via call_c, resizing the frame by frame_resize around the call if non-zero.
+
+#endif // CPU_PPC_VM_C1_MACROASSEMBLER_PPC_HPP

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_Runtime1_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,1020 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_ppc.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_ppc.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_ppc.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+// Implementation of StubAssembler
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, // Emits a call to the runtime entry plus the pending-exception check.
+                           address entry_point, int number_of_arguments) { // number_of_arguments is not read in this body.
+  set_num_rt_args(0); // No runtime arguments are passed on the stack.
+  assert(!(oop_result1->is_valid() || metadata_result->is_valid()) ||
+         oop_result1 != metadata_result, "registers must be different");
+
+  // Currently no stack banging. We assume that there are enough
+  // StackShadowPages (which have been banged in generate_stack_overflow_check)
+  // for the stub frame and the runtime frames.
+
+  set_last_Java_frame(R1_SP, noreg);
+
+  // ARG1 must hold thread address.
+  mr(R3_ARG1, R16_thread);
+
+  address return_pc = call_c_with_frame_resize(entry_point, /*No resize, we have a C compatible frame.*/0);
+
+  reset_last_Java_frame();
+
+  // Check for pending exceptions.
+  {
+    ld(R0, in_bytes(Thread::pending_exception_offset()), R16_thread); // R0 = the thread's pending exception field.
+    cmpdi(CCR0, R0, 0);
+
+    // This used to be a conditional jump to forward_exception. However, if the
+    // code is relocated the conditional branch may not reach, so we branch
+    // around a jump through CTR that can always reach.
+
+    Label ok;
+    beq(CCR0, ok); // Field is zero: nothing pending, take the normal path.
+
+    // Make sure that the vm_results are cleared (only those the caller asked for).
+    if (oop_result1->is_valid() || metadata_result->is_valid()) {
+      li(R0, 0);
+      if (oop_result1->is_valid()) {
+        std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
+      }
+      if (metadata_result->is_valid()) {
+        std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
+      }
+    }
+
+    if (frame_size() == no_frame_size) {
+      ShouldNotReachHere(); // We always have a frame size.
+      //pop_frame(); // pop the stub frame
+      //ld(R0, _abi(lr), R1_SP);
+      //mtlr(R0);
+      //load_const_optimized(R0, StubRoutines::forward_exception_entry());
+      //mtctr(R0);
+      //bctr();
+    } else if (_stub_id == Runtime1::forward_exception_id) {
+      should_not_reach_here(); // The forward_exception stub itself traps here instead of forwarding.
+    } else {
+      // Keep stub frame for next call_RT; jump to the forward_exception stub via CTR.
+      //load_const_optimized(R0, Runtime1::entry_for(Runtime1::forward_exception_id));
+      add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(Runtime1::entry_for(Runtime1::forward_exception_id)));
+      mtctr(R0);
+      bctr();
+    }
+
+    bind(ok);
+  }
+
+  // Get oop results if there are any and reset the values in the thread.
+  if (oop_result1->is_valid()) {
+    get_vm_result(oop_result1);
+  }
+  if (metadata_result->is_valid()) {
+    get_vm_result_2(metadata_result);
+  }
+
+  return (int)(return_pc - code_section()->start()); // Offset of the call's return pc within the code section.
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { // One-argument variant.
+  mr_if_needed(R4_ARG2, arg1); // Arguments start at ARG2; the base call_RT puts the thread into ARG1.
+  return call_RT(oop_result1, metadata_result, entry, 1);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { // Two-argument variant.
+  mr_if_needed(R4_ARG2, arg1); // Arguments start at ARG2; the base call_RT puts the thread into ARG1.
+  mr_if_needed(R5_ARG3, arg2); assert(arg2 != R4_ARG2, "smashed argument"); // arg2 must not live in R4_ARG2, which the move above may overwrite.
+  return call_RT(oop_result1, metadata_result, entry, 2);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { // Three-argument variant.
+  mr_if_needed(R4_ARG2, arg1); // Arguments start at ARG2; the base call_RT puts the thread into ARG1.
+  mr_if_needed(R5_ARG3, arg2); assert(arg2 != R4_ARG2, "smashed argument"); // arg2 must not live in R4_ARG2, which the move above may overwrite.
+  mr_if_needed(R6_ARG4, arg3); assert(arg3 != R4_ARG2 && arg3 != R5_ARG3, "smashed argument"); // Likewise arg3 must not live in a register written by the moves above.
+  return call_RT(oop_result1, metadata_result, entry, 3);
+}
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs];
+static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs];
+static int frame_size_in_bytes = -1;
+
+static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
+  // Sets the stub's frame size and describes the register save slots in a new OopMap.
+  assert(frame_size_in_bytes > frame::abi_reg_args_size, "init");
+  sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+  const int num_slots = frame_size_in_bytes / sizeof(jint);
+  OopMap* const map = new OopMap(num_slots, 0);
+
+  for (int reg_num = 0; reg_num < FrameMap::nof_cpu_regs; reg_num++) {
+    const Register gpr = as_Register(reg_num);
+    if (!FrameMap::reg_needs_save(gpr)) continue;
+    const int slot = cpu_reg_save_offsets[reg_num] >> 2; // Byte offset -> 4-byte slot index.
+    map->set_callee_saved(VMRegImpl::stack2reg(slot), gpr->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg(slot + 1), gpr->as_VMReg()->next());
+  }
+
+  // Float registers are only described when the caller asked for them.
+  if (save_fpu_registers) {
+    for (int reg_num = 0; reg_num < FrameMap::nof_fpu_regs; reg_num++) {
+      const FloatRegister fpr = as_FloatRegister(reg_num);
+      const int slot = fpu_reg_save_offsets[reg_num] >> 2; // Byte offset -> 4-byte slot index.
+      map->set_callee_saved(VMRegImpl::stack2reg(slot), fpr->as_VMReg());
+      map->set_callee_saved(VMRegImpl::stack2reg(slot + 1), fpr->as_VMReg()->next());
+    }
+  }
+
+  return map;
+}
+
+static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true, // Emits code that pushes a frame and stores the registers into it.
+                                   Register ret_pc = noreg, int stack_preserve = 0) { // stack_preserve: extra bytes added to the pushed frame.
+  if (ret_pc == noreg) {
+    ret_pc = R0;
+    __ mflr(ret_pc); // No return pc register supplied: take it from LR.
+  }
+  __ std(ret_pc, _abi(lr), R1_SP); // C code needs pc in C1 method.
+  __ push_frame(frame_size_in_bytes + stack_preserve, R0);
+
+  // Record volatile registers as callee-save values in an OopMap so
+  // their save locations will be propagated to the caller frame's
+  // RegisterMap during StackFrameStream construction (needed for
+  // deoptimization; see compiledVFrame::create_stack_value).
+  // OopMap frame sizes are in c2 stack slot sizes (sizeof(jint)).
+
+  int i;
+  for (i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    if (FrameMap::reg_needs_save(r)) {
+      int sp_offset = cpu_reg_save_offsets[i];
+      __ std(r, sp_offset + STACK_BIAS, R1_SP);
+    }
+  }
+
+  if (save_fpu_registers) {
+    for (i = 0; i < FrameMap::nof_fpu_regs; i++) {
+      FloatRegister r = as_FloatRegister(i);
+      int sp_offset = fpu_reg_save_offsets[i];
+      __ stfd(r, sp_offset + STACK_BIAS, R1_SP);
+    }
+  }
+
+  return generate_oop_map(sasm, save_fpu_registers); // Also sets the stub's frame size.
+}
+
+static void restore_live_registers(StubAssembler* sasm, Register result1, Register result2, // Emits the inverse of save_live_registers.
+                                   bool restore_fpu_registers = true) {
+  for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    if (FrameMap::reg_needs_save(r) && r != result1 && r != result2) { // Result registers keep their current values.
+      int sp_offset = cpu_reg_save_offsets[i];
+      __ ld(r, sp_offset + STACK_BIAS, R1_SP);
+    }
+  }
+
+  if (restore_fpu_registers) {
+    for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+      FloatRegister r = as_FloatRegister(i);
+      int sp_offset = fpu_reg_save_offsets[i];
+      __ lfd(r, sp_offset + STACK_BIAS, R1_SP);
+    }
+  }
+
+  __ pop_frame();
+  __ ld(R0, _abi(lr), R1_SP); // Reload the return pc that save_live_registers stored in the ABI lr slot.
+  __ mtlr(R0);
+}
+
+
+void Runtime1::initialize_pd() {
+  // Hand out word-sized save slots starting at frame::abi_reg_args_size, then size the frame.
+  int next_offset = frame::abi_reg_args_size;
+
+  for (int reg_num = 0; reg_num < FrameMap::nof_cpu_regs; reg_num++) {
+    if (!FrameMap::reg_needs_save(as_Register(reg_num))) {
+      continue; // This register gets no save slot.
+    }
+    cpu_reg_save_offsets[reg_num] = next_offset;
+    next_offset += BytesPerWord;
+  }
+
+  for (int reg_num = 0; reg_num < FrameMap::nof_fpu_regs; reg_num++) {
+    fpu_reg_save_offsets[reg_num] = next_offset; // Every float register gets a slot.
+    next_offset += BytesPerWord;
+  }
+  frame_size_in_bytes = align_size_up(next_offset, frame::alignment_in_bytes);
+}
+
+
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+  // Emits a stub that saves the live registers and calls the runtime entry
+  // 'target', forwarding R4_ARG2 when has_argument is set.
+  OopMap* const saved_regs_map = save_live_registers(sasm);
+
+  // Either flavour yields the offset under which the GC map is registered.
+  const int rt_call_offset = has_argument
+      ? __ call_RT(noreg, noreg, target, R4_ARG2)
+      : __ call_RT(noreg, noreg, target);
+  OopMapSet* const map_set = new OopMapSet();
+  map_set->add_gc_map(rt_call_offset, saved_regs_map);
+
+  // Control is not expected to come back from the runtime call.
+  __ should_not_reach_here();
+  return map_set;
+}
+
+static OopMapSet* generate_exception_throw_with_stack_parms(StubAssembler* sasm, address target,
+                                                            int stack_parms) { // stack_parms: number of word-sized parameters (1..3) to load from the stack.
+  // Make a frame (enlarged by the parameter area) and preserve the caller's caller-save registers.
+  const int parm_size_in_bytes = align_size_up(stack_parms << LogBytesPerWord, frame::alignment_in_bytes); // Parameter area, rounded up to the frame alignment.
+  const int padding = parm_size_in_bytes - (stack_parms << LogBytesPerWord); // Bytes added by that rounding.
+  OopMap* oop_map = save_live_registers(sasm, true, noreg, parm_size_in_bytes);
+
+  int call_offset = 0;
+  switch (stack_parms) {
+    case 3:
+    __ ld(R6_ARG4, frame_size_in_bytes + padding + 16, R1_SP); // Intentional fall-through: the remaining parameters are loaded too.
+    case 2:
+    __ ld(R5_ARG3, frame_size_in_bytes + padding + 8, R1_SP); // Intentional fall-through.
+    case 1:
+    __ ld(R4_ARG2, frame_size_in_bytes + padding + 0, R1_SP);
+    call_offset = __ call_RT(noreg, noreg, target);
+    break;
+    default: Unimplemented(); break;
+  }
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  __ should_not_reach_here(); // Control is not expected to come back from the runtime call.
+  return oop_maps;
+}
+
+
+OopMapSet* Runtime1::generate_stub_call(StubAssembler* sasm, Register result, address target,
+                                        Register arg1, Register arg2, Register arg3) {
+  // Emits a stub that saves the live registers, calls 'target' with the leading
+  // valid argument registers, restores everything except 'result' and returns.
+  OopMap* const saved_regs_map = save_live_registers(sasm);
+
+  // The argument list ends at the first noreg; registers after it are ignored.
+  const int num_args = (arg1 == noreg) ? 0 : (arg2 == noreg) ? 1 : (arg3 == noreg) ? 2 : 3;
+  int rt_call_offset;
+  switch (num_args) {
+    case 0:  rt_call_offset = __ call_RT(result, noreg, target); break;
+    case 1:  rt_call_offset = __ call_RT(result, noreg, target, arg1); break;
+    case 2:  rt_call_offset = __ call_RT(result, noreg, target, arg1, arg2); break;
+    default: rt_call_offset = __ call_RT(result, noreg, target, arg1, arg2, arg3); break;
+  }
+  OopMapSet* const map_set = new OopMapSet();
+  map_set->add_gc_map(rt_call_offset, saved_regs_map);
+
+  restore_live_registers(sasm, result, noreg);
+  __ blr();
+  return map_set;
+}
+
+static OopMapSet* stub_call_with_stack_parms(StubAssembler* sasm, Register result, address target,
+                                             int stack_parms, bool do_return = true) { // stack_parms: number of word-sized parameters (1..3) to load from the stack.
+  // Make a frame (enlarged by the parameter area) and preserve the caller's caller-save registers.
+  const int parm_size_in_bytes = align_size_up(stack_parms << LogBytesPerWord, frame::alignment_in_bytes); // Parameter area, rounded up to the frame alignment.
+  const int padding = parm_size_in_bytes - (stack_parms << LogBytesPerWord); // Bytes added by that rounding.
+  OopMap* oop_map = save_live_registers(sasm, true, noreg, parm_size_in_bytes);
+
+  int call_offset = 0;
+  switch (stack_parms) {
+    case 3:
+    __ ld(R6_ARG4, frame_size_in_bytes + padding + 16, R1_SP); // Intentional fall-through: the remaining parameters are loaded too.
+    case 2:
+    __ ld(R5_ARG3, frame_size_in_bytes + padding + 8, R1_SP); // Intentional fall-through.
+    case 1:
+    __ ld(R4_ARG2, frame_size_in_bytes + padding + 0, R1_SP);
+    call_offset = __ call_RT(result, noreg, target);
+    break;
+    default: Unimplemented(); break;
+  }
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  restore_live_registers(sasm, result, noreg);
+  if (do_return) __ blr(); // With do_return == false the caller emits its own continuation after the restore.
+  return oop_maps;
+}
+
+
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+  // Make a frame and preserve the caller's caller-save registers.
+  OopMap* oop_map = save_live_registers(sasm);
+
+  // Call the runtime patching routine, returns non-zero if nmethod got deopted.
+  int call_offset = __ call_RT(noreg, noreg, target);
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+  __ cmpdi(CCR0, R3_RET, 0); // Result is consumed by the conditional return below.
+
+  // Re-execute the patched instruction or, if the nmethod was deoptimized,
+  // return to the deoptimization handler entry that will cause re-execution
+  // of the current bytecode.
+  DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+  assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+  // Return to the deoptimization handler entry for unpacking and reexecution.
+  // If we simply returned then we'd deopt as if any call we patched had just
+  // returned.
+
+  restore_live_registers(sasm, noreg, noreg);
+  // Return if patching routine returned 0.
+  __ bclr(Assembler::bcondCRbiIs1, Assembler::bi0(CCR0, Assembler::equal), Assembler::bhintbhBCLRisReturn);
+
+  address stub = deopt_blob->unpack_with_reexecution();
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); // R0 = stub address, computed relative to the global TOC.
+  __ mtctr(R0);
+  __ bctr();
+
+  return oop_maps;
+}
+
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+  OopMapSet* oop_maps = NULL;
+
+  // For better readability.
+  const bool must_gc_arguments = true;
+  const bool dont_gc_arguments = false;
+
+  // Stub code & info for the different stubs.
+  switch (id) {
+    case forward_exception_id:
+      {
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case new_instance_id:
+    case fast_new_instance_id:
+    case fast_new_instance_init_check_id:
+      {
+        if (id == new_instance_id) {
+          __ set_info("new_instance", dont_gc_arguments);
+        } else if (id == fast_new_instance_id) {
+          __ set_info("fast new_instance", dont_gc_arguments);
+        } else {
+          assert(id == fast_new_instance_init_check_id, "bad StubID");
+          __ set_info("fast new_instance init check", dont_gc_arguments);
+        }
+        // We don't support eden allocation.
+//        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
+//            UseTLAB && FastTLABRefill) {
+//          if (id == fast_new_instance_init_check_id) {
+//            // make sure the klass is initialized
+//            __ lbz(R0, in_bytes(InstanceKlass::init_state_offset()), R3_ARG1);
+//            __ cmpwi(CCR0, R0, InstanceKlass::fully_initialized);
+//            __ bne(CCR0, slow_path);
+//          }
+//#ifdef ASSERT
+//          // assert object can be fast path allocated
+//          {
+//            Label ok, not_ok;
+//          __ lwz(R0, in_bytes(Klass::layout_helper_offset()), R3_ARG1);
+//          // make sure it's an instance (LH > 0)
+//          __ cmpwi(CCR0, R0, 0);
+//          __ ble(CCR0, not_ok);
+//          __ testbitdi(CCR0, R0, R0, Klass::_lh_instance_slow_path_bit);
+//          __ beq(CCR0, ok);
+//
+//          __ bind(not_ok);
+//          __ stop("assert(can be fast path allocated)");
+//          __ bind(ok);
+//          }
+//#endif // ASSERT
+//          // We don't support eden allocation.
+//          __ bind(slow_path);
+//        }
+        oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_instance), R4_ARG2);
+      }
+      break;
+
+    case counter_overflow_id:
+        // Bci and method are on stack.
+        oop_maps = stub_call_with_stack_parms(sasm, noreg, CAST_FROM_FN_PTR(address, counter_overflow), 2);
+      break;
+
+    case new_type_array_id:
+    case new_object_array_id:
+      {
+        if (id == new_type_array_id) {
+          __ set_info("new_type_array", dont_gc_arguments);
+        } else {
+          __ set_info("new_object_array", dont_gc_arguments);
+        }
+
+#ifdef ASSERT
+        // Assert object type is really an array of the proper kind.
+        {
+          int tag = (id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value;
+          Label ok;
+          __ lwz(R0, in_bytes(Klass::layout_helper_offset()), R4_ARG2);
+          __ srawi(R0, R0, Klass::_lh_array_tag_shift);
+          __ cmpwi(CCR0, R0, tag);
+          __ beq(CCR0, ok);
+          __ stop("assert(is an array klass)");
+          __ should_not_reach_here();
+          __ bind(ok);
+        }
+#endif // ASSERT
+
+        // We don't support eden allocation.
+
+        if (id == new_type_array_id) {
+          oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_type_array), R4_ARG2, R5_ARG3);
+        } else {
+          oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_object_array), R4_ARG2, R5_ARG3);
+        }
+      }
+      break;
+
+    case new_multi_array_id:
+      {
+        // R4: klass
+        // R5: rank
+        // R6: address of 1st dimension
+        __ set_info("new_multi_array", dont_gc_arguments);
+        oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_multi_array), R4_ARG2, R5_ARG3, R6_ARG4);
+      }
+      break;
+
+    case register_finalizer_id:
+      {
+        __ set_info("register_finalizer", dont_gc_arguments);
+        // This code is called via rt_call. Hence, caller-save registers have been saved.
+        Register t = R11_scratch1;
+
+        // Load the klass and check the has finalizer flag.
+        __ load_klass(t, R3_ARG1);
+        __ lwz(t, in_bytes(Klass::access_flags_offset()), t);
+        __ testbitdi(CCR0, R0, t, exact_log2(JVM_ACC_HAS_FINALIZER));
+        // Return if has_finalizer bit == 0 (CR0.eq).
+        __ bclr(Assembler::bcondCRbiIs1, Assembler::bi0(CCR0, Assembler::equal), Assembler::bhintbhBCLRisReturn);
+
+        __ mflr(R0);
+        __ std(R0, _abi(lr), R1_SP);
+        __ push_frame(frame::abi_reg_args_size, R0); // Empty dummy frame (no callee-save regs).
+        sasm->set_frame_size(frame::abi_reg_args_size / BytesPerWord);
+        OopMap* oop_map = new OopMap(frame::abi_reg_args_size / sizeof(jint), 0);
+        int call_offset = __ call_RT(noreg, noreg,
+                                     CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), R3_ARG1);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        __ pop_frame();
+        __ ld(R0, _abi(lr), R1_SP);
+        __ mtlr(R0);
+        __ blr();
+      }
+      break;
+
+    case throw_range_check_failed_id:
+      {
+        __ set_info("range_check_failed", dont_gc_arguments); // Arguments will be discarded.
+        __ std(R0, -8, R1_SP); // Pass index on stack.
+        oop_maps = generate_exception_throw_with_stack_parms(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), 1);
+      }
+      break;
+
+    case throw_index_exception_id:
+      {
+        __ set_info("index_range_check_failed", dont_gc_arguments); // Arguments will be discarded.
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
+      }
+      break;
+
+    case throw_div0_exception_id:
+      {
+        __ set_info("throw_div0_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
+      }
+      break;
+
+    case throw_null_pointer_exception_id:
+      {
+        __ set_info("throw_null_pointer_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
+      }
+      break;
+
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      {
+        __ set_info("handle_exception", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case handle_exception_from_callee_id:
+      {
+        __ set_info("handle_exception_from_callee", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case unwind_exception_id:
+      {
+        const Register Rexception    = R3 /*LIRGenerator::exceptionOopOpr()*/,
+                       Rexception_pc = R4 /*LIRGenerator::exceptionPcOpr()*/,
+                       Rexception_save = R31, Rcaller_sp = R30;
+        __ set_info("unwind_exception", dont_gc_arguments);
+
+        __ ld(Rcaller_sp, 0, R1_SP);
+        __ push_frame_reg_args(0, R0); // dummy frame for C call
+        __ mr(Rexception_save, Rexception); // save over C call
+        __ ld(Rexception_pc, _abi(lr), Rcaller_sp); // return pc
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, Rexception_pc);
+        __ verify_not_null_oop(Rexception_save);
+        __ mtctr(R3_RET);
+        __ ld(Rexception_pc, _abi(lr), Rcaller_sp); // return pc
+        __ mr(R1_SP, Rcaller_sp); // Pop both frames at once.
+        __ mr(Rexception, Rexception_save); // restore
+        __ mtlr(Rexception_pc);
+        __ bctr();
+      }
+      break;
+
+    case throw_array_store_exception_id:
+      {
+        __ set_info("throw_array_store_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
+      }
+      break;
+
+    case throw_class_cast_exception_id:
+      {
+        __ set_info("throw_class_cast_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
+      }
+      break;
+
+    case throw_incompatible_class_change_error_id:
+      {
+        __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+      }
+      break;
+
+    case slow_subtype_check_id:
+      { // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super );
+        const Register sub_klass = R5,
+                       super_klass = R4,
+                       temp1_reg = R6,
+                       temp2_reg = R0;
+        __ check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg); // returns with CR0.eq if successful
+        __ crandc(CCR0, Assembler::equal, CCR0, Assembler::equal); // failed: CR0.ne
+        __ blr();
+      }
+      break;
+
+    case monitorenter_nofpu_id:
+    case monitorenter_id:
+      {
+        __ set_info("monitorenter", dont_gc_arguments);
+
+        int save_fpu_registers = (id == monitorenter_id);
+        // Make a frame and preserve the caller's caller-save registers.
+        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), R4_ARG2, R5_ARG3);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        restore_live_registers(sasm, noreg, noreg, save_fpu_registers);
+        __ blr();
+      }
+      break;
+
+    case monitorexit_nofpu_id:
+    case monitorexit_id:
+      {
+        // note: Really a leaf routine but must setup last java sp
+        //       => use call_RT for now (speed can be improved by
+        //       doing last java sp setup manually).
+        __ set_info("monitorexit", dont_gc_arguments);
+
+        int save_fpu_registers = (id == monitorexit_id);
+        // Make a frame and preserve the caller's caller-save registers.
+        OopMap* oop_map = save_live_registers(sasm, save_fpu_registers);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), R4_ARG2);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        restore_live_registers(sasm, noreg, noreg, save_fpu_registers);
+        __ blr();
+      }
+      break;
+
+    case deoptimize_id:
+      {
+        __ set_info("deoptimize", dont_gc_arguments);
+        __ std(R0, -8, R1_SP); // Pass trap_request on stack.
+        oop_maps = stub_call_with_stack_parms(sasm, noreg, CAST_FROM_FN_PTR(address, deoptimize), 1, /*do_return*/ false);
+
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        address stub = deopt_blob->unpack_with_reexecution();
+        //__ load_const_optimized(R0, stub);
+        __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+        __ mtctr(R0);
+        __ bctr();
+      }
+      break;
+
+    case access_field_patching_id:
+      {
+        __ set_info("access_field_patching", dont_gc_arguments);
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+      }
+      break;
+
+    case load_klass_patching_id:
+      {
+        __ set_info("load_klass_patching", dont_gc_arguments);
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
+      }
+      break;
+
+    case load_mirror_patching_id:
+      {
+        __ set_info("load_mirror_patching", dont_gc_arguments);
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
+      }
+      break;
+
+    case load_appendix_patching_id:
+      {
+        __ set_info("load_appendix_patching", dont_gc_arguments);
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
+      }
+      break;
+
+    case dtrace_object_alloc_id:
+      { // O0: object
+        __ unimplemented("stub dtrace_object_alloc_id");
+        __ set_info("dtrace_object_alloc", dont_gc_arguments);
+//        // We can't gc here so skip the oopmap but make sure that all
+//        // the live registers get saved.
+//        save_live_registers(sasm);
+//
+//        __ save_thread(L7_thread_cache);
+//        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
+//                relocInfo::runtime_call_type);
+//        __ delayed()->mov(I0, O0);
+//        __ restore_thread(L7_thread_cache);
+//
+//        restore_live_registers(sasm);
+//        __ ret();
+//        __ delayed()->restore();
+      }
+      break;
+
+#if INCLUDE_ALL_GCS
+    case g1_pre_barrier_slow_id:
+      {
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          goto unimplemented_entry;
+        }
+
+        __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
+
+        // Using stack slots: pre_val (pre-pushed), spill tmp, spill tmp2.
+        const int stack_slots = 3;
+        Register pre_val = R0; // previous value of memory
+        Register tmp  = R14;
+        Register tmp2 = R15;
+
+        Label refill, restart;
+        int satb_q_index_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   SATBMarkQueue::byte_offset_of_index());
+        int satb_q_buf_byte_offset =
+          in_bytes(JavaThread::satb_mark_queue_offset() +
+                   SATBMarkQueue::byte_offset_of_buf());
+
+        // Spill
+        __ std(tmp, -16, R1_SP);
+        __ std(tmp2, -24, R1_SP);
+
+        __ bind(restart);
+        // Load the index into the SATB buffer. SATBMarkQueue::_index is a
+        // size_t, so a doubleword load (ld) is appropriate.
+        __ ld(tmp, satb_q_index_byte_offset, R16_thread);
+
+        // index == 0?
+        __ cmpdi(CCR0, tmp, 0);
+        __ beq(CCR0, refill);
+
+        __ ld(tmp2, satb_q_buf_byte_offset, R16_thread);
+        __ ld(pre_val, -8, R1_SP); // Load from stack.
+        __ addi(tmp, tmp, -oopSize);
+
+        __ std(tmp, satb_q_index_byte_offset, R16_thread);
+        __ stdx(pre_val, tmp2, tmp); // [_buf + index] := <pre_val>
+
+        // Restore temp registers and return-from-leaf.
+        __ ld(tmp2, -24, R1_SP);
+        __ ld(tmp, -16, R1_SP);
+        __ blr();
+
+        __ bind(refill);
+        const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
+        __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
+        __ mflr(R0);
+        __ std(R0, _abi(lr), R1_SP);
+        __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SATBMarkQueueSet::handle_zero_index_for_thread), R16_thread);
+        __ pop_frame();
+        __ ld(R0, _abi(lr), R1_SP);
+        __ mtlr(R0);
+        __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
+        __ b(restart);
+      }
+      break;
+
+  case g1_post_barrier_slow_id:
+    {
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          goto unimplemented_entry;
+        }
+
+        __ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
+
+        // Using stack slots: spill addr, spill tmp2
+        const int stack_slots = 2;
+        Register tmp = R0;
+        Register addr = R14;
+        Register tmp2 = R15;
+        jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
+
+        Label restart, refill, ret;
+
+        // Spill
+        __ std(addr, -8, R1_SP);
+        __ std(tmp2, -16, R1_SP);
+
+        __ srdi(addr, R0, CardTableModRefBS::card_shift); // Addr is passed in R0.
+        __ load_const_optimized(/*cardtable*/ tmp2, byte_map_base, tmp);
+        __ add(addr, tmp2, addr);
+        __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
+
+        // Return if young card.
+        __ cmpwi(CCR0, tmp, G1SATBCardTableModRefBS::g1_young_card_val());
+        __ beq(CCR0, ret);
+
+        // Return if sequential consistent value is already dirty.
+        __ membar(Assembler::StoreLoad);
+        __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
+
+        __ cmpwi(CCR0, tmp, G1SATBCardTableModRefBS::dirty_card_val());
+        __ beq(CCR0, ret);
+
+        // Not dirty.
+
+        // First, dirty it.
+        __ li(tmp, G1SATBCardTableModRefBS::dirty_card_val());
+        __ stb(tmp, 0, addr);
+
+        int dirty_card_q_index_byte_offset =
+          in_bytes(JavaThread::dirty_card_queue_offset() +
+                   DirtyCardQueue::byte_offset_of_index());
+        int dirty_card_q_buf_byte_offset =
+          in_bytes(JavaThread::dirty_card_queue_offset() +
+                   DirtyCardQueue::byte_offset_of_buf());
+
+        __ bind(restart);
+
+        // Get the index into the update buffer. DirtyCardQueue::_index is
+        // a size_t, so a doubleword load (ld) is appropriate here.
+        __ ld(tmp2, dirty_card_q_index_byte_offset, R16_thread);
+
+        // index == 0?
+        __ cmpdi(CCR0, tmp2, 0);
+        __ beq(CCR0, refill);
+
+        __ ld(tmp, dirty_card_q_buf_byte_offset, R16_thread);
+        __ addi(tmp2, tmp2, -oopSize);
+
+        __ std(tmp2, dirty_card_q_index_byte_offset, R16_thread);
+        __ add(tmp2, tmp, tmp2);
+        __ std(addr, 0, tmp2); // [_buf + index] := <address_of_card>
+
+        // Restore temp registers and return-from-leaf.
+        __ bind(ret);
+        __ ld(tmp2, -16, R1_SP);
+        __ ld(addr, -8, R1_SP);
+        __ blr();
+
+        __ bind(refill);
+        const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
+        __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
+        __ mflr(R0);
+        __ std(R0, _abi(lr), R1_SP);
+        __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, DirtyCardQueueSet::handle_zero_index_for_thread), R16_thread);
+        __ pop_frame();
+        __ ld(R0, _abi(lr), R1_SP);
+        __ mtlr(R0);
+        __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
+        __ b(restart);
+      }
+      break;
+#endif // INCLUDE_ALL_GCS
+
+    case predicate_failed_trap_id:
+      {
+        __ set_info("predicate_failed_trap", dont_gc_arguments);
+        OopMap* oop_map = save_live_registers(sasm);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        restore_live_registers(sasm, noreg, noreg);
+
+        address stub = deopt_blob->unpack_with_reexecution();
+        //__ load_const_optimized(R0, stub);
+        __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+        __ mtctr(R0);
+        __ bctr();
+      }
+      break;
+
+  default:
+  unimplemented_entry:
+      {
+        __ set_info("unimplemented entry", dont_gc_arguments);
+        __ mflr(R0);
+        __ std(R0, _abi(lr), R1_SP);
+        __ push_frame(frame::abi_reg_args_size, R0); // empty dummy frame
+        sasm->set_frame_size(frame::abi_reg_args_size / BytesPerWord);
+        OopMap* oop_map = new OopMap(frame::abi_reg_args_size / sizeof(jint), 0);
+
+        __ load_const_optimized(R4_ARG2, (int)id);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), R4_ARG2);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        __ should_not_reach_here();
+      }
+      break;
+  }
+  return oop_maps;
+}
+
+// Emits the exception stub for 'id': stores exception oop/pc in the thread, calls exception_handler_for_pc and jumps to the address left in R3_RET.
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
+  __ block_comment("generate_handle_exception");
+
+  // Stub-specific prologue: save registers, if required (see the switch below).
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* oop_map = NULL;
+  const Register Rexception    = R3 /*LIRGenerator::exceptionOopOpr()*/,
+                 Rexception_pc = R4 /*LIRGenerator::exceptionPcOpr()*/;
+
+  switch (id) {
+  case forward_exception_id:
+    // We're handling an exception in the context of a compiled frame.
+    // The registers have been saved in the standard places. Perform
+    // an exception lookup in the caller and dispatch to the handler
+    // if found. Otherwise unwind and dispatch to the caller's
+    // exception handler.
+    oop_map = generate_oop_map(sasm, true);
+    // Transfer the pending exception to the exception_oop.
+    // Also load the PC which is typically at SP + frame_size_in_bytes + _abi(lr),
+    // but we support additional slots in the frame for parameter passing.
+    __ ld(Rexception_pc, 0, R1_SP); // Rexception_pc := [SP + 0]; used as base address two lines below.
+    __ ld(Rexception, in_bytes(JavaThread::pending_exception_offset()), R16_thread);
+    __ li(R0, 0); // R0 := 0, stored below to clear the pending exception.
+    __ ld(Rexception_pc, _abi(lr), Rexception_pc); // Rexception_pc := [Rexception_pc + _abi(lr)].
+    __ std(R0, in_bytes(JavaThread::pending_exception_offset()), R16_thread); // pending_exception := 0.
+    break;
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    // At this point all registers MAY be live.
+    oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id, Rexception_pc); // 2nd argument is false only for the nofpu variant.
+    break;
+  case handle_exception_from_callee_id:
+    // At this point all registers except exception oop and exception pc are dead.
+    oop_map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
+    sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+    __ std(Rexception_pc, _abi(lr), R1_SP); // Store the pc at _abi(lr) off the current SP before the new frame is pushed.
+    __ push_frame(frame_size_in_bytes, R0);
+    break;
+  default:  ShouldNotReachHere();
+  }
+
+  __ verify_not_null_oop(Rexception);
+
+#ifdef ASSERT
+  // Check that fields in JavaThread for exception oop and issuing pc are
+  // empty before writing to them.
+  __ ld(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
+  __ cmpdi(CCR0, R0, 0);
+  __ asm_assert_eq("exception oop already set", 0x963);
+  __ ld(R0, in_bytes(JavaThread::exception_pc_offset() ), R16_thread);
+  __ cmpdi(CCR0, R0, 0);
+  __ asm_assert_eq("exception pc already set", 0x962);
+#endif
+
+  // Save the exception and issuing pc in the thread.
+  __ std(Rexception,    in_bytes(JavaThread::exception_oop_offset()), R16_thread);
+  __ std(Rexception_pc, in_bytes(JavaThread::exception_pc_offset() ), R16_thread);
+
+  int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  __ mtctr(R3_RET); // CTR := R3_RET as left by the runtime call; branched to via bctr below.
+
+  // Note: if nmethod has been deoptimized then regardless of
+  // whether it had a handler or not we will deoptimize
+  // by entering the deopt blob with a pending exception.
+
+  // Restore the registers that were saved at the beginning, remove
+  // the frame and jump to the exception handler.
+  switch (id) {
+  case forward_exception_id:
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    restore_live_registers(sasm, noreg, noreg, id != handle_exception_nofpu_id);
+    __ bctr();
+    break;
+  case handle_exception_from_callee_id: {
+    __ pop_frame();
+    __ ld(Rexception_pc, _abi(lr), R1_SP); // Reload the pc stored in the prologue before push_frame.
+    __ mtlr(Rexception_pc); // LR := Rexception_pc.
+    __ bctr();
+    break;
+  }
+  default:  ShouldNotReachHere();
+  }
+
+  return oop_maps;
+}
+
+const char *Runtime1::pd_name_for_address(address entry) {
+  return "<unknown function>"; // Fixed placeholder; 'entry' is not examined.
+}
+
+#undef __

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/ppc/vm/c1_globals_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_GLOBALS_PPC_HPP
+#define CPU_PPC_VM_C1_GLOBALS_PPC_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform-dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+
+#ifndef TIERED
+define_pd_global(bool, BackgroundCompilation,        true );
+define_pd_global(bool, CICompileOSR,                 true );
+define_pd_global(bool, InlineIntrinsics,             true );
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 false);
+define_pd_global(bool, UseOnStackReplacement,        true );
+define_pd_global(bool, TieredCompilation,            false);
+define_pd_global(intx, CompileThreshold,             1000 );
+
+define_pd_global(intx, OnStackReplacePercentage,     1400 );
+define_pd_global(bool, UseTLAB,                      true );
+define_pd_global(bool, ProfileInterpreter,           false);
+define_pd_global(intx, FreqInlineSize,               325  );
+define_pd_global(bool, ResizeTLAB,                   true );
+define_pd_global(intx, ReservedCodeCacheSize,        32*M );
+define_pd_global(intx, CodeCacheExpansionSize,       32*K );
+define_pd_global(uintx,CodeCacheMinBlockLength,      1);
+define_pd_global(uintx,MetaspaceSize,                12*M );
+define_pd_global(bool, NeverActAsServerClassMachine, true );
+define_pd_global(intx, NewSizeThreadIncrease,        16*K );
+define_pd_global(uint64_t,MaxRAM,                    1ULL*G);
+define_pd_global(intx, InitialCodeCacheSize,         160*K);
+#endif // !TIERED
+
+define_pd_global(bool, UseTypeProfile,               false);
+define_pd_global(bool, RoundFPResults,               false);
+
+define_pd_global(bool, LIRFillDelaySlots,            false);
+define_pd_global(bool, OptimizeSinglePrecision,      false);
+define_pd_global(bool, CSEArrayLength,               true );
+define_pd_global(bool, TwoOperandLIRForm,            false);
+
+#endif // CPU_PPC_VM_C1_GLOBALS_PPC_HPP

--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -39,7 +39,7 @@
 define_pd_global(bool, ProfileTraps,                 true);
 define_pd_global(bool, UseOnStackReplacement,        true);
 define_pd_global(bool, ProfileInterpreter,           true);
-define_pd_global(bool, TieredCompilation,            false);
+define_pd_global(bool, TieredCompilation,            true);
 define_pd_global(intx, CompileThreshold,             10000);
 
 define_pd_global(intx, OnStackReplacePercentage,     140);

--- a/hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,4 +45,8 @@
       FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
     }
   }
+
+  if (!VM_Version::has_isel() && FLAG_IS_DEFAULT(ConditionalMoveLimit)) {
+    FLAG_SET_ERGO(intx, ConditionalMoveLimit, 0);
+  }
 }

--- a/hotspot/src/cpu/ppc/vm/compiledIC_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/compiledIC_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,13 +130,20 @@
   // - call
   __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
   AddressLiteral ic = __ allocate_metadata_address((Metadata *)NULL);
-  __ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()), ic, reg_scratch);
+  bool success = __ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()),
+                                               ic, reg_scratch, /*fixed_size*/ true);
+  if (!success) {
+    return NULL; // CodeCache is full
+  }
 
   if (ReoptimizeCallSequences) {
     __ b64_patchable((address)-1, relocInfo::none);
   } else {
     AddressLiteral a((address)-1);
-    __ load_const_from_method_toc(reg_scratch, a, reg_scratch);
+    success = __ load_const_from_method_toc(reg_scratch, a, reg_scratch, /*fixed_size*/ true);
+    if (!success) {
+      return NULL; // CodeCache is full
+    }
     __ mtctr(reg_scratch);
     __ bctr();
   }
@@ -153,6 +161,7 @@
   return stub;
 #else
   ShouldNotReachHere();
+  return NULL;
 #endif
 }
 #undef __

--- a/hotspot/src/cpu/ppc/vm/frame_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/frame_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -271,39 +271,6 @@
 }
 #endif
 
-void frame::adjust_unextended_sp() {
-  // If we are returning to a compiled MethodHandle call site, the
-  // saved_fp will in fact be a saved value of the unextended SP. The
-  // simplest way to tell whether we are returning to such a call site
-  // is as follows:
-
-  if (is_compiled_frame() && false /*is_at_mh_callsite()*/) {  // TODO PPC port
-    // If the sender PC is a deoptimization point, get the original
-    // PC. For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    _unextended_sp = _fp - _cb->frame_size();
-
-#ifdef ASSERT
-    nmethod *sender_nm = _cb->as_nmethod_or_null();
-    assert(sender_nm && *_sp == *_unextended_sp, "backlink changed");
-
-    intptr_t* sp = _unextended_sp;  // check if stack can be walked from here
-    for (int x = 0; x < 5; ++x) {   // check up to a couple of backlinks
-      intptr_t* prev_sp = *(intptr_t**)sp;
-      if (prev_sp == 0) break;      // end of stack
-      assert(prev_sp>sp, "broken stack");
-      sp = prev_sp;
-    }
-
-    if (sender_nm->is_deopt_mh_entry(_pc)) { // checks for deoptimization
-      address original_pc = sender_nm->get_original_pc(this);
-      assert(sender_nm->insts_contains(original_pc), "original PC must be in nmethod");
-      assert(sender_nm->is_method_handle_return(original_pc), "must be");
-    }
-#endif
-  }
-}
-
 intptr_t *frame::initial_deoptimization_info() {
   // unused... but returns fp() to minimize changes introduced by 7087445
   return fp();

--- a/hotspot/src/cpu/ppc/vm/frame_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/frame_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -465,7 +465,6 @@
   // The frame's stack pointer before it has been extended by a c2i adapter;
   // needed by deoptimization
   intptr_t* _unextended_sp;
-  void adjust_unextended_sp();
 
  public:

--- a/hotspot/src/cpu/ppc/vm/frame_ppc.inline.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/frame_ppc.inline.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -39,9 +39,6 @@
   _pc = pc;   // Must be set for get_deopt_original_pc()
 
   _fp = (intptr_t*)own_abi()->callers_sp;
-  // Use _fp - frame_size, needs to be done between _cb and _pc initialization
-  // and get_deopt_original_pc.
-  adjust_unextended_sp();
 
   address original_pc = nmethod::get_deopt_original_pc(this);
   if (original_pc != NULL) {

--- a/hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -36,4 +36,7 @@
 // The PPC CPUs are NOT multiple-copy-atomic.
 #define CPU_NOT_MULTIPLE_COPY_ATOMIC
 
+// The expected size in bytes of a cache line, used to pad data structures.
+#define DEFAULT_CACHE_LINE_SIZE 128
+
 #endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP

--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -93,9 +93,9 @@
 // own dispatch. The dispatch address in R24_dispatch_addr is used for the
 // dispatch.
 void InterpreterMacroAssembler::dispatch_epilog(TosState state, int bcp_incr) {
+  if (bcp_incr) { addi(R14_bcp, R14_bcp, bcp_incr); }
   mtctr(R24_dispatch_addr);
-  addi(R14_bcp, R14_bcp, bcp_incr);
-  bctr();
+  bcctr(bcondAlways, 0, bhintbhBCCTRisNotPredictable);
 }
 
 void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) {
@@ -212,9 +212,6 @@
     unimplemented("dispatch_Lbyte_code: verify"); // See Sparc Implementation to implement this
   }
 
-#ifdef FAST_DISPATCH
-  unimplemented("dispatch_Lbyte_code FAST_DISPATCH");
-#else
   assert_different_registers(bytecode, R11_scratch1);
 
   // Calc dispatch table address.
@@ -225,8 +222,7 @@
 
   // Jump off!
   mtctr(R11_scratch1);
-  bctr();
-#endif
+  bcctr(bcondAlways, 0, bhintbhBCCTRisNotPredictable);
 }
 
 void InterpreterMacroAssembler::load_receiver(Register Rparam_count, Register Rrecv_dst) {
@@ -546,8 +542,8 @@
   sldi(RsxtIndex, RsxtIndex, index_shift);
   blt(CCR0, LnotOOR);
   // Index should be in R17_tos, array should be in R4_ARG2.
-  mr(R17_tos, Rindex);
-  mr(R4_ARG2, Rarray);
+  mr_if_needed(R17_tos, Rindex);
+  mr_if_needed(R4_ARG2, Rarray);
   load_dispatch_table(Rtmp, (address*)Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
   mtctr(Rtmp);
   bctr();
@@ -842,7 +838,6 @@
 
     // Must fence, otherwise, preceding store(s) may float below cmpxchg.
     // CmpxchgX sets CCR0 to cmpX(current, displaced).
-    fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
     cmpxchgd(/*flag=*/CCR0,
              /*current_value=*/current_header,
              /*compare_value=*/displaced_header, /*exchange_value=*/monitor,
@@ -850,7 +845,8 @@
              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
              MacroAssembler::cmpxchgx_hint_acquire_lock(),
              noreg,
-             &cas_failed);
+             &cas_failed,
+             /*check without membar and ldarx first*/true);
 
     // If the compare-and-exchange succeeded, then we found an unlocked
     // object and we have now locked it.
@@ -868,9 +864,7 @@
     sub(current_header, current_header, R1_SP);
 
     assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-    load_const_optimized(tmp,
-                         (address) (~(os::vm_page_size()-1) |
-                                    markOopDesc::lock_mask_in_place));
+    load_const_optimized(tmp, ~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place);
 
     and_(R0/*==0?*/, current_header, tmp);
     // If condition is true we are done and hence we can store 0 in the displaced
@@ -1107,6 +1101,7 @@
 }
 
 void InterpreterMacroAssembler::test_invocation_counter_for_mdp(Register invocation_count,
+                                                                Register method_counters,
                                                                 Register Rscratch,
                                                                 Label &profile_continue) {
   assert(ProfileInterpreter, "must be profiling interpreter");
@@ -1115,12 +1110,11 @@
   Label done;
 
   // If no method data exists, and the counter is high enough, make one.
-  int ipl_offs = load_const_optimized(Rscratch, &InvocationCounter::InterpreterProfileLimit, R0, true);
-  lwz(Rscratch, ipl_offs, Rscratch);
+  lwz(Rscratch, in_bytes(MethodCounters::interpreter_profile_limit_offset()), method_counters);
 
   cmpdi(CCR0, R28_mdx, 0);
   // Test to see if we should create a method data oop.
-  cmpd(CCR1, Rscratch /* InterpreterProfileLimit */, invocation_count);
+  cmpd(CCR1, Rscratch, invocation_count);
   bne(CCR0, done);
   bge(CCR1, profile_continue);
 
@@ -1133,15 +1127,15 @@
   bind(done);
 }
 
-void InterpreterMacroAssembler::test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp) {
-  assert_different_registers(backedge_count, Rtmp, branch_bcp);
+void InterpreterMacroAssembler::test_backedge_count_for_osr(Register backedge_count, Register method_counters,
+                                                            Register target_bcp, Register disp, Register Rtmp) {
+  assert_different_registers(backedge_count, target_bcp, disp, Rtmp, R4_ARG2);
   assert(UseOnStackReplacement,"Must UseOnStackReplacement to test_backedge_count_for_osr");
 
   Label did_not_overflow;
   Label overflow_with_error;
 
-  int ibbl_offs = load_const_optimized(Rtmp, &InvocationCounter::InterpreterBackwardBranchLimit, R0, true);
-  lwz(Rtmp, ibbl_offs, Rtmp);
+  lwz(Rtmp, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()), method_counters);
   cmpw(CCR0, backedge_count, Rtmp);
 
   blt(CCR0, did_not_overflow);
@@ -1153,17 +1147,15 @@
   // the overflow function is called only once every overflow_frequency.
   if (ProfileInterpreter) {
     const int overflow_frequency = 1024;
-    li(Rtmp, overflow_frequency-1);
-    andr(Rtmp, Rtmp, backedge_count);
-    cmpwi(CCR0, Rtmp, 0);
+    andi_(Rtmp, backedge_count, overflow_frequency-1);
     bne(CCR0, did_not_overflow);
   }
 
   // Overflow in loop, pass branch bytecode.
-  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), branch_bcp, true);
+  subf(R4_ARG2, disp, target_bcp); // Compute branch bytecode (previous bcp).
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);
 
   // Was an OSR adapter generated?
-  // O0 = osr nmethod
   cmpdi(CCR0, R3_RET, 0);
   beq(CCR0, overflow_with_error);
 
@@ -1324,7 +1316,7 @@
   assert_different_registers(Rdst, Rtmp1);
   const Register invocation_counter = Rtmp1;
   const Register counter = Rdst;
-  // TODO ppc port assert(4 == InvocationCounter::sz_counter(), "unexpected field size.");
+  // TODO: PPC port: assert(4 == InvocationCounter::sz_counter(), "unexpected field size.");
 
   // Load backedge counter.
   lwz(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
@@ -1337,8 +1329,7 @@
   addi(counter, counter, InvocationCounter::count_increment);
 
   // Mask the invocation counter.
-  li(Rscratch, InvocationCounter::count_mask_value);
-  andr(invocation_counter, invocation_counter, Rscratch);
+  andi(invocation_counter, invocation_counter, InvocationCounter::count_mask_value);
 
   // Store new counter value.
   stw(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
@@ -1817,15 +1808,13 @@
     test_method_data_pointer(profile_continue);
 
     if (MethodData::profile_return_jsr292_only()) {
-      assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
-
       // If we don't profile all invoke bytecodes we must make sure
       // it's a bytecode we indeed profile. We can't go back to the
       // begining of the ProfileData we intend to update to check its
       // type because we're right after it and we don't known its
       // length.
       lbz(tmp1, 0, R14_bcp);
-      lhz(tmp2, Method::intrinsic_id_offset_in_bytes(), R19_method);
+      lbz(tmp2, Method::intrinsic_id_offset_in_bytes(), R19_method);
       cmpwi(CCR0, tmp1, Bytecodes::_invokedynamic);
       cmpwi(CCR1, tmp1, Bytecodes::_invokehandle);
       cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
@@ -2207,9 +2196,7 @@
   // Load the backedge counter.
   lwz(backedge_count, be_counter_offset, Rcounters); // is unsigned int
   // Mask the backedge counter.
-  Register tmp = invocation_count;
-  li(tmp, InvocationCounter::count_mask_value);
-  andr(backedge_count, tmp, backedge_count); // Cannot use andi, need sign extension of count_mask_value.
+  andi(backedge_count, backedge_count, InvocationCounter::count_mask_value);
 
   // Load the invocation counter.
   lwz(invocation_count, inv_counter_offset, Rcounters); // is unsigned int
@@ -2266,7 +2253,7 @@
   bne(CCR0, test);
 
   address fd = CAST_FROM_FN_PTR(address, verify_return_address);
-  const int nbytes_save = 11*8; // volatile gprs except R0
+  const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
   save_volatile_gprs(R1_SP, -nbytes_save); // except R0
   save_LR_CR(Rtmp); // Save in old frame.
   push_frame_reg_args(nbytes_save, Rtmp);

--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -203,7 +203,7 @@
   void restore_interpreter_state(Register scratch, bool bcp_and_mdx_only = false);
 
   void increment_backedge_counter(const Register Rcounters, Register Rtmp, Register Rtmp2, Register Rscratch);
-  void test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp);
+  void test_backedge_count_for_osr(Register backedge_count, Register method_counters, Register target_bcp, Register disp, Register Rtmp);
 
   void record_static_call_in_profile(Register Rentry, Register Rtmp);
   void record_receiver_call_in_profile(Register Rklass, Register Rentry, Register Rtmp);
@@ -222,7 +222,7 @@
   void set_method_data_pointer_for_bcp();
   void test_method_data_pointer(Label& zero_continue);
   void verify_method_data_pointer();
-  void test_invocation_counter_for_mdp(Register invocation_count, Register Rscratch, Label &profile_continue);
+  void test_invocation_counter_for_mdp(Register invocation_count, Register method_counters, Register Rscratch, Label &profile_continue);
 
   void set_mdp_data_at(int constant, Register value);

--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -30,6 +30,7 @@
 #include "gc/shared/collectedHeap.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/resourceArea.hpp"
+#include "nativeInst_ppc.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/icache.hpp"
@@ -114,7 +115,7 @@
   }
 
   if (hi16) {
-    addis(dst, R29, MacroAssembler::largeoffset_si16_si16_hi(offset));
+    addis(dst, R29_TOC, MacroAssembler::largeoffset_si16_si16_hi(offset));
   }
   if (lo16) {
     if (add_relocation) {
@@ -256,7 +257,9 @@
 }
 #endif // _LP64
 
-void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) {
+// Returns true if successful.
+bool MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a,
+                                                Register toc, bool fixed_size) {
   int toc_offset = 0;
   // Use RelocationHolder::none for the constant pool entry, otherwise
   // we will end up with a failing NativeCall::verify(x) where x is
@@ -264,11 +267,13 @@
   // FIXME: We should insert relocation information for oops at the constant
   // pool entries instead of inserting it at the loads; patching of a constant
   // pool entry should be less expensive.
-  address oop_address = address_constant((address)a.value(), RelocationHolder::none);
+  address const_address = address_constant((address)a.value(), RelocationHolder::none);
+  if (const_address == NULL) { return false; } // allocation failure
   // Relocate at the pc of the load.
   relocate(a.rspec());
-  toc_offset = (int)(oop_address - code()->consts()->start());
-  ld_largeoffset_unchecked(dst, toc_offset, toc, true);
+  toc_offset = (int)(const_address - code()->consts()->start());
+  ld_largeoffset_unchecked(dst, toc_offset, toc, fixed_size);
+  return true;
 }
 
 bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
@@ -446,6 +451,15 @@
   assert(dest.is_bound() || target_pc == b_pc, "postcondition");
 }
 
+// Conditional branch to dest using a plain bc when possible, else bc_far; emits 1 or 2 instructions.
+void MacroAssembler::bc_far_optimized(int boint, int biint, Label& dest) {
+  if (dest.is_bound() && is_within_range_of_bcxx(target(dest), pc())) { // Target already bound and in bcxx range.
+    bc(boint, biint, dest);
+  } else { // Unbound or out-of-range target: use the far form, requesting optimization on relocate.
+    bc_far(boint, biint, dest, MacroAssembler::bc_far_optimize_on_relocate);
+  }
+}
+
 bool MacroAssembler::is_bc_far_at(address instruction_addr) {
   return is_bc_far_variant1_at(instruction_addr) ||
          is_bc_far_variant2_at(instruction_addr) ||
@@ -496,7 +510,7 @@
       // variant 1, the 1st instruction contains the destination address:
       //
       //    bcxx  DEST
-      //    endgroup
+      //    nop
       //
       const int instruction_1 = *(int*)(instruction_addr);
       boint = inv_bo_field(instruction_1);
@@ -523,10 +537,10 @@
       // variant 1:
       //
       //    bcxx  DEST
-      //    endgroup
+      //    nop
       //
       masm.bc(boint, biint, dest);
-      masm.endgroup();
+      masm.nop();
     } else {
       // variant 2:
       //
@@ -810,7 +824,22 @@
   std(R9,  offset, dst);   offset += 8;
   std(R10, offset, dst);   offset += 8;
   std(R11, offset, dst);   offset += 8;
-  std(R12, offset, dst);
+  std(R12, offset, dst);   offset += 8;
+
+  stfd(F0, offset, dst);   offset += 8;
+  stfd(F1, offset, dst);   offset += 8;
+  stfd(F2, offset, dst);   offset += 8;
+  stfd(F3, offset, dst);   offset += 8;
+  stfd(F4, offset, dst);   offset += 8;
+  stfd(F5, offset, dst);   offset += 8;
+  stfd(F6, offset, dst);   offset += 8;
+  stfd(F7, offset, dst);   offset += 8;
+  stfd(F8, offset, dst);   offset += 8;
+  stfd(F9, offset, dst);   offset += 8;
+  stfd(F10, offset, dst);  offset += 8;
+  stfd(F11, offset, dst);  offset += 8;
+  stfd(F12, offset, dst);  offset += 8;
+  stfd(F13, offset, dst);
 }
 
 // For verify_oops.
@@ -825,7 +854,22 @@
   ld(R9,  offset, src);   offset += 8;
   ld(R10, offset, src);   offset += 8;
   ld(R11, offset, src);   offset += 8;
-  ld(R12, offset, src);
+  ld(R12, offset, src);   offset += 8;
+
+  lfd(F0, offset, src);   offset += 8;
+  lfd(F1, offset, src);   offset += 8;
+  lfd(F2, offset, src);   offset += 8;
+  lfd(F3, offset, src);   offset += 8;
+  lfd(F4, offset, src);   offset += 8;
+  lfd(F5, offset, src);   offset += 8;
+  lfd(F6, offset, src);   offset += 8;
+  lfd(F7, offset, src);   offset += 8;
+  lfd(F8, offset, src);   offset += 8;
+  lfd(F9, offset, src);   offset += 8;
+  lfd(F10, offset, src);  offset += 8;
+  lfd(F11, offset, src);  offset += 8;
+  lfd(F12, offset, src);  offset += 8;
+  lfd(F13, offset, src);
 }
 
 void MacroAssembler::save_LR_CR(Register tmp) {
@@ -908,7 +952,7 @@
   if (is_simm(-offset, 16)) {
     stdu(R1_SP, -offset, R1_SP);
   } else {
-    load_const(tmp, -offset);
+    load_const_optimized(tmp, -offset);
     stdux(R1_SP, R1_SP, tmp);
   }
 }
@@ -1090,20 +1134,21 @@
     assert(fd->entry() != NULL, "function must be linked");
 
     AddressLiteral fd_entry(fd->entry());
-    load_const_from_method_toc(R11, fd_entry, toc);
+    bool success = load_const_from_method_toc(R11, fd_entry, toc, /*fixed_size*/ true);
     mtctr(R11);
     if (fd->env() == NULL) {
       li(R11, 0);
       nop();
     } else {
       AddressLiteral fd_env(fd->env());
-      load_const_from_method_toc(R11, fd_env, toc);
+      success = success && load_const_from_method_toc(R11, fd_env, toc, /*fixed_size*/ true);
     }
     AddressLiteral fd_toc(fd->toc());
-    load_toc_from_toc(R2_TOC, fd_toc, toc);
-    // R2_TOC is killed.
+    // Set R2_TOC (load from toc)
+    success = success && load_const_from_method_toc(R2_TOC, fd_toc, toc, /*fixed_size*/ true);
     bctrl();
     _last_calls_return_pc = pc();
+    if (!success) { return NULL; }
   } else {
     // It's a friend function, load the entry point and don't care about
     // toc and env. Use an optimizable call instruction, but ensure the
@@ -1367,11 +1412,6 @@
   bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
                             int_flag_success != exchange_value && int_flag_success != addr_base);
 
-  // release/fence semantics
-  if (semantics & MemBarRel) {
-    release();
-  }
-
   if (use_result_reg && preset_result_reg) {
     li(int_flag_success, 0); // preset (assume cas failed)
   }
@@ -1383,6 +1423,11 @@
     bne(flag, failed);
   }
 
+  // release/fence semantics
+  if (semantics & MemBarRel) {
+    release();
+  }
+
   // atomic emulation loop
   bind(retry);
 
@@ -1462,11 +1507,6 @@
                             int_flag_success!=exchange_value && int_flag_success!=addr_base);
   assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
 
-  // release/fence semantics
-  if (semantics & MemBarRel) {
-    release();
-  }
-
   if (use_result_reg && preset_result_reg) {
     li(int_flag_success, 0); // preset (assume cas failed)
   }
@@ -1478,6 +1518,11 @@
     bne(flag, failed);
   }
 
+  // release/fence semantics
+  if (semantics & MemBarRel) {
+    release();
+  }
+
   // atomic emulation loop
   bind(retry);
 
@@ -1501,8 +1546,6 @@
     li(int_flag_success, 1);
   }
 
-  // POWER6 doesn't need isync in CAS.
-  // Always emit isync to be on the safe side.
   if (semantics & MemBarFenceAfter) {
     fence();
   } else if (semantics & MemBarAcq) {
@@ -1627,13 +1670,14 @@
 }
 
 /////////////////////////////////////////// subtype checking ////////////////////////////////////////////
-
 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                    Register super_klass,
                                                    Register temp1_reg,
                                                    Register temp2_reg,
-                                                   Label& L_success,
-                                                   Label& L_failure) {
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                                   RegisterOrConstant super_check_offset) {
 
   const Register check_cache_offset = temp1_reg;
   const Register cached_super       = temp2_reg;
@@ -1643,6 +1687,18 @@
   int sco_offset = in_bytes(Klass::super_check_offset_offset());
   int sc_offset  = in_bytes(Klass::secondary_super_cache_offset());
 
+  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco || super_check_offset.constant_or_zero() == sco_offset);
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
   // If the pointers are equal, we are done (e.g., String[] elements).
   // This self-check enables sharing of secondary supertype arrays among
   // non-primary types such as array-of-interface. Otherwise, each such
@@ -1651,15 +1707,20 @@
   // type checks are in fact trivially successful in this manner,
   // so we get a nicely predicted branch right at the start of the check.
   cmpd(CCR0, sub_klass, super_klass);
-  beq(CCR0, L_success);
+  beq(CCR0, *L_success);
 
   // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is always positive...
   lwz(check_cache_offset, sco_offset, super_klass);
+    super_check_offset = RegisterOrConstant(check_cache_offset);
+    // super_check_offset is register.
+    assert_different_registers(sub_klass, super_klass, cached_super, super_check_offset.as_register());
+  }
   // The loaded value is the offset from KlassOopDesc.
 
-  ldx(cached_super, check_cache_offset, sub_klass);
+  ld(cached_super, super_check_offset, sub_klass);
   cmpd(CCR0, cached_super, super_klass);
-  beq(CCR0, L_success);
 
   // This check has worked decisively for primary supers.
   // Secondary supers are sought in the super_cache ('super_cache_addr').
@@ -1672,9 +1733,39 @@
   // So if it was a primary super, we can just fail immediately.
   // Otherwise, it's the slow path for us (no success at this point).
 
-  cmpwi(CCR0, check_cache_offset, sc_offset);
-  bne(CCR0, L_failure);
-  // bind(slow_path); // fallthru
+#define FINAL_JUMP(label) if (&(label) != &L_fallthrough) { b(label); }
+
+  if (super_check_offset.is_register()) {
+    beq(CCR0, *L_success);
+    cmpwi(CCR0, super_check_offset.as_register(), sc_offset);
+    if (L_failure == &L_fallthrough) {
+      beq(CCR0, *L_slow_path);
+    } else {
+      bne(CCR0, *L_failure);
+      FINAL_JUMP(*L_slow_path);
+    }
+  } else {
+    if (super_check_offset.as_constant() == sc_offset) {
+      // Need a slow path; fast failure is impossible.
+      if (L_slow_path == &L_fallthrough) {
+        beq(CCR0, *L_success);
+      } else {
+        bne(CCR0, *L_slow_path);
+        FINAL_JUMP(*L_success);
+      }
+    } else {
+      // No slow path; it's a fast decision.
+      if (L_failure == &L_fallthrough) {
+        beq(CCR0, *L_success);
+      } else {
+        bne(CCR0, *L_failure);
+        FINAL_JUMP(*L_success);
+      }
+    }
+  }
+
+  bind(L_fallthrough);
+#undef FINAL_JUMP
 }
 
 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
@@ -1698,7 +1789,7 @@
 
   ld(array_ptr, source_offset, sub_klass);
 
-  //assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated.");
+  // TODO: PPC port: assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated.");
   lwz(temp, length_offset, array_ptr);
   cmpwi(CCR0, temp, 0);
   beq(CCR0, result_reg!=noreg ? failure : fallthru); // length 0
@@ -1719,8 +1810,9 @@
 
   bind(hit);
   std(super_klass, target_offset, sub_klass); // save result to cache
-  if (result_reg != noreg) li(result_reg, 0); // load zero result (indicates a hit)
-  if (L_success != NULL) b(*L_success);
+  if (result_reg != noreg) { li(result_reg, 0); } // load zero result (indicates a hit)
+  if (L_success != NULL) { b(*L_success); }
+  else if (result_reg == noreg) { blr(); } // return with CR0.eq if neither label nor result reg provided
 
   bind(fallthru);
 }
@@ -1732,7 +1824,7 @@
                          Register temp2_reg,
                          Label& L_success) {
   Label L_failure;
-  check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, L_success, L_failure);
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success, &L_failure);
   check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success);
   bind(L_failure); // Fallthru if not successful.
 }
@@ -1765,6 +1857,7 @@
   }
 }
 
+// Supports temp2_reg = R0.
 void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj_reg,
                                           Register mark_reg, Register temp_reg,
                                           Register temp2_reg, Label& done, Label* slow_case) {
@@ -1788,10 +1881,10 @@
          "biased locking makes assumptions about bit layout");
 
   if (PrintBiasedLockingStatistics) {
-    load_const(temp_reg, (address) BiasedLocking::total_entry_count_addr(), temp2_reg);
-    lwz(temp2_reg, 0, temp_reg);
-    addi(temp2_reg, temp2_reg, 1);
-    stw(temp2_reg, 0, temp_reg);
+    load_const(temp2_reg, (address) BiasedLocking::total_entry_count_addr(), temp_reg);
+    lwzx(temp_reg, temp2_reg);
+    addi(temp_reg, temp_reg, 1);
+    stwx(temp_reg, temp2_reg);
   }
 
   andi(temp_reg, mark_reg, markOopDesc::biased_lock_mask_in_place);
@@ -1809,10 +1902,10 @@
   if (PrintBiasedLockingStatistics) {
     Label l;
     bne(cr_reg, l);
-    load_const(mark_reg, (address) BiasedLocking::biased_lock_entry_count_addr());
-    lwz(temp2_reg, 0, mark_reg);
-    addi(temp2_reg, temp2_reg, 1);
-    stw(temp2_reg, 0, mark_reg);
+    load_const(temp2_reg, (address) BiasedLocking::biased_lock_entry_count_addr());
+    lwzx(mark_reg, temp2_reg);
+    addi(mark_reg, mark_reg, 1);
+    stwx(mark_reg, temp2_reg);
     // restore mark_reg
     ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
     bind(l);
@@ -1878,10 +1971,10 @@
   // need to revoke that bias. The revocation will occur in the
   // interpreter runtime in the slow case.
   if (PrintBiasedLockingStatistics) {
-    load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg);
-    lwz(temp2_reg, 0, temp_reg);
-    addi(temp2_reg, temp2_reg, 1);
-    stw(temp2_reg, 0, temp_reg);
+    load_const(temp2_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp_reg);
+    lwzx(temp_reg, temp2_reg);
+    addi(temp_reg, temp_reg, 1);
+    stwx(temp_reg, temp2_reg);
   }
   b(done);
 
@@ -1892,15 +1985,14 @@
   // value as the comparison value when doing the cas to acquire the
   // bias in the current epoch. In other words, we allow transfer of
   // the bias from one thread to another directly in this situation.
-  andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
-  orr(temp_reg, R16_thread, temp_reg);
-  load_klass(temp2_reg, obj_reg);
-  ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
-  orr(temp_reg, temp_reg, temp2_reg);
+  load_klass(temp_reg, obj_reg);
+  andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
+  orr(temp2_reg, R16_thread, temp2_reg);
+  ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
+  orr(temp_reg, temp2_reg, temp_reg);
 
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
-  // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
                  /*where=*/obj_reg,
@@ -1913,10 +2005,10 @@
   // need to revoke that bias. The revocation will occur in the
   // interpreter runtime in the slow case.
   if (PrintBiasedLockingStatistics) {
-    load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg);
-    lwz(temp2_reg, 0, temp_reg);
-    addi(temp2_reg, temp2_reg, 1);
-    stw(temp2_reg, 0, temp_reg);
+    load_const(temp2_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg);
+    lwzx(temp_reg, temp2_reg);
+    addi(temp_reg, temp_reg, 1);
+    stwx(temp_reg, temp2_reg);
   }
   b(done);
 
@@ -1952,10 +2044,10 @@
   if (PrintBiasedLockingStatistics) {
     Label l;
     bne(cr_reg, l);
-    load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg);
-    lwz(temp2_reg, 0, temp_reg);
-    addi(temp2_reg, temp2_reg, 1);
-    stw(temp2_reg, 0, temp_reg);
+    load_const(temp2_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg);
+    lwzx(temp_reg, temp2_reg);
+    addi(temp_reg, temp_reg, 1);
+    stwx(temp_reg, temp2_reg);
     bind(l);
   }
 
@@ -1977,6 +2069,109 @@
   beq(cr_reg, done);
 }
 
+// allocation (for C1)
+void MacroAssembler::eden_allocate(
+  Register obj,                      // result: pointer to object after successful allocation
+  Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+  int      con_size_in_bytes,        // object size in bytes if   known at compile time
+  Register t1,                       // temp register
+  Register t2,                       // temp register
+  Label&   slow_case                 // continuation point if fast allocation fails
+) {
+  b(slow_case); // No inline allocation is emitted: unconditionally branch to slow_case (other arguments are unused).
+}
+
+void MacroAssembler::tlab_allocate(
+  Register obj,                      // result: pointer to object after successful allocation
+  Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+  int      con_size_in_bytes,        // object size in bytes if   known at compile time
+  Register t1,                       // temp register
+  Label&   slow_case                 // continuation point if fast allocation fails
+) {
+  // make sure arguments make sense
+  assert_different_registers(obj, var_size_in_bytes, t1);
+  assert(0 <= con_size_in_bytes && is_simm(con_size_in_bytes, 16), "illegal object size"); // must fit addi's 16-bit signed immediate
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
+
+  const Register new_top = t1;
+  //verify_tlab(); not implemented
+
+  ld(obj, in_bytes(JavaThread::tlab_top_offset()), R16_thread); // obj = current TLAB top, i.e. start of the new object
+  ld(R0, in_bytes(JavaThread::tlab_end_offset()), R16_thread);  // R0 = TLAB end (R0 is clobbered)
+  if (var_size_in_bytes == noreg) {
+    addi(new_top, obj, con_size_in_bytes);
+  } else {
+    add(new_top, obj, var_size_in_bytes);
+  }
+  cmpld(CCR0, new_top, R0); // unsigned compare of the prospective top against the TLAB end (CCR0 is clobbered)
+  bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_case); // does not fit: go to slow_case
+
+#ifdef ASSERT
+  // make sure new free pointer is properly aligned
+  {
+    Label L;
+    andi_(R0, new_top, MinObjAlignmentInBytesMask);
+    beq(CCR0, L);
+    stop("updated TLAB free is not properly aligned", 0x934);
+    bind(L);
+  }
+#endif // ASSERT
+
+  // update the tlab top pointer
+  std(new_top, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
+  //verify_tlab(); not implemented
+}
+void MacroAssembler::tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case) {
+  unimplemented("tlab_refill"); // Placeholder body: only calls unimplemented(); none of the labels is used.
+}
+void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2) {
+  unimplemented("incr_allocated_bytes"); // Placeholder body: only calls unimplemented(); the arguments are unused.
+}
+
+address MacroAssembler::emit_trampoline_stub(int destination_toc_offset,
+                                             int insts_call_instruction_offset, Register Rtoc) { // Returns the stub address from start_a_stub(), or NULL if that fails.
+  // Start the stub.
+  address stub = start_a_stub(64);
+  if (stub == NULL) { return NULL; } // CodeCache full: bail out
+
+  // Create a trampoline stub relocation which relates this trampoline stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+  relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset));
+  const int stub_start_offset = offset(); // remembered so the asserts below can locate and measure the emitted stub
+
+  // For java_to_interp stubs we use R11_scratch1 as scratch register
+  // and in call trampoline stubs we use R12_scratch2. This way we
+  // can distinguish them (see is_NativeCallTrampolineStub_at()).
+  Register reg_scratch = R12_scratch2;
+
+  // Now, create the trampoline stub's code:
+  // - load the TOC
+  // - load the call target from the constant pool
+  // - call
+  if (Rtoc == noreg) { // caller supplied no TOC register: compute the method TOC address into the scratch register
+    calculate_address_from_global_toc(reg_scratch, method_toc());
+    Rtoc = reg_scratch;
+  }
+
+  ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, Rtoc, false); // load the call target from Rtoc + destination_toc_offset
+  mtctr(reg_scratch);
+  bctr(); // branch to the target through CTR
+
+  const address stub_start_addr = addr_at(stub_start_offset);
+
+  // Assert that the encoded destination_toc_offset can be identified and that it is correct.
+  assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
+         "encoded offset into the constant pool must match");
+  // Trampoline_stub_size should be good.
+  assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
+  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+  // End the stub.
+  end_a_stub();
+  return stub;
+}
+
 // TM on PPC64.
 void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
   Label retry;
@@ -2387,17 +2582,16 @@
 
   // Must fence, otherwise, preceding store(s) may float below cmpxchg.
   // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
-  // CmpxchgX sets cr_reg to cmpX(current, displaced).
-  membar(Assembler::StoreStore);
   cmpxchgd(/*flag=*/flag,
            /*current_value=*/current_header,
            /*compare_value=*/displaced_header,
            /*exchange_value=*/box,
            /*where=*/oop,
-           MacroAssembler::MemBarAcq,
+           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
            MacroAssembler::cmpxchgx_hint_acquire_lock(),
            noreg,
-           &cas_failed);
+           &cas_failed,
+           /*check without membar and ldarx first*/true);
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // If the compare-and-exchange succeeded, then we found an unlocked
@@ -2410,8 +2604,7 @@
   // Check if the owner is self by comparing the value in the markOop of object
   // (current_header) with the stack pointer.
   sub(current_header, current_header, R1_SP);
-  load_const_optimized(temp, (address) (~(os::vm_page_size()-1) |
-                                        markOopDesc::lock_mask_in_place));
+  load_const_optimized(temp, ~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place);
 
   and_(R0/*==0?*/, current_header, temp);
   // If condition is true we are cont and hence we can store 0 as the
@@ -2437,8 +2630,6 @@
 
     // Try to CAS m->owner from NULL to current thread.
     addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
-    li(displaced_header, 0);
-    // CmpxchgX sets flag to cmpX(current, displaced).
     cmpxchgd(/*flag=*/flag,
              /*current_value=*/current_header,
              /*compare_value=*/(intptr_t)0,
@@ -2928,31 +3119,12 @@
   }
 }
 
-void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) {
-  if (!os::zero_page_read_protected()) {
-    if (TrapBasedNullChecks) {
-      trap_null_check(src);
-    }
-  }
-  load_klass(dst, src);
-}
-
-void MacroAssembler::reinit_heapbase(Register d, Register tmp) {
-  if (Universe::heap() != NULL) {
-    load_const_optimized(R30, Universe::narrow_ptrs_base(), tmp);
-  } else {
-    // Heap not yet allocated. Load indirectly.
-    int simm16_offset = load_const_optimized(R30, Universe::narrow_ptrs_base_addr(), tmp, true);
-    ld(R30, simm16_offset, R30);
-  }
-}
-
 // Clear Array
 // Kills both input registers. tmp == R0 is allowed.
 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
   // Procedure for large arrays (uses data cache block zero instruction).
     Label startloop, fast, fastloop, small_rest, restloop, done;
-    const int cl_size         = VM_Version::get_cache_line_size(),
+    const int cl_size         = VM_Version::L1_data_cache_line_size(),
               cl_dwords       = cl_size>>3,
               cl_dw_addr_bits = exact_log2(cl_dwords),
               dcbz_min        = 1;                     // Min count of dcbz executions, needs to be >0.
@@ -4025,7 +4197,7 @@
   bind(L_check_1);
 
   addi(idx, idx, 0x2);
-  andi_(idx, idx, 0x1) ;
+  andi_(idx, idx, 0x1);
   addic_(idx, idx, -1);
   blt(CCR0, L_post_third_loop_done);
 
@@ -4255,17 +4427,42 @@
 
   address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
   const Register tmp = R11; // Will be preserved.
-  const int nbytes_save = 11*8; // Volatile gprs except R0.
+  const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
   save_volatile_gprs(R1_SP, -nbytes_save); // except R0
 
-  if (oop == tmp) mr(R4_ARG2, oop);
+  mr_if_needed(R4_ARG2, oop);
   save_LR_CR(tmp); // save in old frame
   push_frame_reg_args(nbytes_save, tmp);
   // load FunctionDescriptor** / entry_address *
   load_const_optimized(tmp, fd, R0);
   // load FunctionDescriptor* / entry_address
   ld(tmp, 0, tmp);
-  if (oop != tmp) mr_if_needed(R4_ARG2, oop);
+  load_const_optimized(R3_ARG1, (address)msg, R0);
+  // Call destination for its side effect.
+  call_c(tmp);
+
+  pop_frame();
+  restore_LR_CR(tmp);
+  restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
+}
+
+void MacroAssembler::verify_oop_addr(RegisterOrConstant offs, Register base, const char* msg) {
+  if (!VerifyOops) {
+    return;
+  }
+
+  address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
+  const Register tmp = R11; // Will be preserved.
+  const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
+  save_volatile_gprs(R1_SP, -nbytes_save); // except R0
+
+  ld(R4_ARG2, offs, base);
+  save_LR_CR(tmp); // save in old frame
+  push_frame_reg_args(nbytes_save, tmp);
+  // load FunctionDescriptor** / entry_address *
+  load_const_optimized(tmp, fd, R0);
+  // load FunctionDescriptor* / entry_address
+  ld(tmp, 0, tmp);
   load_const_optimized(R3_ARG1, (address)msg, R0);
   // Call destination for its side effect.
   call_c(tmp);

--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -119,11 +119,8 @@
 
   // Emits an oop const to the constant pool, loads the constant, and
   // sets a relocation info with address current_pc.
-  void load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc);
-  void load_toc_from_toc(Register dst, AddressLiteral& a, Register toc) {
-    assert(dst == R2_TOC, "base register must be TOC");
-    load_const_from_method_toc(dst, a, toc);
-  }
+  // Returns true if successful.
+  bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false);
 
   static bool is_load_const_from_method_toc_at(address a);
   static int get_offset_of_load_const_from_method_toc_at(address a);
@@ -174,6 +171,7 @@
   // optimize: flag for telling the conditional far branch to optimize
   //           itself when relocated.
   void bc_far(int boint, int biint, Label& dest, int optimize);
+  void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions
   // Relocation of conditional far branches.
   static bool    is_bc_far_at(address instruction_addr);
   static address get_dest_of_bc_far_at(address instruction_addr);
@@ -262,6 +260,7 @@
   // some ABI-related functions
   void save_nonvolatile_gprs(   Register dst_base, int offset);
   void restore_nonvolatile_gprs(Register src_base, int offset);
+  enum { num_volatile_regs = 11 + 14 }; // GPR + FPR
   void save_volatile_gprs(   Register dst_base, int offset);
   void restore_volatile_gprs(Register src_base, int offset);
   void save_LR_CR(   Register tmp);     // tmp contains LR on return.
@@ -461,8 +460,10 @@
                                      Register super_klass,
                                      Register temp1_reg,
                                      Register temp2_reg,
-                                     Label& L_success,
-                                     Label& L_failure);
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path = NULL, // default fall through
+                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
 
   // The rest of the type check; must be wired to a corresponding fast path.
   // It does not repeat the fast path logic, so don't use it standalone.
@@ -507,6 +508,28 @@
   // biased locking exit case failed.
   void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
 
+  // allocation (for C1)
+  void eden_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  void tlab_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
+  void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2);
+
+  enum { trampoline_stub_size = 6 * 4 };
+  address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);
+
   void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
   void atomic_ori_int(Register addr, Register result, int uimm16);
 
@@ -597,9 +620,7 @@
 
   // Implicit or explicit null check, jumps to static address exception_entry.
   inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
-
-  // Check accessed object for null. Use SIGTRAP-based null checks on AIX.
-  inline void load_with_trap_null_check(Register d, int si16, Register s1);
+  inline void null_check(Register a, int offset, Label *Lis_null); // implicit only if Lis_null not provided
 
   // Load heap oop and decompress. Loaded oop may not be null.
   // Specify tmp to save one cycle.
@@ -619,20 +640,17 @@
   inline Register decode_heap_oop_not_null(Register d, Register src = noreg);
 
   // Null allowed.
+  inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier!
   inline void decode_heap_oop(Register d);
 
   // Load/Store klass oop from klass field. Compress.
   void load_klass(Register dst, Register src);
-  void load_klass_with_trap_null_check(Register dst, Register src);
   void store_klass(Register dst_oop, Register klass, Register tmp = R0);
   void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
   static int instr_size_for_decode_klass_not_null();
   void decode_klass_not_null(Register dst, Register src = noreg);
   Register encode_klass_not_null(Register dst, Register src = noreg);
 
-  // Load common heap base into register.
-  void reinit_heapbase(Register d, Register tmp = noreg);
-
   // SIGTRAP-based range checks for arrays.
   inline void trap_range_check_l(Register a, Register b);
   inline void trap_range_check_l(Register a, int si16);
@@ -750,6 +768,7 @@
 
   // Emit code to verify that reg contains a valid oop if +VerifyOops is set.
   void verify_oop(Register reg, const char* s = "broken oop");
+  void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop");
 
   // TODO: verify method and klass metadata (compare against vptr?)
   void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}

--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -70,9 +70,11 @@
 }
 
 inline void MacroAssembler::membar(int bits) {
-  // TODO: use elemental_membar(bits) for Power 8 and disable optimization of acquire-release
-  // (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))
-  if (bits & StoreLoad) sync(); else lwsync();
+  // Note: using elemental_membar(bits) is not recommended for Power 8.
+  // If elemental_membar(bits) is used, disable optimization of acquire-release
+  // (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))!
+  if (bits & StoreLoad) { sync(); }  // StoreLoad ordering requested: emit sync
+  else if (bits) { lwsync(); }       // any other non-empty request: emit lwsync; bits == 0 emits no barrier
 }
 inline void MacroAssembler::release() { membar(LoadStore | StoreStore); }
 inline void MacroAssembler::acquire() { membar(LoadLoad | LoadStore); }
@@ -86,7 +88,7 @@
 // Offset of given address to the global TOC.
 inline int MacroAssembler::offset_to_global_toc(const address addr) {
   intptr_t offset = (intptr_t)addr - (intptr_t)MacroAssembler::global_toc();
-  assert(Assembler::is_simm((long)offset, 31) && offset >= 0, "must be in range");
+  assert(Assembler::is_uimm((long)offset, 31), "must be in range"); // range check before narrowing to int (presumably 0 <= offset < 2^31 -- confirm is_uimm)
   return (int)offset;
 }
 
@@ -98,7 +100,7 @@
 // Offset of given address to current method's TOC.
 inline int MacroAssembler::offset_to_method_toc(address addr) {
   intptr_t offset = (intptr_t)addr - (intptr_t)method_toc();
-  assert(is_simm((long)offset, 31) && offset >= 0, "must be in range");
+  assert(Assembler::is_uimm((long)offset, 31), "must be in range"); // range check before narrowing to int (presumably 0 <= offset < 2^31 -- confirm is_uimm)
   return (int)offset;
 }
 
@@ -190,13 +192,13 @@
   // Variant 1, the 1st instruction contains the destination address:
   //
   //    bcxx  DEST
-  //    endgroup
+  //    nop
   //
   const int instruction_1 = *(int*)(instruction_addr);
   const int instruction_2 = *(int*)(instruction_addr + 4);
   return is_bcxx(instruction_1) &&
          (inv_bd_field(instruction_1, (intptr_t)instruction_addr) != (intptr_t)(instruction_addr + 2*4)) &&
-         is_endgroup(instruction_2);
+         is_nop(instruction_2);
 }
 
 // Relocation of conditional far branches.
@@ -302,13 +304,17 @@
   }
 }
 
-inline void MacroAssembler::load_with_trap_null_check(Register d, int si16, Register s1) {
-  if (!os::zero_page_read_protected()) {
+inline void MacroAssembler::null_check(Register a, int offset, Label *Lis_null) { // Emits an explicit null check of 'a' only when the condition below requires one.
+  if (!ImplicitNullChecks || needs_explicit_null_check(offset) || !os::zero_page_read_protected()) {
     if (TrapBasedNullChecks) {
-      trap_null_check(s1);
+      assert(UseSIGTRAP, "sanity");
+      trap_null_check(a);
+    } else if (Lis_null) {
+      // Explicit compare-and-branch (clobbers CCR0); with Lis_null == NULL nothing is emitted here.
+      cmpdi(CCR0, a, 0);
+      beq(CCR0, *Lis_null);
     }
   }
-  ld(d, si16, s1);
 }
 
 inline void MacroAssembler::load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1, Register tmp) {
@@ -365,6 +371,26 @@
   return current; // Encoded oop is in this register.
 }
 
+inline Register MacroAssembler::encode_heap_oop(Register d, Register src) { // Null-tolerant encode of src; returns the register holding the result.
+  if (Universe::narrow_oop_base() != NULL) {
+    if (VM_Version::has_isel()) {
+      cmpdi(CCR0, src, 0); // record in CCR0 whether src is null, before d is written
+      Register co = encode_heap_oop_not_null(d, src);
+      assert(co == d, "sanity");
+      isel_0(d, CCR0, Assembler::equal); // presumably replaces d with 0 when src was null -- confirm against isel_0
+    } else {
+      Label isNull;
+      or_(d, src, src); // move and compare 0
+      beq(CCR0, isNull); // src is null: skip the encoding, d already holds the copied null
+      encode_heap_oop_not_null(d, src);
+      bind(isNull);
+    }
+    return d;
+  } else {
+    return encode_heap_oop_not_null(d, src); // no heap base: the not-null encoding is used for null as well
+  }
+}
+
 inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
   if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
       Universe::narrow_oop_shift() != 0) {

--- a/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -504,8 +504,7 @@
       frame cur_frame = os::current_frame();
 
       // Robust search of trace_calling_frame (independant of inlining).
-      // Assumes saved_regs comes from a pusha in the trace_calling_frame.
-      assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?");
+      assert(cur_frame.sp() <= saved_regs, "registers not saved on stack ?");
       frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame);
       while (trace_calling_frame.fp() < saved_regs) {
         trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
@@ -539,7 +538,7 @@
   BLOCK_COMMENT("trace_method_handle {");
 
   const Register tmp = R11; // Will be preserved.
-  const int nbytes_save = 11*8; // volatile gprs except R0
+  const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
   __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
   __ save_LR_CR(tmp); // save in old frame

--- a/hotspot/src/cpu/ppc/vm/nativeInst_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/nativeInst_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,13 +65,17 @@
   address destination = Assembler::bxx_destination(addr);
 
   // Do we use a trampoline stub for this call?
-  CodeBlob* cb = CodeCache::find_blob_unsafe(addr);   // Else we get assertion if nmethod is zombie.
-  assert(cb && cb->is_nmethod(), "sanity");
-  nmethod *nm = (nmethod *)cb;
-  if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
-    // Yes we do, so get the destination from the trampoline stub.
-    const address trampoline_stub_addr = destination;
-    destination = NativeCallTrampolineStub_at(trampoline_stub_addr)->destination(nm);
+  // Trampoline stubs are located behind the main code.
+  if (destination > addr) {
+    // Filter out recursive method invocation (call to verified/unverified entry point).
+    CodeBlob* cb = CodeCache::find_blob_unsafe(addr);   // Else we get assertion if nmethod is zombie.
+    assert(cb && cb->is_nmethod(), "sanity");
+    nmethod *nm = (nmethod *)cb;
+    if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
+      // Yes we do, so get the destination from the trampoline stub.
+      const address trampoline_stub_addr = destination;
+      destination = NativeCallTrampolineStub_at(trampoline_stub_addr)->destination(nm);
+    }
   }
 
   return destination;
@@ -267,7 +271,7 @@
           oop_addr = r->oop_addr();
           *oop_addr = cast_to_oop(data);
         } else {
-          assert(oop_addr == r->oop_addr(), "must be only one set-oop here") ;
+          assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
         }
       }
       if (iter.type() == relocInfo::metadata_type) {
@@ -351,6 +355,27 @@
 }
 #endif // ASSERT
 
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { // Writes an unconditional branch to entry at code_pos.
+  CodeBuffer cb(code_pos, BytesPerInstWord + 1); // Code buffer is set up directly at the address being patched.
+  MacroAssembler* a = new MacroAssembler(&cb);
+  a->b(entry); // Emit the branch through the assembler.
+  ICache::ppc64_flush_icache_bytes(code_pos, NativeGeneralJump::instruction_size); // Flush the icache for the rewritten instruction.
+}
+
+// MT-safe patching of a jmp instruction: overwrites the word at instr_addr with the first word of code_buffer.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+  // Bytes beyond offset NativeGeneralJump::instruction_size are copied by caller.
+
+  // Finally patch out the jump.
+  volatile juint *jump_addr = (volatile juint*)instr_addr;
+  // Release not needed because caller uses invalidate_range after copying the remaining bytes.
+  //OrderAccess::release_store(jump_addr, *((juint*)code_buffer));
+  *jump_addr = *((juint*)code_buffer); // atomically store code over branch instruction
+  ICache::ppc64_flush_icache_bytes(instr_addr, NativeGeneralJump::instruction_size); // Flush the icache for the patched word.
+}
+
+
 //-------------------------------------------------------------------
 
 // Call trampoline stubs.
@@ -364,10 +389,12 @@
 //
 
 address NativeCallTrampolineStub::encoded_destination_addr() const {
-  address instruction_addr = addr_at(2 * BytesPerInstWord);
-  assert(MacroAssembler::is_ld_largeoffset(instruction_addr),
-         "must be a ld with large offset (from the constant pool)");
-
+  address instruction_addr = addr_at(0 * BytesPerInstWord); // First candidate: the ld is the very first instruction of the stub.
+  if (!MacroAssembler::is_ld_largeoffset(instruction_addr)) {
+    instruction_addr = addr_at(2 * BytesPerInstWord); // Otherwise the ld is expected two instruction words further in.
+    assert(MacroAssembler::is_ld_largeoffset(instruction_addr),
+           "must be a ld with large offset (from the constant pool)");
+  }
   return instruction_addr;
 }

--- a/hotspot/src/cpu/ppc/vm/nativeInst_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/nativeInst_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,6 +50,8 @@
   friend class Relocation;
 
  public:
+  bool is_jump() { return Assembler::is_b(long_at(0)); } // See NativeGeneralJump.
+
   bool is_sigtrap_ic_miss_check() {
     assert(UseSIGTRAP, "precondition");
     return MacroAssembler::is_trap_ic_miss_check(long_at(0));
@@ -235,8 +237,8 @@
   return call;
 }
 
-// An interface for accessing/manipulating native set_oop imm, reg instructions.
-// (used to manipulate inlined data references, etc.)
+// An interface for accessing/manipulating native set_oop imm, reg instructions
+// (used to manipulate inlined data references, etc.).
 class NativeMovConstReg: public NativeInstruction {
  public:
 
@@ -384,10 +386,21 @@
   void set_destination(address new_destination);
 };
 
+// Note: Other stubs must not begin with this pattern.
 inline bool is_NativeCallTrampolineStub_at(address address) {
   int first_instr = *(int*)address;
-  return Assembler::is_addis(first_instr) &&
-    (Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2;
+  // Both calculate_address_from_global_toc and the long form of ld_largeoffset_unchecked begin with an addis targeting R12.
+  if (Assembler::is_addis(first_instr) &&
+      (Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2) return true;
+
+  // The short form of ld_largeoffset_unchecked is an ld into R12, which is followed by an mtctr of R12.
+  int second_instr = *((int*)address + 1); // Only read when the addis check above did not match.
+  if (Assembler::is_ld(first_instr) &&
+      (Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2 &&
+      Assembler::is_mtctr(second_instr) &&
+      (Register)(intptr_t)Assembler::inv_rs_field(second_instr) == R12_scratch2) return true;
+
+  return false; // Neither recognized start sequence matched.
 }
 
 inline NativeCallTrampolineStub* NativeCallTrampolineStub_at(address address) {
@@ -395,4 +408,102 @@
   return (NativeCallTrampolineStub*)address;
 }
 
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+//-------------------------------------
+//  N a t i v e G e n e r a l J u m p
+//-------------------------------------
+
+// Despite the name, handles only simple branches.
+class NativeGeneralJump;
+inline NativeGeneralJump* nativeGeneralJump_at(address address);
+
+// Currently only implemented as single unconditional branch.
+class NativeGeneralJump: public NativeInstruction {
+ public:
+
+  enum PPC64_specific_constants {
+    instruction_size = 4 // Passed as the byte count to the icache flush when the jump is patched.
+  };
+
+  address instruction_address() const { return addr_at(0); } // Address of the branch instruction itself.
+
+  // Creation.
+  friend inline NativeGeneralJump* nativeGeneralJump_at(address addr) { // Views addr as a NativeGeneralJump; verify() is wrapped in DEBUG_ONLY.
+    NativeGeneralJump* jump = (NativeGeneralJump*)(addr);
+    DEBUG_ONLY( jump->verify(); )
+    return jump;
+  }
+
+  // Insertion of native general jump instruction.
+  static void insert_unconditional(address code_pos, address entry);
+
+  address jump_destination() const {
+    DEBUG_ONLY( verify(); )
+    return addr_at(0) + Assembler::inv_li_field(long_at(0)); // Own address plus the value of the LI field (treated as a relative displacement -- TODO confirm absolute branches never occur here).
+  }
+
+  void set_jump_destination(address dest) {
+    DEBUG_ONLY( verify(); )
+    insert_unconditional(addr_at(0), dest); // Re-emits the branch in place with the new target.
+  }
+
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+
+  void verify() const { guarantee(Assembler::is_b(long_at(0)), "invalid NativeGeneralJump"); } // Checks that the word at this address is a b instruction.
+};
+
+// An interface for accessing/manipulating native load int (load_const32).
+class NativeMovRegMem;
+inline NativeMovRegMem* nativeMovRegMem_at(address address);
+class NativeMovRegMem: public NativeInstruction {
+ public:
+
+  enum PPC64_specific_constants {
+    instruction_size = 8 // Covers the two instruction words checked in verify() (lis at 0, ori at 4).
+  };
+
+  address instruction_address() const { return addr_at(0); } // Address of the first instruction of the pair.
+
+  intptr_t offset() const { // Reassembles the value from the 16-bit halves stored in the two instruction words.
+#ifdef VM_LITTLE_ENDIAN // Both variants address the low-order 16 bits of each 32-bit instruction word.
+    short *hi_ptr = (short*)(addr_at(0));
+    short *lo_ptr = (short*)(addr_at(4));
+#else
+    short *hi_ptr = (short*)(addr_at(0) + 2);
+    short *lo_ptr = (short*)(addr_at(4) + 2);
+#endif
+    return ((*hi_ptr) << 16) | ((*lo_ptr) & 0xFFFF); // High half keeps its sign, low half is masked to 16 bits.
+  }
+
+  void set_offset(intptr_t x) { // Splits x into two 16-bit halves and patches them into the instruction pair.
+#ifdef VM_LITTLE_ENDIAN
+    short *hi_ptr = (short*)(addr_at(0));
+    short *lo_ptr = (short*)(addr_at(4));
+#else
+    short *hi_ptr = (short*)(addr_at(0) + 2);
+    short *lo_ptr = (short*)(addr_at(4) + 2);
+#endif
+    *hi_ptr = x >> 16; // Bits of x above bit 31 are dropped by the store to short.
+    *lo_ptr = x & 0xFFFF;
+    ICache::ppc64_flush_icache_bytes(addr_at(0), NativeMovRegMem::instruction_size); // Flush the icache for both patched instructions.
+  }
+
+  void add_offset_in_bytes(intptr_t radd_offset) {
+    set_offset(offset() + radd_offset); // Read-modify-write of the encoded value.
+  }
+
+  void verify() const {
+    guarantee(Assembler::is_lis(long_at(0)), "load_const32 1st instr");
+    guarantee(Assembler::is_ori(long_at(4)), "load_const32 2nd instr");
+  }
+
+ private:
+  friend inline NativeMovRegMem* nativeMovRegMem_at(address address) { // NOTE(review): access specifiers do not apply to friend declarations, so the private: label above does not restrict this function.
+    NativeMovRegMem* test = (NativeMovRegMem*)address;
+    DEBUG_ONLY( test->verify(); )
+    return test;
+  }
+};
+
 #endif // CPU_PPC_VM_NATIVEINST_PPC_HPP

--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Fri Dec 04 16:38:04 2015 +0100
@@ -698,7 +698,7 @@
 // ----------------------------
 
 reg_class flt_reg(
-/*F0*/              // scratch
+  F0,
   F1,
   F2,
   F3,
@@ -735,7 +735,7 @@
 // Double precision float registers have virtual `high halves' that
 // are needed by the allocator.
 reg_class dbl_reg(
-/*F0,  F0_H*/     // scratch
+  F0,  F0_H,
   F1,  F1_H,
   F2,  F2_H,
   F3,  F3_H,
@@ -1040,8 +1040,6 @@
 //---<  Used for optimization in Compile::Shorten_branches  >---
 //--------------------------------------------------------------
 
-const uint trampoline_stub_size     =  6 * BytesPerInstWord;
-
 class CallStubImpl {
 
  public:
@@ -1053,7 +1051,7 @@
   // This doesn't need to be accurate to the byte, but it
   // must be larger than or equal to the real size of the stub.
   static uint size_call_trampoline() {
-    return trampoline_stub_size;
+    return MacroAssembler::trampoline_stub_size; // Uses the size constant declared in MacroAssembler.
   }
 
   // number of relocations needed by a call trampoline stub
@@ -1079,46 +1077,10 @@
 //   branch via CTR (LR/link still points to the call-site above)
 
 void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
-  // Start the stub.
-  address stub = __ start_a_stub(Compile::MAX_stubs_size/2);
+  address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset); // Stub emission is delegated to the assembler; a NULL result is handled below.
   if (stub == NULL) {
-    ciEnv::current()->record_failure("CodeCache is full");
-    return;
+    ciEnv::current()->record_out_of_memory_failure(); // No stub was produced: report the failure to the compile environment.
   }
-
-  // For java_to_interp stubs we use R11_scratch1 as scratch register
-  // and in call trampoline stubs we use R12_scratch2. This way we
-  // can distinguish them (see is_NativeCallTrampolineStub_at()).
-  Register reg_scratch = R12_scratch2;
-
-  // Create a trampoline stub relocation which relates this trampoline stub
-  // with the call instruction at insts_call_instruction_offset in the
-  // instructions code-section.
-  __ relocate(trampoline_stub_Relocation::spec(__ code()->insts()->start() + insts_call_instruction_offset));
-  const int stub_start_offset = __ offset();
-
-  // Now, create the trampoline stub's code:
-  // - load the TOC
-  // - load the call target from the constant pool
-  // - call
-  __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
-  __ ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, reg_scratch, false);
-  __ mtctr(reg_scratch);
-  __ bctr();
-
-  const address stub_start_addr = __ addr_at(stub_start_offset);
-
-  // FIXME: Assert that the trampoline stub can be identified and patched.
-
-  // Assert that the encoded destination_toc_offset can be identified and that it is correct.
-  assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
-         "encoded offset into the constant pool must match");
-  // Trampoline_stub_size should be good.
-  assert((uint)(__ offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
-  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
-
-  // End the stub.
-  __ end_a_stub();
 }
 
 //=============================================================================
@@ -1156,6 +1118,10 @@
   if (!Compile::current()->in_scratch_emit_size()) {
     // Put the entry point as a constant into the constant pool.
     const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
+    if (entry_point_toc_addr == NULL) {
+      ciEnv::current()->record_out_of_memory_failure();
+      return offsets;
+    }
     const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 
     // Emit the trampoline stub which will be related to the branch-and-link below.
@@ -2474,6 +2440,10 @@
       // Create a non-oop constant, no relocation needed.
       // If it is an IC, it has a virtual_call_Relocation.
       const_toc_addr = __ long_constant((jlong)$src$$constant);
+      if (const_toc_addr == NULL) {
+        ciEnv::current()->record_out_of_memory_failure();
+        return;
+      }
 
       // Get the constant's TOC offset.
       toc_offset = __ offset_to_method_toc(const_toc_addr);
@@ -2495,6 +2465,10 @@
       // Create a non-oop constant, no relocation needed.
       // If it is an IC, it has a virtual_call_Relocation.
       const_toc_addr = __ long_constant((jlong)$src$$constant);
+      if (const_toc_addr == NULL) {
+        ciEnv::current()->record_out_of_memory_failure();
+        return;
+      }
 
       // Get the constant's TOC offset.
       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
@@ -2631,6 +2605,10 @@
         const_toc_addr = __ long_constant((jlong)$src$$constant);
       }
 
+      if (const_toc_addr == NULL) {
+        ciEnv::current()->record_out_of_memory_failure();
+        return;
+      }
       // Get the constant's TOC offset.
       toc_offset = __ offset_to_method_toc(const_toc_addr);
     }
@@ -2660,6 +2638,10 @@
         const_toc_addr = __ long_constant((jlong)$src$$constant);
       }
 
+      if (const_toc_addr == NULL) {
+        ciEnv::current()->record_out_of_memory_failure();
+        return;
+      }
       // Get the constant's TOC offset.
       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
       // Store the toc offset of the constant.
@@ -3408,6 +3390,10 @@
 
         // Put the entry point as a constant into the constant pool.
         const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
+        if (entry_point_toc_addr == NULL) {
+          ciEnv::current()->record_out_of_memory_failure();
+          return;
+        }
         const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 
         // Emit the trampoline stub which will be related to the branch-and-link below.
@@ -3433,76 +3419,6 @@
     }
   %}
 
-  // Emit a method handle call.
-  //
-  // Method handle calls from compiled to compiled are going thru a
-  // c2i -> i2c adapter, extending the frame for their arguments. The
-  // caller however, returns directly to the compiled callee, that has
-  // to cope with the extended frame. We restore the original frame by
-  // loading the callers sp and adding the calculated framesize.
-  enc_class enc_java_handle_call(method meth) %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-
-    MacroAssembler _masm(&cbuf);
-    address entry_point = (address)$meth$$method;
-
-    // Remember the offset not the address.
-    const int start_offset = __ offset();
-    // The trampoline stub.
-    if (!ra_->C->in_scratch_emit_size()) {
-      // No entry point given, use the current pc.
-      // Make sure branch fits into
-      if (entry_point == 0) entry_point = __ pc();
-
-      // Put the entry point as a constant into the constant pool.
-      const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
-      const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
-
-      // Emit the trampoline stub which will be related to the branch-and-link below.
-      CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
-      if (ra_->C->env()->failing()) { return; } // Code cache may be full.
-      assert(_optimized_virtual, "methodHandle call should be a virtual call");
-      __ relocate(relocInfo::opt_virtual_call_type);
-    }
-
-    // The real call.
-    // Note: At this point we do not have the address of the trampoline
-    // stub, and the entry point might be too far away for bl, so __ pc()
-    // serves as dummy and the bl will be patched later.
-    cbuf.set_insts_mark();
-    __ bl(__ pc());  // Emits a relocation.
-
-    assert(_method, "execute next statement conditionally");
-    // The stub for call to interpreter.
-    address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
-    if (stub == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full");
-      return;
-    }
-
-    // Restore original sp.
-    __ ld(R11_scratch1, 0, R1_SP); // Load caller sp.
-    const long framesize = ra_->C->frame_slots() << LogBytesPerInt;
-    unsigned int bytes = (unsigned int)framesize;
-    long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
-    if (Assembler::is_simm(-offset, 16)) {
-      __ addi(R1_SP, R11_scratch1, -offset);
-    } else {
-      __ load_const_optimized(R12_scratch2, -offset);
-      __ add(R1_SP, R11_scratch1, R12_scratch2);
-    }
-#ifdef ASSERT
-  __ ld(R12_scratch2, 0, R1_SP); // Load from unextended_sp.
-  __ cmpd(CCR0, R11_scratch1, R12_scratch2);
-  __ asm_assert_eq("backlink changed", 0x8000);
-#endif
-    // If fails should store backlink before unextending.
-
-    if (ra_->C->env()->failing()) {
-      return;
-    }
-  %}
-
   // Second node of expanded dynamic call - the call.
   enc_class enc_java_dynamic_call_sched(method meth) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_bl);
@@ -3513,6 +3429,10 @@
       // Create a call trampoline stub for the given method.
       const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
       const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
+      if (entry_point_const == NULL) {
+        ciEnv::current()->record_out_of_memory_failure();
+        return;
+      }
       const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
       CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
       if (ra_->C->env()->failing()) { return; } // Code cache may be full.
@@ -3620,7 +3540,11 @@
       address virtual_call_meta_addr = __ pc();
       // Load a clear inline cache.
       AddressLiteral empty_ic((address) Universe::non_oop_word());
-      __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc);
+      bool success = __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc, /*fixed_size*/ true);
+      if (!success) {
+        ciEnv::current()->record_out_of_memory_failure();
+        return;
+      }
       // CALL to fixup routine.  Fixup routine uses ScopeDesc info
       // to determine who we intended to call.
       __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
@@ -3676,7 +3600,11 @@
     __ calculate_address_from_global_toc(Rtoc, __ method_toc());
     // Put entry, env, toc into the constant pool, this needs up to 3 constant
     // pool entries; call_c_using_toc will optimize the call.
-    __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
+    bool success = __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
+    if (!success) {
+      ciEnv::current()->record_out_of_memory_failure();
+      return;
+    }
 #endif
 
     // Check the ret_addr_offset.
@@ -6263,6 +6191,10 @@
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
     address float_address = __ float_constant($src$$constant);
+    if (float_address == NULL) {
+      ciEnv::current()->record_out_of_memory_failure();
+      return;
+    }
     __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
   %}
   ins_pipe(pipe_class_memory);
@@ -6284,6 +6216,10 @@
     FloatRegister Rdst    = $dst$$FloatRegister;
     Register Rtoc         = $toc$$Register;
     address float_address = __ float_constant($src$$constant);
+    if (float_address == NULL) {
+      ciEnv::current()->record_out_of_memory_failure();
+      return;
+    }
     int offset            = __ offset_to_method_toc(float_address);
     int hi = (offset + (1<<15))>>16;
     int lo = offset - hi * (1<<16);
@@ -6318,7 +6254,12 @@
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
-    int offset =  __ offset_to_method_toc(__ double_constant($src$$constant));
+    address float_address = __ double_constant($src$$constant);
+    if (float_address == NULL) {
+      ciEnv::current()->record_out_of_memory_failure();
+      return;
+    }
+    int offset =  __ offset_to_method_toc(float_address);
     __ lfd($dst$$FloatRegister, offset, $toc$$Register);
   %}
   ins_pipe(pipe_class_memory);
@@ -6340,7 +6281,11 @@
     FloatRegister Rdst    = $dst$$FloatRegister;
     Register      Rtoc    = $toc$$Register;
     address float_address = __ double_constant($src$$constant);
-    int offset            = __ offset_to_method_toc(float_address);
+    if (float_address == NULL) {
+      ciEnv::current()->record_out_of_memory_failure();
+      return;
+    }
+    int offset = __ offset_to_method_toc(float_address);
     int hi = (offset + (1<<15))>>16;
     int lo = offset - hi * (1<<16);
 
@@ -11790,7 +11735,6 @@
 instruct CallStaticJavaDirect(method meth) %{
   match(CallStaticJava);
   effect(USE meth);
-  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
   ins_cost(CALL_COST);
 
   ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
@@ -11801,20 +11745,6 @@
   ins_pipe(pipe_class_call);
 %}
 
-// Schedulable version of call static node.
-instruct CallStaticJavaDirectHandle(method meth) %{
-  match(CallStaticJava);
-  effect(USE meth);
-  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
-  ins_cost(CALL_COST);
-
-  ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
-
-  format %{ "CALL,static $meth \t// ==> " %}
-  ins_encode( enc_java_handle_call(meth) );
-  ins_pipe(pipe_class_call);
-%}
-
 // Call Java Dynamic Instruction
 
 // Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).

--- a/hotspot/src/cpu/ppc/vm/register_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/register_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -627,6 +627,9 @@
 REGISTER_DECLARATION(Register, R28_mdx,               R28);
 #endif // CC_INTERP
 
+REGISTER_DECLARATION(Register, R19_inline_cache_reg, R19);
+REGISTER_DECLARATION(Register, R29_TOC, R29);
+
 #ifndef DONT_USE_REGISTER_DEFINES
 #define R21_tmp1         AS_REGISTER(Register, R21)
 #define R22_tmp2         AS_REGISTER(Register, R22)
@@ -648,6 +651,9 @@
 #define R28_mdx               AS_REGISTER(Register, R28)
 #endif
 
+#define R19_inline_cache_reg AS_REGISTER(Register, R19)
+#define R29_TOC AS_REGISTER(Register, R29)
+
 #define CCR4_is_synced AS_REGISTER(ConditionRegister, CCR4)
 #endif

--- a/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -84,13 +84,11 @@
     NativeConditionalFarBranch* branch = NativeConditionalFarBranch_at(inst_loc);
     return branch->branch_destination();
   } else {
-    // There are two instructions at the beginning of a stub, therefore we
-    // load at orig_addr + 8.
     orig_addr = nativeCall_at(inst_loc)->get_trampoline();
     if (orig_addr == NULL) {
       return (address) -1;
     } else {
-      return (address) nativeMovConstReg_at(orig_addr + 8)->data();
+      return ((NativeCallTrampolineStub*)orig_addr)->destination();
     }
   }
 }

--- a/hotspot/src/cpu/ppc/vm/runtime_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/runtime_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,16 +45,6 @@
 
 #ifdef COMPILER2
 
-// SP adjustment (must use unextended SP) for method handle call sites
-// during exception handling.
-static intptr_t adjust_SP_for_methodhandle_callsite(JavaThread *thread) {
-  RegisterMap map(thread, false);
-  // The frame constructor will do the correction for us (see frame::adjust_unextended_SP).
-  frame mh_caller_frame = thread->last_frame().sender(&map);
-  assert(mh_caller_frame.is_compiled_frame(), "Only may reach here for compiled MH call sites");
-  return (intptr_t) mh_caller_frame.unextended_sp();
-}
-
 //------------------------------generate_exception_blob---------------------------
 // Creates exception blob at the end.
 // Using exception blob, this code is jumped from a compiled method.
@@ -129,17 +119,10 @@
   OopMapSet* oop_maps = new OopMapSet();
   oop_maps->add_gc_map(calls_return_pc - start, map);
 
-  // Get unextended_sp for method handle call sites.
-  Label mh_callsite, mh_done; // Use a 2nd c call if it's a method handle call site.
-  __ lwa(R4_ARG2, in_bytes(JavaThread::is_method_handle_return_offset()), R16_thread);
-  __ cmpwi(CCR0, R4_ARG2, 0);
-  __ bne(CCR0, mh_callsite);
-
   __ mtctr(R3_RET); // Move address of exception handler to SR_CTR.
   __ reset_last_Java_frame();
   __ pop_frame();
 
-  __ bind(mh_done);
   // We have a handler in register SR_CTR (could be deopt blob).
 
   // Get the exception oop.
@@ -161,25 +144,6 @@
   __ mtlr(R4_ARG2);
   __ bctr();
 
-
-  // Same as above, but also set sp to unextended_sp.
-  __ bind(mh_callsite);
-  __ mr(R31, R3_RET); // Save branch address.
-  __ mr(R3_ARG1, R16_thread);
-#if defined(ABI_ELFv2)
-  __ call_c((address) adjust_SP_for_methodhandle_callsite, relocInfo::none);
-#else
-  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, adjust_SP_for_methodhandle_callsite), relocInfo::none);
-#endif
-  // Returns unextended_sp in R3_RET.
-
-  __ mtctr(R31); // Move address of exception handler to SR_CTR.
-  __ reset_last_Java_frame();
-
-  __ mr(R1_SP, R3_RET); // Set sp to unextended_sp.
-  __ b(mh_done);
-
-
   // Make sure all code is generated.
   masm->flush();

--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -62,7 +62,7 @@
   // Support different return pc locations.
   enum ReturnPCLocation {
     return_pc_is_lr,
-    return_pc_is_r4,
+    return_pc_is_pre_saved, // The return pc is expected to be in the _abi(lr) slot already; the register saver neither loads nor stores it.
     return_pc_is_thread_saved_exception_pc
   };
 
@@ -241,16 +241,17 @@
   __ mfcr(R31);
   __ std(R31, _abi(cr), R1_SP);
   switch (return_pc_location) {
-    case return_pc_is_lr:    __ mflr(R31);           break;
-    case return_pc_is_r4:    __ mr(R31, R4);     break;
-    case return_pc_is_thread_saved_exception_pc:
-                             __ ld(R31, thread_(saved_exception_pc)); break;
+    case return_pc_is_lr: __ mflr(R31); break;
+    case return_pc_is_pre_saved: assert(return_pc_adjustment == 0, "unsupported"); break;
+    case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
     default: ShouldNotReachHere();
   }
-  if (return_pc_adjustment != 0) {
-    __ addi(R31, R31, return_pc_adjustment);
+  if (return_pc_location != return_pc_is_pre_saved) {
+    if (return_pc_adjustment != 0) {
+      __ addi(R31, R31, return_pc_adjustment);
+    }
+    __ std(R31, _abi(lr), R1_SP);
   }
-  __ std(R31, _abi(lr), R1_SP);
 
   // push a new frame
   __ push_frame(frame_size_in_bytes, R31);
@@ -646,7 +647,7 @@
   return round_to(stk, 2);
 }
 
-#ifdef COMPILER2
+#if defined(COMPILER1) || defined(COMPILER2)
 // Calling convention for calling C code.
 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                         VMRegPair *regs,
@@ -2576,7 +2577,7 @@
 #endif
 }
 
-#ifdef COMPILER2
+#if defined(COMPILER1) || defined(COMPILER2)
 // Frame generation for deopt and uncommon trap blobs.
 static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
                                 /* Read */
@@ -2734,7 +2735,7 @@
 
   const address start = __ pc();
 
-#ifdef COMPILER2
+#if defined(COMPILER1) || defined(COMPILER2)
   // --------------------------------------------------------------------------
   // Prolog for non exception case!
 
@@ -2783,28 +2784,43 @@
 
   BLOCK_COMMENT("Prolog for exception case");
 
-  // The RegisterSaves doesn't need to adjust the return pc for this situation.
-  const int return_pc_adjustment_exception = 0;
-
-  // Push the "unpack frame".
-  // Save everything in sight.
-  assert(R4 == R4_ARG2, "exception pc must be in r4");
-  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
-                                                             &first_frame_size_in_bytes,
-                                                             /*generate_oop_map=*/ false,
-                                                             return_pc_adjustment_exception,
-                                                             RegisterSaver::return_pc_is_r4);
-
-  // Deopt during an exception. Save exec mode for unpack_frames.
-  __ li(exec_mode_reg, Deoptimization::Unpack_exception);
-
   // Store exception oop and pc in thread (location known to GC).
   // This is needed since the call to "fetch_unroll_info()" may safepoint.
   __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
   __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
+  __ std(R4_ARG2, _abi(lr), R1_SP);
+
+  // Vanilla deoptimization with an exception pending in exception_oop.
+  int exception_in_tls_offset = __ pc() - start;
+
+  // Push the "unpack frame".
+  // Save everything in sight.
+  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
+                                                             &first_frame_size_in_bytes,
+                                                             /*generate_oop_map=*/ false,
+                                                             /*return_pc_adjustment_exception=*/ 0,
+                                                             RegisterSaver::return_pc_is_pre_saved);
+
+  // Deopt during an exception. Save exec mode for unpack_frames.
+  __ li(exec_mode_reg, Deoptimization::Unpack_exception);
 
   // fall through
 
+  int reexecute_offset = 0;
+#ifdef COMPILER1
+  __ b(exec_mode_initialized);
+
+  // Reexecute entry, similar to c2 uncommon trap
+  reexecute_offset = __ pc() - start;
+
+  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
+                                                             &first_frame_size_in_bytes,
+                                                             /*generate_oop_map=*/ false,
+                                                             /*return_pc_adjustment_reexecute=*/ 0,
+                                                             RegisterSaver::return_pc_is_pre_saved);
+  __ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
+#endif
+
   // --------------------------------------------------------------------------
   __ BIND(exec_mode_initialized);
 
@@ -2918,7 +2934,9 @@
   int exception_offset = __ pc() - start;
 #endif // COMPILER2
 
-  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, 0, first_frame_size_in_bytes / wordSize);
+  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
+                                           reexecute_offset, first_frame_size_in_bytes / wordSize);
+  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
 }
 
 #ifdef COMPILER2

--- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -48,6 +48,12 @@
 #define BLOCK_COMMENT(str) __ block_comment(str)
 #endif
 
+#if defined(ABI_ELFv2)
+#define STUB_ENTRY(name) StubRoutines::name()
+#else
+#define STUB_ENTRY(name) ((FunctionDescriptor*)StubRoutines::name())->entry()
+#endif
+
 class StubGenerator: public StubCodeGenerator {
  private:
 
@@ -259,8 +265,7 @@
       //
 
       // global toc register
-      __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
-
+      __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R11_scratch1);
       // Remember the senderSP so we interpreter can pop c2i arguments off of the stack
       // when called via a c2i.
 
@@ -619,14 +624,17 @@
   //  Kills:
   //     nothing
   //
-  void gen_write_ref_array_pre_barrier(Register from, Register to, Register count, bool dest_uninitialized, Register Rtmp1) {
+  void gen_write_ref_array_pre_barrier(Register from, Register to, Register count, bool dest_uninitialized, Register Rtmp1,
+                                       Register preserve1 = noreg, Register preserve2 = noreg) {
     BarrierSet* const bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCTLogging:
         // With G1, don't generate the call if we statically know that the target in uninitialized
         if (!dest_uninitialized) {
-          const int spill_slots = 4 * wordSize;
-          const int frame_size  = frame::abi_reg_args_size + spill_slots;
+          int spill_slots = 3;
+          if (preserve1 != noreg) { spill_slots++; }
+          if (preserve2 != noreg) { spill_slots++; }
+          const int frame_size = align_size_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
           Label filtered;
 
           // Is marking active?
@@ -640,17 +648,23 @@
           __ beq(CCR0, filtered);
 
           __ save_LR_CR(R0);
-          __ push_frame_reg_args(spill_slots, R0);
-          __ std(from,  frame_size - 1 * wordSize, R1_SP);
-          __ std(to,    frame_size - 2 * wordSize, R1_SP);
-          __ std(count, frame_size - 3 * wordSize, R1_SP);
+          __ push_frame(frame_size, R0);
+          int slot_nr = 0;
+          __ std(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
+          __ std(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
+          __ std(count, frame_size - (++slot_nr) * wordSize, R1_SP);
+          if (preserve1 != noreg) { __ std(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
+          if (preserve2 != noreg) { __ std(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
 
           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), to, count);
 
-          __ ld(from,  frame_size - 1 * wordSize, R1_SP);
-          __ ld(to,    frame_size - 2 * wordSize, R1_SP);
-          __ ld(count, frame_size - 3 * wordSize, R1_SP);
-          __ pop_frame();
+          slot_nr = 0;
+          __ ld(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
+          __ ld(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
+          __ ld(count, frame_size - (++slot_nr) * wordSize, R1_SP);
+          if (preserve1 != noreg) { __ ld(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
+          if (preserve2 != noreg) { __ ld(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
+          __ addi(R1_SP, R1_SP, frame_size); // pop_frame()
           __ restore_LR_CR(R0);
 
           __ bind(filtered);
@@ -674,27 +688,22 @@
   //
   //  The input registers and R0 are overwritten.
   //
-  void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) {
+  void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, Register preserve = noreg) {
     BarrierSet* const bs = Universe::heap()->barrier_set();
 
     switch (bs->kind()) {
       case BarrierSet::G1SATBCTLogging:
         {
-          if (branchToEnd) {
-            __ save_LR_CR(R0);
-            // We need this frame only to spill LR.
-            __ push_frame_reg_args(0, R0);
-            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
-            __ pop_frame();
-            __ restore_LR_CR(R0);
-          } else {
-            // Tail call: fake call from stub caller by branching without linking.
-            address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
-            __ mr_if_needed(R3_ARG1, addr);
-            __ mr_if_needed(R4_ARG2, count);
-            __ load_const(R11, entry_point, R0);
-            __ call_c_and_return_to_caller(R11);
-          }
+          int spill_slots = (preserve != noreg) ? 1 : 0;
+          const int frame_size = align_size_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
+
+          __ save_LR_CR(R0);
+          __ push_frame(frame_size, R0);
+          if (preserve != noreg) { __ std(preserve, frame_size - 1 * wordSize, R1_SP); }
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
+          if (preserve != noreg) { __ ld(preserve, frame_size - 1 * wordSize, R1_SP); }
+          __ addi(R1_SP, R1_SP, frame_size); // pop_frame();
+          __ restore_LR_CR(R0);
         }
         break;
       case BarrierSet::CardTableForRS:
@@ -729,12 +738,9 @@
           __ addi(addr, addr, 1);
           __ bdnz(Lstore_loop);
           __ bind(Lskip_loop);
-
-          if (!branchToEnd) __ blr();
         }
       break;
       case BarrierSet::ModRef:
-        if (!branchToEnd) __ blr();
         break;
       default:
         ShouldNotReachHere();
@@ -763,8 +769,10 @@
 
     // Procedure for large arrays (uses data cache block zero instruction).
     Label dwloop, fast, fastloop, restloop, lastdword, done;
-    int cl_size=VM_Version::get_cache_line_size(), cl_dwords=cl_size>>3, cl_dwordaddr_bits=exact_log2(cl_dwords);
-    int min_dcbz=2; // Needs to be positive, apply dcbz only to at least min_dcbz cache lines.
+    int cl_size = VM_Version::L1_data_cache_line_size();
+    int cl_dwords = cl_size >> 3;
+    int cl_dwordaddr_bits = exact_log2(cl_dwords);
+    int min_dcbz = 2; // Needs to be positive, apply dcbz only to at least min_dcbz cache lines.
 
     // Clear up to 128byte boundary if long enough, dword_cnt=(16-(base>>3))%16.
     __ dcbtst(base_ptr_reg);                    // Indicate write access to first cache line ...
@@ -1081,7 +1089,6 @@
     Register tmp1 = R6_ARG4;
     Register tmp2 = R7_ARG5;
 
-    Label l_overlap;
 #ifdef ASSERT
     __ srdi_(tmp2, R5_ARG3, 31);
     __ asm_assert_eq("missing zero extend", 0xAFFE);
@@ -1091,19 +1098,11 @@
     __ sldi(tmp2, R5_ARG3, log2_elem_size); // size in bytes
     __ cmpld(CCR0, R3_ARG1, R4_ARG2); // Use unsigned comparison!
     __ cmpld(CCR1, tmp1, tmp2);
-    __ crand(CCR0, Assembler::less, CCR1, Assembler::less);
-    __ blt(CCR0, l_overlap); // Src before dst and distance smaller than size.
-
-    // need to copy forwards
-    if (__ is_within_range_of_b(no_overlap_target, __ pc())) {
-      __ b(no_overlap_target);
-    } else {
-      __ load_const(tmp1, no_overlap_target, tmp2);
-      __ mtctr(tmp1);
-      __ bctr();
-    }
-
-    __ bind(l_overlap);
+    __ crnand(CCR0, Assembler::less, CCR1, Assembler::less);
+    // Overlaps if Src before dst and distance smaller than size.
+    // Branch to forward copy routine otherwise (within range of 32kB).
+    __ bc(Assembler::bcondCRbiIs1, Assembler::bi0(CCR0, Assembler::less), no_overlap_target);
+
     // need to copy backwards
   }
 
@@ -1248,6 +1247,7 @@
     }
 
     __ bind(l_4);
+    __ li(R3_RET, 0); // return 0
     __ blr();
 
     return start;
@@ -1269,15 +1269,9 @@
     Register tmp2 = R7_ARG5;
     Register tmp3 = R8_ARG6;
 
-#if defined(ABI_ELFv2)
     address nooverlap_target = aligned ?
-      StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
-      StubRoutines::jbyte_disjoint_arraycopy();
-#else
-    address nooverlap_target = aligned ?
-      ((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
-      ((FunctionDescriptor*)StubRoutines::jbyte_disjoint_arraycopy())->entry();
-#endif
+      STUB_ENTRY(arrayof_jbyte_disjoint_arraycopy) :
+      STUB_ENTRY(jbyte_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 0);
     // Do reverse copy. We assume the case of actual overlap is rare enough
@@ -1292,6 +1286,7 @@
     __ lbzx(tmp1, R3_ARG1, R5_ARG3);
     __ bge(CCR0, l_1);
 
+    __ li(R3_RET, 0); // return 0
     __ blr();
 
     return start;
@@ -1474,6 +1469,7 @@
       __ bdnz(l_5);
     }
     __ bind(l_4);
+    __ li(R3_RET, 0); // return 0
     __ blr();
 
     return start;
@@ -1495,15 +1491,9 @@
     Register tmp2 = R7_ARG5;
     Register tmp3 = R8_ARG6;
 
-#if defined(ABI_ELFv2)
     address nooverlap_target = aligned ?
-        StubRoutines::arrayof_jshort_disjoint_arraycopy() :
-        StubRoutines::jshort_disjoint_arraycopy();
-#else
-    address nooverlap_target = aligned ?
-        ((FunctionDescriptor*)StubRoutines::arrayof_jshort_disjoint_arraycopy())->entry() :
-        ((FunctionDescriptor*)StubRoutines::jshort_disjoint_arraycopy())->entry();
-#endif
+      STUB_ENTRY(arrayof_jshort_disjoint_arraycopy) :
+      STUB_ENTRY(jshort_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 1);
 
@@ -1517,6 +1507,7 @@
     __ lhzx(tmp2, R3_ARG1, tmp1);
     __ bge(CCR0, l_1);
 
+    __ li(R3_RET, 0); // return 0
     __ blr();
 
     return start;
@@ -1620,6 +1611,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
     generate_disjoint_int_copy_core(aligned);
+    __ li(R3_RET, 0); // return 0
     __ blr();
     return start;
   }
@@ -1704,20 +1696,15 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
 
-#if defined(ABI_ELFv2)
     address nooverlap_target = aligned ?
-      StubRoutines::arrayof_jint_disjoint_arraycopy() :
-      StubRoutines::jint_disjoint_arraycopy();
-#else
-    address nooverlap_target = aligned ?
-      ((FunctionDescriptor*)StubRoutines::arrayof_jint_disjoint_arraycopy())->entry() :
-      ((FunctionDescriptor*)StubRoutines::jint_disjoint_arraycopy())->entry();
-#endif
+      STUB_ENTRY(arrayof_jint_disjoint_arraycopy) :
+      STUB_ENTRY(jint_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 2);
 
     generate_conjoint_int_copy_core(aligned);
 
+    __ li(R3_RET, 0); // return 0
     __ blr();
 
     return start;
@@ -1796,6 +1783,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
     generate_disjoint_long_copy_core(aligned);
+    __ li(R3_RET, 0); // return 0
     __ blr();
 
     return start;
@@ -1878,19 +1866,14 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
 
-#if defined(ABI_ELFv2)
     address nooverlap_target = aligned ?
-      StubRoutines::arrayof_jlong_disjoint_arraycopy() :
-      StubRoutines::jlong_disjoint_arraycopy();
-#else
-    address nooverlap_target = aligned ?
-      ((FunctionDescriptor*)StubRoutines::arrayof_jlong_disjoint_arraycopy())->entry() :
-      ((FunctionDescriptor*)StubRoutines::jlong_disjoint_arraycopy())->entry();
-#endif
+      STUB_ENTRY(arrayof_jlong_disjoint_arraycopy) :
+      STUB_ENTRY(jlong_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 3);
     generate_conjoint_long_copy_core(aligned);
 
+    __ li(R3_RET, 0); // return 0
     __ blr();
 
     return start;
@@ -1910,15 +1893,9 @@
 
     address start = __ function_entry();
 
-#if defined(ABI_ELFv2)
     address nooverlap_target = aligned ?
-      StubRoutines::arrayof_oop_disjoint_arraycopy() :
-      StubRoutines::oop_disjoint_arraycopy();
-#else
-    address nooverlap_target = aligned ?
-      ((FunctionDescriptor*)StubRoutines::arrayof_oop_disjoint_arraycopy())->entry() :
-      ((FunctionDescriptor*)StubRoutines::oop_disjoint_arraycopy())->entry();
-#endif
+      STUB_ENTRY(arrayof_oop_disjoint_arraycopy) :
+      STUB_ENTRY(oop_disjoint_arraycopy);
 
     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
 
@@ -1934,7 +1911,9 @@
       generate_conjoint_long_copy_core(aligned);
     }
 
-    gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
+    gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
+    __ li(R3_RET, 0); // return 0
+    __ blr();
     return start;
   }
 
@@ -1964,11 +1943,460 @@
       generate_disjoint_long_copy_core(aligned);
     }
 
-    gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
+    gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
+    __ li(R3_RET, 0); // return 0
+    __ blr();
+
+    return start;
+  }
+
+
+  // Helper for generating a dynamic type check.
+  // Smashes the given temp register; R0 is also passed on to the subtype checks.
+  void generate_type_check(Register sub_klass,
+                           Register super_check_offset,
+                           Register super_klass,
+                           Register temp,
+                           Label& L_success) {
+    assert_different_registers(sub_klass, super_check_offset, super_klass);
+
+    BLOCK_COMMENT("type_check:");
+
+    Label L_miss;
+
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, R0, &L_success, &L_miss, NULL,
+                                     super_check_offset);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, temp, R0, &L_success, NULL);
+
+    // Fall through on failure!
+    __ bind(L_miss);
+  }
+
+
+  //  Generate stub for checked oop copy.
+  //
+  // Arguments for generated stub:
+  //      from:  R3
+  //      to:    R4
+  //      count: R5 treated as signed
+  //      ckoff: R6 (super_check_offset)
+  //      ckval: R7 (super_klass)
+  //      ret:   R3 zero for success; (-1^K) where K is partial transfer count
+  //
+  address generate_checkcast_copy(const char *name, bool dest_uninitialized) {
+
+    const Register R3_from   = R3_ARG1;      // source array address
+    const Register R4_to     = R4_ARG2;      // destination array address
+    const Register R5_count  = R5_ARG3;      // elements count
+    const Register R6_ckoff  = R6_ARG4;      // super_check_offset
+    const Register R7_ckval  = R7_ARG5;      // super_klass
+
+    const Register R8_offset = R8_ARG6;      // loop var, with stride wordSize
+    const Register R9_remain = R9_ARG7;      // loop var, with stride -1
+    const Register R10_oop   = R10_ARG8;     // actual oop copied
+    const Register R11_klass = R11_scratch1; // oop._klass
+    const Register R12_tmp   = R12_scratch2;
+
+    const Register R2_minus1 = R2;
+
+    //__ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    // TODO: Assert that int is 64 bit sign extended and arrays are not conjoint.
+
+    gen_write_ref_array_pre_barrier(R3_from, R4_to, R5_count, dest_uninitialized, R12_tmp, /* preserve: */ R6_ckoff, R7_ckval);
+
+    //inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R12_tmp, R3_RET);
+
+    Label load_element, store_element, store_null, success, do_card_marks;
+    __ or_(R9_remain, R5_count, R5_count); // Initialize loop index, and test it.
+    __ li(R8_offset, 0);                   // Offset from start of arrays.
+    __ li(R2_minus1, -1);
+    __ bne(CCR0, load_element);
+
+    // Empty array: Nothing to do.
+    __ li(R3_RET, 0);           // Return 0 on (trivial) success.
+    __ blr();
+
+    // ======== begin loop ========
+    // (Entry is load_element.)
+    __ align(OptoLoopAlignment);
+    __ bind(store_element);
+    if (UseCompressedOops) {
+      __ encode_heap_oop_not_null(R10_oop);
+      __ bind(store_null);
+      __ stw(R10_oop, R8_offset, R4_to);
+    } else {
+      __ bind(store_null);
+      __ std(R10_oop, R8_offset, R4_to);
+    }
+
+    __ addi(R8_offset, R8_offset, heapOopSize);   // Step to next offset.
+    __ add_(R9_remain, R2_minus1, R9_remain);     // Decrement the count.
+    __ beq(CCR0, success);
+
+    // ======== loop entry is here ========
+    __ bind(load_element);
+    __ load_heap_oop(R10_oop, R8_offset, R3_from, &store_null);  // Load the oop.
+
+    __ load_klass(R11_klass, R10_oop); // Query the object klass.
+
+    generate_type_check(R11_klass, R6_ckoff, R7_ckval, R12_tmp,
+                        // Branch to this on success:
+                        store_element);
+    // ======== end loop ========
+
+    // It was a real error; we must depend on the caller to finish the job.
+    // Register R9_remain has number of *remaining* oops, R5_count number of *total* oops.
+    // Emit GC store barriers for the oops we have copied (R5_count minus R9_remain),
+    // and report their number to the caller.
+    __ subf_(R5_count, R9_remain, R5_count);
+    __ nand(R3_RET, R5_count, R5_count);   // report (-1^K) to caller
+    __ bne(CCR0, do_card_marks);
+    __ blr();
+
+    __ bind(success);
+    __ li(R3_RET, 0);
+
+    __ bind(do_card_marks);
+    // Store check on R4_to[0..R5_count-1].
+    gen_write_ref_array_post_barrier(R4_to, R5_count, R12_tmp, /* preserve: */ R3_RET);
+    __ blr();
+    return start;
+  }
+
+
+  //  Generate 'unsafe' array copy stub.
+  //  Though just as safe as the other stubs, it takes an unscaled
+  //  size_t argument instead of an element count.
+  //
+  // Arguments for generated stub:
+  //      from:  R3
+  //      to:    R4
+  //      count: R5 byte count, treated as ssize_t, can be zero
+  //
+  // Examines the alignment of the operands and dispatches
+  // to a long, int, short, or byte copy loop.
+  //
+  address generate_unsafe_copy(const char* name,
+                               address byte_copy_entry,
+                               address short_copy_entry,
+                               address int_copy_entry,
+                               address long_copy_entry) {
+
+    const Register R3_from   = R3_ARG1;      // source array address
+    const Register R4_to     = R4_ARG2;      // destination array address
+    const Register R5_count  = R5_ARG3;      // byte count on entry (as long on PPC64)
+
+    const Register R6_bits   = R6_ARG4;      // test copy of low bits
+    const Register R7_tmp    = R7_ARG5;
+
+    //__ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    // Bump this on entry, not on exit:
+    //inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, R6_bits, R7_tmp);
+
+    Label short_copy, int_copy, long_copy;
+
+    __ orr(R6_bits, R3_from, R4_to);
+    __ orr(R6_bits, R6_bits, R5_count);
+    __ andi_(R0, R6_bits, (BytesPerLong-1));
+    __ beq(CCR0, long_copy);
+
+    __ andi_(R0, R6_bits, (BytesPerInt-1));
+    __ beq(CCR0, int_copy);
+
+    __ andi_(R0, R6_bits, (BytesPerShort-1));
+    __ beq(CCR0, short_copy);
+
+    // byte_copy:
+    __ b(byte_copy_entry);
+
+    __ bind(short_copy);
+    __ srwi(R5_count, R5_count, LogBytesPerShort);
+    __ b(short_copy_entry);
+
+    __ bind(int_copy);
+    __ srwi(R5_count, R5_count, LogBytesPerInt);
+    __ b(int_copy_entry);
+
+    __ bind(long_copy);
+    __ srwi(R5_count, R5_count, LogBytesPerLong);
+    __ b(long_copy_entry);
 
     return start;
   }
 
+
+  // Perform range checks on the proposed arraycopy.
+  // Kills the two temps, but nothing else.
+  // Only reads src_pos, dst_pos and length; they are used as 64-bit values (add, cmpd).
+  void arraycopy_range_checks(Register src,     // source array oop
+                              Register src_pos, // source position
+                              Register dst,     // destination array oop
+                              Register dst_pos, // destination position
+                              Register length,  // length of copy
+                              Register temp1, Register temp2,
+                              Label& L_failed) {
+    BLOCK_COMMENT("arraycopy_range_checks:");
+
+    const Register array_length = temp1;  // scratch
+    const Register end_pos      = temp2;  // scratch
+
+    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;
+    __ lwa(array_length, arrayOopDesc::length_offset_in_bytes(), src);
+    __ add(end_pos, src_pos, length);  // src_pos + length
+    __ cmpd(CCR0, end_pos, array_length);
+    __ bgt(CCR0, L_failed);
+
+    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
+    __ lwa(array_length, arrayOopDesc::length_offset_in_bytes(), dst);
+    __ add(end_pos, dst_pos, length);  // dst_pos + length
+    __ cmpd(CCR0, end_pos, array_length);
+    __ bgt(CCR0, L_failed);
+
+    BLOCK_COMMENT("arraycopy_range_checks done");
+  }
+
+
+  //
+  //  Generate generic array copy stubs
+  //
+  //  Input:
+  //    R3    -  src oop
+  //    R4    -  src_pos
+  //    R5    -  dst oop
+  //    R6    -  dst_pos
+  //    R7    -  element count
+  //
+  //  Output:
+  //    R3 ==  0  -  success
+  //    R3 == -1  -  need to call System.arraycopy
+  //
+  address generate_generic_copy(const char *name,
+                                address entry_jbyte_arraycopy,
+                                address entry_jshort_arraycopy,
+                                address entry_jint_arraycopy,
+                                address entry_oop_arraycopy,
+                                address entry_disjoint_oop_arraycopy,
+                                address entry_jlong_arraycopy,
+                                address entry_checkcast_arraycopy) {
+    Label L_failed, L_objArray;
+
+    // Input registers
+    const Register src       = R3_ARG1;  // source array oop
+    const Register src_pos   = R4_ARG2;  // source position
+    const Register dst       = R5_ARG3;  // destination array oop
+    const Register dst_pos   = R6_ARG4;  // destination position
+    const Register length    = R7_ARG5;  // elements count
+
+    // registers used as temp
+    const Register src_klass = R8_ARG6;  // source array klass
+    const Register dst_klass = R9_ARG7;  // destination array klass
+    const Register lh        = R10_ARG8; // layout helper
+    const Register temp      = R2;
+
+    //__ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    // Bump this on entry, not on exit:
+    //inc_counter_np(SharedRuntime::_generic_array_copy_ctr, lh, temp);
+
+    // In principle, the int arguments could be dirty.
+
+    //-----------------------------------------------------------------------
+    // Assembler stubs will be used for this call to arraycopy
+    // if the following conditions are met:
+    //
+    // (1) src and dst must not be null.
+    // (2) src_pos must not be negative.
+    // (3) dst_pos must not be negative.
+    // (4) length  must not be negative.
+    // (5) src klass and dst klass should be the same and not NULL.
+    // (6) src and dst should be arrays.
+    // (7) src_pos + length must not exceed length of src.
+    // (8) dst_pos + length must not exceed length of dst.
+    BLOCK_COMMENT("arraycopy initial argument checks");
+
+    __ cmpdi(CCR1, src, 0);      // if (src == NULL) return -1;
+    __ extsw_(src_pos, src_pos); // if (src_pos < 0) return -1;
+    __ cmpdi(CCR5, dst, 0);      // if (dst == NULL) return -1;
+    __ cror(CCR1, Assembler::equal, CCR0, Assembler::less);
+    __ extsw_(dst_pos, dst_pos); // if (dst_pos < 0) return -1;
+    __ cror(CCR5, Assembler::equal, CCR0, Assembler::less);
+    __ extsw_(length, length);   // if (length < 0) return -1;
+    __ cror(CCR1, Assembler::equal, CCR5, Assembler::equal);
+    __ cror(CCR1, Assembler::equal, CCR0, Assembler::less);
+    __ beq(CCR1, L_failed);
+
+    BLOCK_COMMENT("arraycopy argument klass checks");
+    __ load_klass(src_klass, src);
+    __ load_klass(dst_klass, dst);
+
+    // Load layout helper
+    //
+    //  |array_tag|     | header_size | element_type |     |log2_element_size|
+    // 32        30    24            16              8     2                 0
+    //
+    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
+    //
+
+    int lh_offset = in_bytes(Klass::layout_helper_offset());
+
+    // Load the 32-bit layout helper of the source klass; it is tested with cmpw/cmpwi below.
+    __ lwz(lh, lh_offset, src_klass);
+
+    // Handle objArrays completely differently...
+    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+    __ load_const_optimized(temp, objArray_lh, R0);
+    __ cmpw(CCR0, lh, temp);
+    __ beq(CCR0, L_objArray);
+
+    __ cmpd(CCR5, src_klass, dst_klass);          // if (src->klass() != dst->klass()) return -1;
+    __ cmpwi(CCR6, lh, Klass::_lh_neutral_value); // if (!src->is_Array()) return -1;
+
+    __ crnand(CCR5, Assembler::equal, CCR6, Assembler::less);
+    __ beq(CCR5, L_failed);
+
+    // At this point, it is known to be a typeArray (array_tag 0x3).
+#ifdef ASSERT
+    { Label L;
+      jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
+      __ load_const_optimized(temp, lh_prim_tag_in_place, R0);
+      __ cmpw(CCR0, lh, temp);
+      __ bge(CCR0, L);
+      __ stop("must be a primitive array");
+      __ bind(L);
+    }
+#endif
+
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
+                           temp, dst_klass, L_failed);
+
+    // TypeArrayKlass
+    //
+    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
+    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
+    //
+
+    const Register offset = dst_klass;    // array offset
+    const Register elsize = src_klass;    // log2 element size
+
+    __ rldicl(offset, lh, 64 - Klass::_lh_header_size_shift, 64 - exact_log2(Klass::_lh_header_size_mask + 1));
+    __ andi(elsize, lh, Klass::_lh_log2_element_size_mask);
+    __ add(src, offset, src);       // src array offset
+    __ add(dst, offset, dst);       // dst array offset
+
+    // Next registers should be set before the jump to corresponding stub.
+    const Register from     = R3_ARG1;  // source array address
+    const Register to       = R4_ARG2;  // destination array address
+    const Register count    = R5_ARG3;  // elements count
+
+    // 'from', 'to', 'count' registers should be set in this order
+    // since they are the same as 'src', 'src_pos', 'dst'.
+
+    BLOCK_COMMENT("scale indexes to element size");
+    __ sld(src_pos, src_pos, elsize);
+    __ sld(dst_pos, dst_pos, elsize);
+    __ add(from, src_pos, src);  // src_addr
+    __ add(to, dst_pos, dst);    // dst_addr
+    __ mr(count, length);        // length
+
+    BLOCK_COMMENT("choose copy loop based on element size");
+    // Using conditional branches with range 32kB.
+    const int bo = Assembler::bcondCRbiIs1, bi = Assembler::bi0(CCR0, Assembler::equal);
+    __ cmpwi(CCR0, elsize, 0);
+    __ bc(bo, bi, entry_jbyte_arraycopy);
+    __ cmpwi(CCR0, elsize, LogBytesPerShort);
+    __ bc(bo, bi, entry_jshort_arraycopy);
+    __ cmpwi(CCR0, elsize, LogBytesPerInt);
+    __ bc(bo, bi, entry_jint_arraycopy);
+#ifdef ASSERT
+    { Label L;
+      __ cmpwi(CCR0, elsize, LogBytesPerLong);
+      __ beq(CCR0, L);
+      __ stop("must be long copy, but elsize is wrong");
+      __ bind(L);
+    }
+#endif
+    __ b(entry_jlong_arraycopy);
+
+    // ObjArrayKlass
+  __ bind(L_objArray);
+    // live at this point:  src_klass, dst_klass, src[_pos], dst[_pos], length
+
+    Label L_disjoint_plain_copy, L_checkcast_copy;
+    //  test array classes for subtyping
+    __ cmpd(CCR0, src_klass, dst_klass);         // usual case is exact equality
+    __ bne(CCR0, L_checkcast_copy);
+
+    // Identically typed arrays can be copied without element-wise checks.
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
+                           temp, lh, L_failed);
+
+    __ addi(src, src, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //src offset
+    __ addi(dst, dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //dst offset
+    __ sldi(src_pos, src_pos, LogBytesPerHeapOop);
+    __ sldi(dst_pos, dst_pos, LogBytesPerHeapOop);
+    __ add(from, src_pos, src);  // src_addr
+    __ add(to, dst_pos, dst);    // dst_addr
+    __ mr(count, length);        // length
+    __ b(entry_oop_arraycopy);
+
+  __ bind(L_checkcast_copy);
+    // live at this point:  src_klass, dst_klass
+    {
+      // Before looking at dst.length, make sure dst is also an objArray.
+      __ lwz(temp, lh_offset, dst_klass);
+      __ cmpw(CCR0, lh, temp);
+      __ bne(CCR0, L_failed);
+
+      // It is safe to examine both src.length and dst.length.
+      arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
+                             temp, lh, L_failed);
+
+      // Marshal the base address arguments now, freeing registers.
+      __ addi(src, src, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //src offset
+      __ addi(dst, dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //dst offset
+      __ sldi(src_pos, src_pos, LogBytesPerHeapOop);
+      __ sldi(dst_pos, dst_pos, LogBytesPerHeapOop);
+      __ add(from, src_pos, src);  // src_addr
+      __ add(to, dst_pos, dst);    // dst_addr
+      __ mr(count, length);        // length
+
+      Register sco_temp = R6_ARG4;             // This register is free now.
+      assert_different_registers(from, to, count, sco_temp,
+                                 dst_klass, src_klass);
+
+      // Generate the type check.
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
+      __ lwz(sco_temp, sco_offset, dst_klass);
+      generate_type_check(src_klass, sco_temp, dst_klass,
+                          temp, L_disjoint_plain_copy);
+
+      // Fetch destination element klass from the ObjArrayKlass header.
+      int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+
+      // The checkcast_copy loop needs two extra arguments:
+      __ ld(R7_ARG5, ek_offset, dst_klass);   // dest elem klass
+      __ lwz(R6_ARG4, sco_offset, R7_ARG5);   // sco of elem klass
+      __ b(entry_checkcast_arraycopy);
+    }
+
+    __ bind(L_disjoint_plain_copy);
+    __ b(entry_disjoint_oop_arraycopy);
+
+  __ bind(L_failed);
+    __ li(R3_RET, -1); // return -1
+    __ blr();
+    return start;
+  }
+
+
   void generate_arraycopy_stubs() {
     // Note: the disjoint stubs must be generated first, some of
     // the conjoint stubs use them.
@@ -2005,6 +2433,24 @@
     StubRoutines::_arrayof_oop_arraycopy        = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy", false);
     StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy", true);
 
+    // special/generic versions
+    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", false);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true);
+
+    StubRoutines::_unsafe_arraycopy  = generate_unsafe_copy("unsafe_arraycopy",
+                                                            STUB_ENTRY(jbyte_arraycopy),
+                                                            STUB_ENTRY(jshort_arraycopy),
+                                                            STUB_ENTRY(jint_arraycopy),
+                                                            STUB_ENTRY(jlong_arraycopy));
+    StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
+                                                             STUB_ENTRY(jbyte_arraycopy),
+                                                             STUB_ENTRY(jshort_arraycopy),
+                                                             STUB_ENTRY(jint_arraycopy),
+                                                             STUB_ENTRY(oop_arraycopy),
+                                                             STUB_ENTRY(oop_disjoint_arraycopy),
+                                                             STUB_ENTRY(jlong_arraycopy),
+                                                             STUB_ENTRY(checkcast_arraycopy));
+
     // fill routines
     StubRoutines::_jbyte_fill          = generate_fill(T_BYTE,  false, "jbyte_fill");
     StubRoutines::_jshort_fill         = generate_fill(T_SHORT, false, "jshort_fill");

--- a/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -34,7 +34,7 @@
 
 // CRC32 Intrinsics.
 void StubRoutines::ppc64::generate_load_crc_table_addr(MacroAssembler* masm, Register table) {
-  __ load_const(table, StubRoutines::_crc_table_adr);
+  __ load_const_optimized(table, StubRoutines::_crc_table_adr, R0);
 }
 
 // CRC32 Intrinsics.

--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -255,34 +255,33 @@
 
   if (TieredCompilation) {
     const int increment = InvocationCounter::count_increment;
-    const int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
     Label no_mdo;
     if (ProfileInterpreter) {
-      const Register Rmdo = Rscratch1;
+      const Register Rmdo = R3_counters;
       // If no method data exists, go to profile_continue.
       __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
       __ cmpdi(CCR0, Rmdo, 0);
       __ beq(CCR0, no_mdo);
 
       // Increment backedge counter in the MDO.
-      const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
-      __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
+      const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+      __ lwz(Rscratch2, mdo_ic_offs, Rmdo);
+      __ lwz(Rscratch1, in_bytes(MethodData::invoke_mask_offset()), Rmdo);
       __ addi(Rscratch2, Rscratch2, increment);
-      __ stw(Rscratch2, mdo_bc_offs, Rmdo);
-      __ load_const_optimized(Rscratch1, mask, R0);
+      __ stw(Rscratch2, mdo_ic_offs, Rmdo);
       __ and_(Rscratch1, Rscratch2, Rscratch1);
       __ bne(CCR0, done);
       __ b(*overflow);
     }
 
     // Increment counter in MethodCounters*.
-    const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+    const int mo_bc_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
     __ bind(no_mdo);
     __ get_method_counters(R19_method, R3_counters, done);
     __ lwz(Rscratch2, mo_bc_offs, R3_counters);
+    __ lwz(Rscratch1, in_bytes(MethodCounters::invoke_mask_offset()), R3_counters);
     __ addi(Rscratch2, Rscratch2, increment);
     __ stw(Rscratch2, mo_bc_offs, R3_counters);
-    __ load_const_optimized(Rscratch1, mask, R0);
     __ and_(Rscratch1, Rscratch2, Rscratch1);
     __ beq(CCR0, *overflow);
 
@@ -303,8 +302,7 @@
     // Check if we must create a method data obj.
     if (ProfileInterpreter && profile_method != NULL) {
       const Register profile_limit = Rscratch1;
-      int pl_offs = __ load_const_optimized(profile_limit, &InvocationCounter::InterpreterProfileLimit, R0, true);
-      __ lwz(profile_limit, pl_offs, profile_limit);
+      __ lwz(profile_limit, in_bytes(MethodCounters::interpreter_profile_limit_offset()), R3_counters);
       // Test to see if we should create a method data oop.
       __ cmpw(CCR0, Rsum_ivc_bec, profile_limit);
       __ blt(CCR0, *profile_method_continue);
@@ -314,9 +312,7 @@
     // Finally check for counter overflow.
     if (overflow) {
       const Register invocation_limit = Rscratch1;
-      int il_offs = __ load_const_optimized(invocation_limit, &InvocationCounter::InterpreterInvocationLimit, R0, true);
-      __ lwz(invocation_limit, il_offs, invocation_limit);
-      assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit), "unexpected field size");
+      __ lwz(invocation_limit, in_bytes(MethodCounters::interpreter_invocation_limit_offset()), R3_counters);
       __ cmpw(CCR0, Rsum_ivc_bec, invocation_limit);
       __ bge(CCR0, *overflow);
     }
@@ -1484,9 +1480,9 @@
 
   intptr_t* locals_base  = (caller->is_interpreted_frame()) ?
     caller->interpreter_frame_esp() + caller_actual_parameters :
-    caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize) ;
+    caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize);
 
-  intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize ;
+  intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize;
   intptr_t* monitor      = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
   intptr_t* esp_base     = monitor - 1;
   intptr_t* esp          = esp_base - tempcount - popframe_extra_args;

--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -37,5 +37,3 @@
   const static int InterpreterCodeSize = 230*K;
 
 #endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
-
-

--- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -1626,12 +1626,13 @@
   // --------------------------------------------------------------------------
   // Normal (non-jsr) branch handling
 
+  // Bump bytecode pointer by displacement (take the branch).
+  __ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
+
   const bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
   if (increment_invocation_counter_for_backward_branches) {
-    //__ unimplemented("branch invocation counter");
-
     Label Lforward;
-    __ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
+    __ dispatch_prolog(vtos);
 
     // Check branch direction.
     __ cmpdi(CCR0, Rdisp, 0);
@@ -1642,7 +1643,6 @@
     if (TieredCompilation) {
       Label Lno_mdo, Loverflow;
       const int increment = InvocationCounter::count_increment;
-      const int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
       if (ProfileInterpreter) {
         Register Rmdo = Rscratch1;
 
@@ -1654,7 +1654,7 @@
         // Increment backedge counter in the MDO.
         const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
         __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
-        __ load_const_optimized(Rscratch3, mask, R0);
+        __ lwz(Rscratch3, in_bytes(MethodData::backedge_mask_offset()), Rmdo);
         __ addi(Rscratch2, Rscratch2, increment);
         __ stw(Rscratch2, mdo_bc_offs, Rmdo);
         __ and_(Rscratch3, Rscratch2, Rscratch3);
@@ -1666,19 +1666,19 @@
       const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
       __ bind(Lno_mdo);
       __ lwz(Rscratch2, mo_bc_offs, R4_counters);
-      __ load_const_optimized(Rscratch3, mask, R0);
+      __ lwz(Rscratch3, in_bytes(MethodCounters::backedge_mask_offset()), R4_counters);
       __ addi(Rscratch2, Rscratch2, increment);
-      __ stw(Rscratch2, mo_bc_offs, R19_method);
+      __ stw(Rscratch2, mo_bc_offs, R4_counters);
       __ and_(Rscratch3, Rscratch2, Rscratch3);
       __ bne(CCR0, Lforward);
 
       __ bind(Loverflow);
 
       // Notify point for loop, pass branch bytecode.
-      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R14_bcp, true);
+      __ subf(R4_ARG2, Rdisp, R14_bcp); // Compute branch bytecode (previous bcp).
+      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);
 
       // Was an OSR adapter generated?
-      // O0 = osr nmethod
       __ cmpdi(CCR0, R3_RET, 0);
       __ beq(CCR0, Lforward);
 
@@ -1714,27 +1714,23 @@
       __ increment_backedge_counter(R4_counters, invoke_ctr, Rscratch2, Rscratch3);
 
       if (ProfileInterpreter) {
-        __ test_invocation_counter_for_mdp(invoke_ctr, Rscratch2, Lforward);
+        __ test_invocation_counter_for_mdp(invoke_ctr, R4_counters, Rscratch2, Lforward);
         if (UseOnStackReplacement) {
-          __ test_backedge_count_for_osr(bumped_count, R14_bcp, Rscratch2);
+          __ test_backedge_count_for_osr(bumped_count, R4_counters, R14_bcp, Rdisp, Rscratch2);
         }
       } else {
         if (UseOnStackReplacement) {
-          __ test_backedge_count_for_osr(invoke_ctr, R14_bcp, Rscratch2);
+          __ test_backedge_count_for_osr(invoke_ctr, R4_counters, R14_bcp, Rdisp, Rscratch2);
         }
       }
     }
 
     __ bind(Lforward);
+    __ dispatch_epilog(vtos);
 
   } else {
-    // Bump bytecode pointer by displacement (take the branch).
-    __ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
+    __ dispatch_next(vtos);
   }
-  // Continue with bytecode @ target.
-  // %%%%% Like Intel, could speed things up by moving bytecode fetch to code above,
-  // %%%%% and changing dispatch_next to dispatch_only.
-  __ dispatch_next(vtos);
 }
 
 // Helper function for if_cmp* methods below.

--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -38,7 +38,6 @@
 # include <sys/sysinfo.h>
 
 int VM_Version::_features = VM_Version::unknown_m;
-int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
 const char* VM_Version::_features_str = "";
 bool VM_Version::_is_determine_features_test_running = false;
 
@@ -56,7 +55,7 @@
 
   // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
   if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
-    if (VM_Version::has_lqarx()) {
+    if (VM_Version::has_tcheck() && VM_Version::has_lqarx()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
     } else if (VM_Version::has_popcntw()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
@@ -68,10 +67,19 @@
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0);
     }
   }
-  guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
-            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
-            PowerArchitecturePPC64 == 8,
-            "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
+
+  bool PowerArchitecturePPC64_ok = false;  // Becomes true only if no feature check below bails out.
+  switch (PowerArchitecturePPC64) {
+    case 8: if (!VM_Version::has_tcheck() ) break;
+            if (!VM_Version::has_lqarx()  ) break;  // Intentional fall-through: also run the level 7 check.
+    case 7: if (!VM_Version::has_popcntw()) break;  // Intentional fall-through: also run the level 6 check.
+    case 6: if (!VM_Version::has_cmpb()   ) break;  // Intentional fall-through: also run the level 5 check.
+    case 5: if (!VM_Version::has_popcntb()) break;  // Intentional fall-through: all checks passed.
+    case 0: PowerArchitecturePPC64_ok = true; break;
+    default: break;                                 // Any other value leaves the flag false.
+  }
+  guarantee(PowerArchitecturePPC64_ok, "PowerArchitecturePPC64 cannot be set to "
+            UINTX_FORMAT " on this machine", PowerArchitecturePPC64);
 
   // Power 8: Configure Data Stream Control Register.
   if (PowerArchitecturePPC64 >= 8) {
@@ -132,9 +140,15 @@
   // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
   _supports_cx8 = true;
 
+  // Used by C1.
+  _supports_atomic_getset4 = true;
+  _supports_atomic_getadd4 = true;
+  _supports_atomic_getset8 = true;
+  _supports_atomic_getadd8 = true;
+
   UseSSE = 0; // Only on x86 and x64
 
-  intx cache_line_size = _measured_cache_line_size;
+  intx cache_line_size = L1_data_cache_line_size();
 
   if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;
 
@@ -261,11 +275,9 @@
     }
   }
 
-  // This machine does not allow unaligned memory accesses
-  if (UseUnalignedAccesses) {
-    if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
-      warning("Unaligned memory access is not available on this CPU");
-    FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+  // This machine allows unaligned memory accesses, so turn them on while the flag is still at its default.
+  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
   }
 }
 
@@ -291,7 +303,7 @@
 }
 
 void VM_Version::print_features() {
-  tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
+  tty->print_cr("Version: %s L1_data_cache_line_size=%d", cpu_features(), L1_data_cache_line_size());
 }
 
 #ifdef COMPILER2
@@ -592,7 +604,7 @@
   int count = 0; // count zeroed bytes
   for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
   guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
-  _measured_cache_line_size = count;
+  _L1_data_cache_line_size = count;
 
   // Execute code. Illegal instructions will be replaced by 0 in the signal handler.
   VM_Version::_is_determine_features_test_running = true;

--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -65,7 +65,6 @@
     all_features_m        = -1
   };
   static int  _features;
-  static int  _measured_cache_line_size;
   static const char* _features_str;
   static bool _is_determine_features_test_running;
 
@@ -99,8 +98,6 @@
 
   static const char* cpu_features() { return _features_str; }
 
-  static int get_cache_line_size()  { return _measured_cache_line_size; }
-
   // Assembler testing
   static void allow_all();
   static void revert();

--- a/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp	Fri Dec 04 14:06:38 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp	Fri Dec 04 16:38:04 2015 +0100
@@ -76,7 +76,8 @@
 
   // We might implicit NULL fault here.
   address npe_addr = __ pc(); // npe = null pointer exception
-  __ load_klass_with_trap_null_check(rcvr_klass, R3);
+  __ null_check(R3, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL);
+  __ load_klass(rcvr_klass, R3);
 
  // Set method (in case of interpreted method), and destination address.
   int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
@@ -111,8 +112,8 @@
 
   // If the vtable entry is null, the method is abstract.
   address ame_addr = __ pc(); // ame = abstract method error
-
-  __ load_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
+  __ null_check(R19_method, in_bytes(Method::from_compiled_offset()), /*implicit only*/NULL);
+  __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
   __ mtctr(R12_scratch2);
   __ bctr();
   masm->flush();
@@ -158,7 +159,8 @@
 
   // We might implicit NULL fault here.
   address npe_addr = __ pc(); // npe = null pointer exception
-  __ load_klass_with_trap_null_check(rcvr_klass, R3_ARG1);
+  __ null_check(R3_ARG1, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL);
+  __ load_klass(rcvr_klass, R3_ARG1);
 
   BLOCK_COMMENT("Load start of itable entries into itable_entry.");
   __ lwz(vtable_len, InstanceKlass::vtable_length_offset() * wordSize, rcvr_klass);
@@ -217,15 +219,7 @@
   address ame_addr = __ pc(); // ame = abstract method error
 
   // Must do an explicit check if implicit checks are disabled.
-  assert(!MacroAssembler::needs_explicit_null_check(in_bytes(Method::from_compiled_offset())), "sanity");
-  if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
-    if (TrapBasedNullChecks) {
-      __ trap_null_check(R19_method);
-    } else {
-      __ cmpdi(CCR0, R19_method, 0);
-      __ beq(CCR0, throw_icce);
-    }
-  }
+  __ null_check(R19_method, in_bytes(Method::from_compiled_offset()), &throw_icce);
   __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
   __ mtctr(R12_scratch2);
   __ bctr();

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os/aix/vm/c1_globals_aix.hpp	Fri Dec 04 16:38:04 2015 +0100
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_AIX_VM_C1_GLOBALS_AIX_HPP
+#define OS_AIX_VM_C1_GLOBALS_AIX_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+//
+// Sets the default values for operating-system-dependent flags used by the
+// client compiler (see c1_globals.hpp). No AIX-specific values are defined here.
+//
+
+#endif // OS_AIX_VM_C1_GLOBALS_AIX_HPP

author	mdoerr
	Fri, 04 Dec 2015 16:38:04 +0100
changeset 35085	839c8ba29724
parent 35084	5b34a4ae0f58
child 35086	bbf32241d851

hotspot/make/aix/Makefile		file \| annotate \| diff \| comparison \| revisions
hotspot/make/aix/makefiles/fastdebug.make		file \| annotate \| diff \| comparison \| revisions
hotspot/make/aix/makefiles/tiered.make		file \| annotate \| diff \| comparison \| revisions
hotspot/make/linux/Makefile		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/assembler_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/assembler_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_CodeStubs_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_Defs_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_FpuStackSim_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_FrameMap_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_FrameMap_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_LinearScan_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_LinearScan_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_MacroAssembler_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_MacroAssembler_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_Runtime1_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c1_globals_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/compiledIC_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/frame_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/frame_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/frame_ppc.inline.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/nativeInst_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/nativeInst_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/ppc.ad		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/register_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/runtime_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/os/aix/vm/c1_globals_aix.hpp		file \| annotate \| diff \| comparison \| revisions