8068053: AARCH64: C1 and C2 compilers
authoraph
Tue, 20 Jan 2015 12:47:43 -0800
changeset 29184 e234025cafb6
parent 29183 0cc8699f7372
child 29185 41cf0610fedf
8068053: AARCH64: C1 and C2 compilers Summary: add src/cpu/aarch64/vm/* C1 and C2 files Reviewed-by: kvn, roland
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/aarch64_ad.m4
hotspot/src/cpu/aarch64/vm/ad_encode.m4
hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_Defs_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_FrameMap_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_FrameMap_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_LinearScan_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
hotspot/src/cpu/aarch64/vm/c1_globals_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c2_init_aarch64.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,12255 @@
+//
+// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2014, Red Hat Inc. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// AArch64 Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// archtecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def"  name ( register save type, C convention save type,
+//                   ideal register type, encoding );
+// Register Save Types:
+//
+// NS  = No-Save:       The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call:  The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call
+//                      sites.
+//
+// AS  = Always-Save:   The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// We must define the 64 bit int registers in two 32 bit halves, the
+// real lower register and a virtual upper half register. upper halves
+// are used by the register allocator but are not actually supplied as
+// operands to memory ops.
+//
+// follow the C1 compiler in making registers
+//
+//   r0-r7,r10-r26 volatile (caller save)
+//   r27-r32 system (no save, no allocate)
+//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
+//
+// as regards Java usage. we don't use any callee save registers
+// because this makes it difficult to de-optimise a frame (see comment
+// in x86 implementation of Deoptimization::unwind_callee_save_values)
+//
+
+// General Registers
+
+reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
+reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
+reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
+reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
+reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
+reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
+reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
+reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
+reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
+reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
+reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
+reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
+reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
+reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
+reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
+reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
+reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
+reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
+reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
+reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
+reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
+reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
+reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
+reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
+reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
+reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
+reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
+reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
+reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
+reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
+reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
+reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
+reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
+reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
+reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
+reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
+reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
+reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
+reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
+reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
+reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
+reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
+reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
+reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
+reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
+reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
+reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
+reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
+reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
+reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
+reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
+reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
+reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
+reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
+reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
+reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
+reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
+reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
+reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
+reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
+
+// ----------------------------
+// Float/Double Registers
+// ----------------------------
+
+// Double Registers
+
+// The rules of ADL require that double registers be defined in pairs.
+// Each pair must be two 32-bit values, but not necessarily a pair of
+// single float registers. In each pair, ADLC-assigned register numbers
+// must be adjacent, with the lower number even. Finally, when the
+// CPU stores such a register pair to memory, the word associated with
+// the lower ADLC-assigned number must be stored to the lower address.
+
+// AArch64 has 32 floating-point registers. Each can store a vector of
+// single or double precision floating-point values up to 8 * 32
+// floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
+// use the first float or double element of the vector.
+
+// for Java use float registers v0-v15 are always save on call whereas
+// the platform ABI treats v8-v15 as callee save). float registers
+// v16-v31 are SOC as per the platform spec
+
+  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()         );
+  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next() );
+  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()         );
+  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next() );
+  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()         );
+  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next() );
+  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()         );
+  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next() );
+  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()         );
+  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next() );
+  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()         );
+  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next() );
+  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()         );
+  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next() );
+  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()         );
+  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next() );
+  reg_def V8   ( SOC, SOE, Op_RegF,  8, v8->as_VMReg()         );
+  reg_def V8_H ( SOC, SOE, Op_RegF,  8, v8->as_VMReg()->next() );
+  reg_def V9   ( SOC, SOE, Op_RegF,  9, v9->as_VMReg()         );
+  reg_def V9_H ( SOC, SOE, Op_RegF,  9, v9->as_VMReg()->next() );
+  reg_def V10  ( SOC, SOE, Op_RegF, 10, v10->as_VMReg()        );
+  reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next());
+  reg_def V11  ( SOC, SOE, Op_RegF, 11, v11->as_VMReg()        );
+  reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next());
+  reg_def V12  ( SOC, SOE, Op_RegF, 12, v12->as_VMReg()        );
+  reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next());
+  reg_def V13  ( SOC, SOE, Op_RegF, 13, v13->as_VMReg()        );
+  reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next());
+  reg_def V14  ( SOC, SOE, Op_RegF, 14, v14->as_VMReg()        );
+  reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next());
+  reg_def V15  ( SOC, SOE, Op_RegF, 15, v15->as_VMReg()        );
+  reg_def V15_H( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next());
+  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()        );
+  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next());
+  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()        );
+  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next());
+  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()        );
+  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next());
+  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()        );
+  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next());
+  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()        );
+  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next());
+  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()        );
+  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next());
+  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()        );
+  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next());
+  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()        );
+  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next());
+  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()        );
+  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next());
+  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()        );
+  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next());
+  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()        );
+  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next());
+  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()        );
+  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next());
+  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()        );
+  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next());
+  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()        );
+  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next());
+  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()        );
+  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next());
+  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()        );
+  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next());
+
+// ----------------------------
+// Special Registers
+// ----------------------------
+
+// the AArch64 CSPR status flag register is not directly acessible as
+// instruction operand. the FPSR status flag register is a system
+// register which can be written/read using MSR/MRS but again does not
+// appear as an operand (a code identifying the FSPR occurs as an
+// immediate value in the instruction).
+
+reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
+
+
+// Specify priority of register selection within phases of register
+// allocation.  Highest priority is first.  A useful heuristic is to
+// give registers a low priority when they are required by machine
+// instructions, like EAX and EDX on I486, and choose no-save registers
+// before save-on-call, & save-on-call before save-on-entry.  Registers
+// which participate in fixed calling sequences should come last.
+// Registers which are used as pairs must fall on an even boundary.
+
+alloc_class chunk0(
+    // volatiles
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+
+    // arg registers
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+
+    // non-volatiles
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+
+    // non-allocatable registers
+
+    R27, R27_H, // heapbase
+    R28, R28_H, // thread
+    R29, R29_H, // fp
+    R30, R30_H, // lr
+    R31, R31_H, // sp
+);
+
+alloc_class chunk1(
+
+    // no save
+    V16, V16_H,
+    V17, V17_H,
+    V18, V18_H,
+    V19, V19_H,
+    V20, V20_H,
+    V21, V21_H,
+    V22, V22_H,
+    V23, V23_H,
+    V24, V24_H,
+    V25, V25_H,
+    V26, V26_H,
+    V27, V27_H,
+    V28, V28_H,
+    V29, V29_H,
+    V30, V30_H,
+    V31, V31_H,
+
+    // arg registers
+    V0, V0_H,
+    V1, V1_H,
+    V2, V2_H,
+    V3, V3_H,
+    V4, V4_H,
+    V5, V5_H,
+    V6, V6_H,
+    V7, V7_H,
+
+    // non-volatiles
+    V8, V8_H,
+    V9, V9_H,
+    V10, V10_H,
+    V11, V11_H,
+    V12, V12_H,
+    V13, V13_H,
+    V14, V14_H,
+    V15, V15_H,
+);
+
+alloc_class chunk2(RFLAGS);
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
+// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
+// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+
+// Class for all 32 bit integer registers -- excludes SP which will
+// never be used as an integer register
+reg_class any_reg32(
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R10,
+    R11,
+    R12,
+    R13,
+    R14,
+    R15,
+    R16,
+    R17,
+    R18,
+    R19,
+    R20,
+    R21,
+    R22,
+    R23,
+    R24,
+    R25,
+    R26,
+    R27,
+    R28,
+    R29,
+    R30
+);
+
+// Singleton class for R0 int register
+reg_class int_r0_reg(R0);
+
+// Singleton class for R2 int register
+reg_class int_r2_reg(R2);
+
+// Singleton class for R3 int register
+reg_class int_r3_reg(R3);
+
+// Singleton class for R4 int register
+reg_class int_r4_reg(R4);
+
+// Class for all long integer registers (including RSP)
+reg_class any_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+    R27, R27_H,
+    R28, R28_H,
+    R29, R29_H,
+    R30, R30_H,
+    R31, R31_H
+);
+
+// Class for all non-special integer registers
+reg_class no_special_reg32(
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R10,
+    R11,
+    R12,                        // rmethod
+    R13,
+    R14,
+    R15,
+    R16,
+    R17,
+    R18,
+    R19,
+    R20,
+    R21,
+    R22,
+    R23,
+    R24,
+    R25,
+    R26
+ /* R27, */                     // heapbase
+ /* R28, */                     // thread
+ /* R29, */                     // fp
+ /* R30, */                     // lr
+ /* R31 */                      // sp
+);
+
+// Class for all non-special long integer registers
+reg_class no_special_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,                 // rmethod
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+ /* R27, R27_H, */              // heapbase
+ /* R28, R28_H, */              // thread
+ /* R29, R29_H, */              // fp
+ /* R30, R30_H, */              // lr
+ /* R31, R31_H */               // sp
+);
+
+// Class for 64 bit register r0
+reg_class r0_reg(
+    R0, R0_H
+);
+
+// Class for 64 bit register r1
+reg_class r1_reg(
+    R1, R1_H
+);
+
+// Class for 64 bit register r2
+reg_class r2_reg(
+    R2, R2_H
+);
+
+// Class for 64 bit register r3
+reg_class r3_reg(
+    R3, R3_H
+);
+
+// Class for 64 bit register r4
+reg_class r4_reg(
+    R4, R4_H
+);
+
+// Class for 64 bit register r5
+reg_class r5_reg(
+    R5, R5_H
+);
+
+// Class for 64 bit register r10
+reg_class r10_reg(
+    R10, R10_H
+);
+
+// Class for 64 bit register r11
+reg_class r11_reg(
+    R11, R11_H
+);
+
+// Class for method register
+reg_class method_reg(
+    R12, R12_H
+);
+
+// Class for heapbase register
+reg_class heapbase_reg(
+    R27, R27_H
+);
+
+// Class for thread register
+reg_class thread_reg(
+    R28, R28_H
+);
+
+// Class for frame pointer register
+reg_class fp_reg(
+    R29, R29_H
+);
+
+// Class for link register
+reg_class lr_reg(
+    R30, R30_H
+);
+
+// Class for long sp register
+reg_class sp_reg(
+  R31, R31_H
+);
+
+// Class for all pointer registers
+reg_class ptr_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+    R27, R27_H,
+    R28, R28_H,
+    R29, R29_H,
+    R30, R30_H,
+    R31, R31_H
+);
+
+// Class for all non_special pointer registers
+reg_class no_special_ptr_reg(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+ /* R27, R27_H, */              // heapbase
+ /* R28, R28_H, */              // thread
+ /* R29, R29_H, */              // fp
+ /* R30, R30_H, */              // lr
+ /* R31, R31_H */               // sp
+);
+
+// Class for all float registers
+reg_class float_reg(
+    V0,
+    V1,
+    V2,
+    V3,
+    V4,
+    V5,
+    V6,
+    V7,
+    V8,
+    V9,
+    V10,
+    V11,
+    V12,
+    V13,
+    V14,
+    V15,
+    V16,
+    V17,
+    V18,
+    V19,
+    V20,
+    V21,
+    V22,
+    V23,
+    V24,
+    V25,
+    V26,
+    V27,
+    V28,
+    V29,
+    V30,
+    V31
+);
+
+// Double precision float registers have virtual `high halves' that
+// are needed by the allocator.
+// Class for all double registers
+reg_class double_reg(
+    V0, V0_H,
+    V1, V1_H,
+    V2, V2_H,
+    V3, V3_H,
+    V4, V4_H,
+    V5, V5_H,
+    V6, V6_H,
+    V7, V7_H,
+    V8, V8_H,
+    V9, V9_H,
+    V10, V10_H,
+    V11, V11_H,
+    V12, V12_H,
+    V13, V13_H,
+    V14, V14_H,
+    V15, V15_H,
+    V16, V16_H,
+    V17, V17_H,
+    V18, V18_H,
+    V19, V19_H,
+    V20, V20_H,
+    V21, V21_H,
+    V22, V22_H,
+    V23, V23_H,
+    V24, V24_H,
+    V25, V25_H,
+    V26, V26_H,
+    V27, V27_H,
+    V28, V28_H,
+    V29, V29_H,
+    V30, V30_H,
+    V31, V31_H
+);
+
+// Class for 128 bit register v0
+reg_class v0_reg(
+    V0, V0_H
+);
+
+// Class for 128 bit register v1
+reg_class v1_reg(
+    V1, V1_H
+);
+
+// Class for 128 bit register v2
+reg_class v2_reg(
+    V2, V2_H
+);
+
+// Class for 128 bit register v3
+reg_class v3_reg(
+    V3, V3_H
+);
+
+// Singleton class for condition codes
+reg_class int_flags(RFLAGS);
+
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//        int_def  <name>         ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define  <name>   (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+
+// we follow the ppc-aix port in using a simple cost model which ranks
+// register operations as cheap, memory ops as more expensive and
+// branches as most expensive. the first two have a low as well as a
+// normal cost. huge cost appears to be a way of saying don't do
+// something
+
+definitions %{
+  // The default cost (of a register move instruction).
+  int_def INSN_COST            (    100,     100);
+  int_def BRANCH_COST          (    200,     2 * INSN_COST);
+  int_def CALL_COST            (    200,     2 * INSN_COST);
+  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
+%}
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+
+class CallStubImpl {
+
+  //--------------------------------------------------------------
+  //---<  Used for optimization in Compile::shorten_branches  >---
+  //--------------------------------------------------------------
+
+ public:
+  // Size of call trampoline stub.
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // number of relocations needed by a call trampoline stub
+  static uint reloc_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+};
+
+class HandlerImpl {
+
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  static uint size_exception_handler() {
+    return MacroAssembler::far_branch_size();
+  }
+
+  static uint size_deopt_handler() {
+    // count one adr and one far branch instruction
+    return 4 * NativeInstruction::instruction_size;
+  }
+};
+
+  bool preceded_by_ordered_load(const Node *barrier);
+
+  // Use barrier instructions rather than load acquire / store
+  // release.
+  const bool UseBarriersForVolatile = true;
+%}
+
+source %{
+
+  // AArch64 has load acquire and store release instructions which we
+  // use for ordered memory accesses, e.g. for volatiles.  The ideal
+  // graph generator also inserts memory barriers around volatile
+  // accesses, and we don't want to generate both barriers and acq/rel
+  // instructions.  So, when we emit a MemBarAcquire we look back in
+  // the ideal graph for an ordered load and only emit the barrier if
+  // we don't find one.
+
+bool preceded_by_ordered_load(const Node *barrier) {
+  Node *x = barrier->lookup(TypeFunc::Parms);
+
+  if (! x)
+    return false;
+
+  if (x->is_DecodeNarrowPtr())
+    x = x->in(1);
+
+  if (x->is_Load())
+    return ! x->as_Load()->is_unordered();
+
+  return false;
+}
+
+#define __ _masm.
+
+// advance declarations for helper functions to convert register
+// indices to register objects
+
+// the ad file has to provide implementations of certain methods
+// expected by the generic code
+//
+// REQUIRED FUNCTIONALITY
+
+//=============================================================================
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+//       from the start of the call to the point where the return address
+//       will point.
+
+int MachCallStaticJavaNode::ret_addr_offset()
+{
+  // call should be a simple bl
+  // unless this is a method handle invoke in which case it is
+  // mov(rfp, sp), bl, mov(sp, rfp)
+  int off = 4;
+  if (_method_handle_invoke) {
+    off += 4;
+  }
+  return off;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset()
+{
+  return 16; // movz, movk, movk, bl
+}
+
+int MachCallRuntimeNode::ret_addr_offset() {
+  // for generated stubs the call will be
+  //   far_call(addr)
+  // for real runtime callouts it will be six instructions
+  // see aarch64_enc_java_to_runtime
+  //   adr(rscratch2, retaddr)
+  //   lea(rscratch1, RuntimeAddress(addr)
+  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
+  //   blrt rscratch1
+  CodeBlob *cb = CodeCache::find_blob(_entry_point);
+  if (cb) {
+    return MacroAssembler::far_branch_size();
+  } else {
+    return 6 * NativeInstruction::instruction_size;
+  }
+}
+
+// Indicate if the safepoint node needs the polling page as an input
+
+// the shared code plants the oop data at the start of the generated
+// code for the safepoint node and that needs ot be at the load
+// instruction itself. so we cannot plant a mov of the safepoint poll
+// address followed by a load. setting this to true means the mov is
+// scheduled as a prior instruction. that's better for scheduling
+// anyway.
+
+bool SafePointNode::needs_polling_address_input()
+{
+  return true;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  st->print("BREAKPOINT");
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  __ brk(0);
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
+    st->print("nop \t# %d bytes pad for loops and calls", _count);
+  }
+#endif
+
+  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
+    MacroAssembler _masm(&cbuf);
+    for (int i = 0; i < _count; i++) {
+      __ nop();
+    }
+  }
+
+  uint MachNopNode::size(PhaseRegAlloc*) const {
+    return _count * NativeInstruction::instruction_size;
+  }
+
+//=============================================================================
+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
+
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  return 0;  // absolute addressing, no offset
+}
+
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+  ShouldNotReachHere();
+}
+
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  // Empty encoding
+}
+
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
+  return 0;
+}
+
+#ifndef PRODUCT
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  st->print("-- \t// MachConstantBaseNode (empty encoding)");
+}
+#endif
+
+#ifndef PRODUCT
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  Compile* C = ra_->C;
+
+  int framesize = C->frame_slots() << LogBytesPerInt;
+
+  if (C->need_stack_bang(framesize))
+    st->print("# stack bang size=%d\n\t", framesize);
+
+  if (framesize == 0) {
+    // Is this even possible?
+    st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize));
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    st->print("sub  sp, sp, #%d\n\t", framesize);
+    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
+  } else {
+    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
+    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
+    st->print("sub  sp, sp, rscratch1");
+  }
+}
+#endif
+
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  MacroAssembler _masm(&cbuf);
+
+  // n.b. frame size includes space for return pc and rfp
+  const long framesize = C->frame_size_in_bytes();
+  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
+
+  // insert a nop at the start of the prolog so we can patch in a
+  // branch if we need to invalidate the method later
+  __ nop();
+
+  int bangsize = C->bang_size_in_bytes();
+  if (C->need_stack_bang(bangsize) && UseStackBanging)
+    __ generate_stack_overflow_check(bangsize);
+
+  __ build_frame(framesize);
+
+  if (NotifySimulator) {
+    __ notify(Assembler::method_entry);
+  }
+
+  if (VerifyStackAtCalls) {
+    Unimplemented();
+  }
+
+  C->set_frame_complete(cbuf.insts_size());
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
+}
+
+uint MachPrologNode::size(PhaseRegAlloc* ra_) const
+{
+  return MachNode::size(ra_); // too many variables; just compute it
+                              // the hard way
+}
+
+int MachPrologNode::reloc() const
+{
+  return 0;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  Compile* C = ra_->C;
+  int framesize = C->frame_slots() << LogBytesPerInt;
+
+  st->print("# pop frame %d\n\t",framesize);
+
+  if (framesize == 0) {
+    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
+    st->print("add  sp, sp, #%d\n\t", framesize);
+  } else {
+    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
+    st->print("add  sp, sp, rscratch1\n\t");
+    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
+  }
+
+  if (do_polling() && C->is_method_compilation()) {
+    st->print("# touch polling page\n\t");
+    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
+    st->print("ldr zr, [rscratch1]");
+  }
+}
+#endif
+
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  MacroAssembler _masm(&cbuf);
+  int framesize = C->frame_slots() << LogBytesPerInt;
+
+  __ remove_frame(framesize);
+
+  if (NotifySimulator) {
+    __ notify(Assembler::method_reentry);
+  }
+
+  if (do_polling() && C->is_method_compilation()) {
+    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
+  }
+}
+
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  // Variable size. Determine dynamically.
+  return MachNode::size(ra_);
+}
+
+int MachEpilogNode::reloc() const {
+  // Return number of relocatable values contained in this instruction.
+  return 1; // 1 for polling page.
+}
+
+const Pipeline * MachEpilogNode::pipeline() const {
+  return MachNode::pipeline_class();
+}
+
+// This method seems to be obsolete. It is declared in machnode.hpp
+// and defined in all *.ad files, but it is never called. Should we
+// get rid of it?
+int MachEpilogNode::safepoint_offset() const {
+  assert(do_polling(), "no return for this epilog node");
+  return 4;
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float or
+// rc_stack.
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+
+static enum RC rc_class(OptoReg::Name reg) {
+
+  if (reg == OptoReg::Bad) {
+    return rc_bad;
+  }
+
+  // we have 30 int registers * 2 halves
+  // (rscratch1 and rscratch2 are omitted)
+
+  if (reg < 60) {
+    return rc_int;
+  }
+
+  // we have 32 float register * 2 halves
+  if (reg < 60 + 64) {
+    return rc_float;
+  }
+
+  // Between float regs & stack is the flags regs.
+  assert(OptoReg::is_stack(reg), "blow up if spilling flags");
+
+  return rc_stack;
+}
+
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
+  Compile* C = ra_->C;
+
+  // Get registers to move.
+  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
+  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
+  OptoReg::Name dst_hi = ra_->get_reg_second(this);
+  OptoReg::Name dst_lo = ra_->get_reg_first(this);
+
+  enum RC src_hi_rc = rc_class(src_hi);
+  enum RC src_lo_rc = rc_class(src_lo);
+  enum RC dst_hi_rc = rc_class(dst_hi);
+  enum RC dst_lo_rc = rc_class(dst_lo);
+
+  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
+
+  if (src_hi != OptoReg::Bad) {
+    assert((src_lo&1)==0 && src_lo+1==src_hi &&
+           (dst_lo&1)==0 && dst_lo+1==dst_hi,
+           "expected aligned-adjacent pairs");
+  }
+
+  if (src_lo == dst_lo && src_hi == dst_hi) {
+    return 0;            // Self copy, no move.
+  }
+
+  switch (src_lo_rc) {
+  case rc_int:
+    if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ mov(as_Register(Matcher::_regEncode[dst_lo]),
+                 as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("mov  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ movw(as_Register(Matcher::_regEncode[dst_lo]),
+                  as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("movw  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovd  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_Register(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovs  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else {                    // gpr --> stack spill
+      assert(dst_lo_rc == rc_stack, "spill to bad register class");
+      int dst_offset = ra_->reg2offset(dst_lo);
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ str(as_Register(Matcher::_regEncode[src_lo]),
+                 Address(sp, dst_offset));
+        } else if (st) {
+          st->print("str  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+                    dst_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ strw(as_Register(Matcher::_regEncode[src_lo]),
+                 Address(sp, dst_offset));
+        } else if (st) {
+          st->print("strw  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+                    dst_offset);
+        }
+      }
+    }
+    return 4;
+  case rc_float:
+    if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovd  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovs  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovd  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else if (st) {
+          st->print("fmovs  %s, %s\t# shuffle",
+                    Matcher::regName[dst_lo],
+                    Matcher::regName[src_lo]);
+        }
+      }
+    } else {                    // fpr --> stack spill
+      assert(dst_lo_rc == rc_stack, "spill to bad register class");
+      int dst_offset = ra_->reg2offset(dst_lo);
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]),
+                 Address(sp, dst_offset));
+        } else if (st) {
+          st->print("strd  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+                    dst_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]),
+                 Address(sp, dst_offset));
+        } else if (st) {
+          st->print("strs  %s, [sp, #%d]\t# spill",
+                    Matcher::regName[src_lo],
+                    dst_offset);
+        }
+      }
+    }
+    return 4;
+  case rc_stack:
+    int src_offset = ra_->reg2offset(src_lo);
+    if (dst_lo_rc == rc_int) {  // stack --> gpr load
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldr(as_Register(Matcher::_regEncode[dst_lo]),
+                 Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldr  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+                    src_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrw(as_Register(Matcher::_regEncode[dst_lo]),
+                  Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldr  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+                   src_offset);
+        }
+      }
+      return 4;
+    } else if (dst_lo_rc == rc_float) { // stack --> fpr load
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                 Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldrd  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+                    src_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                  Address(sp, src_offset));
+        } else if (st) {
+          st->print("ldrs  %s, [sp, %d]\t# restore",
+                    Matcher::regName[dst_lo],
+                   src_offset);
+        }
+      }
+      return 4;
+    } else {                    // stack --> stack copy
+      assert(dst_lo_rc == rc_stack, "spill to bad register class");
+      int dst_offset = ra_->reg2offset(dst_lo);
+      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
+          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
+          // 64 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldr(rscratch1, Address(sp, src_offset));
+          __ str(rscratch1, Address(sp, dst_offset));
+        } else if (st) {
+          st->print("ldr  rscratch1, [sp, %d]\t# mem-mem spill",
+                    src_offset);
+          st->print("\n\t");
+          st->print("str  rscratch1, [sp, %d]",
+                    dst_offset);
+        }
+      } else {
+        // 32 bit
+        if (cbuf) {
+          MacroAssembler _masm(cbuf);
+          __ ldrw(rscratch1, Address(sp, src_offset));
+          __ strw(rscratch1, Address(sp, dst_offset));
+        } else if (st) {
+          st->print("ldrw  rscratch1, [sp, %d]\t# mem-mem spill",
+                    src_offset);
+          st->print("\n\t");
+          st->print("strw  rscratch1, [sp, %d]",
+                    dst_offset);
+        }
+      }
+      return 8;
+    }
+  }
+
+  assert(false," bad rc_class for spill ");
+  Unimplemented();
+  return 0;
+
+}
+
+#ifndef PRODUCT
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  if (!ra_)
+    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
+  else
+    implementation(NULL, ra_, false, st);
+}
+#endif
+
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  implementation(&cbuf, ra_, false, NULL);
+}
+
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  return implementation(NULL, ra_, true, NULL);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  int reg = ra_->get_reg_first(this);
+  st->print("add %s, rsp, #%d]\t# box lock",
+            Matcher::regName[reg], offset);
+}
+#endif
+
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  int reg    = ra_->get_encode(this);
+
+  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
+    __ add(as_Register(reg), sp, offset);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
+  return 4;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+{
+  st->print_cr("# MachUEPNode");
+  if (UseCompressedClassPointers) {
+    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+    if (Universe::narrow_klass_shift() != 0) {
+      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
+    }
+  } else {
+   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+  }
+  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
+  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
+}
+#endif
+
+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+{
+  // This is the unverified entry point.
+  MacroAssembler _masm(&cbuf);
+
+  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
+  Label skip;
+  // TODO
+  // can we avoid this skip and still use a reloc?
+  __ br(Assembler::EQ, skip);
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  __ bind(skip);
+}
+
+uint MachUEPNode::size(PhaseRegAlloc* ra_) const
+{
+  return MachNode::size(ra_);
+}
+
+// REQUIRED EMIT CODE
+
+//=============================================================================
+
+// Emit exception handler code.
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
+{
+  // mov rscratch1 #exception_blob_entry_point
+  // br rscratch1
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a handler.
+  MacroAssembler _masm(&cbuf);
+  address base =
+  __ start_a_stub(size_exception_handler());
+  if (base == NULL)  return 0;  // CodeBuffer::expand failed
+  int offset = __ offset();
+  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+  __ end_a_stub();
+  return offset;
+}
+
+// Emit deopt handler code.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
+{
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a handler.
+  MacroAssembler _masm(&cbuf);
+  address base =
+  __ start_a_stub(size_deopt_handler());
+  if (base == NULL)  return 0;  // CodeBuffer::expand failed
+  int offset = __ offset();
+
+  __ adr(lr, __ pc());
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+  __ end_a_stub();
+  return offset;
+}
+
+// REQUIRED MATCHER CODE
+
+//=============================================================================
+
+const bool Matcher::match_rule_supported(int opcode) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_StrEquals and other intrinsics
+  if (!has_match_rule(opcode)) {
+    return false;
+  }
+
+  return true;  // Per default match rules are supported.
+}
+
+int Matcher::regnum_to_fpu_offset(int regnum)
+{
+  Unimplemented();
+  return 0;
+}
+
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
+{
+  Unimplemented();
+  return false;
+}
+
+const bool Matcher::isSimpleConstant64(jlong value) {
+  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
+  // Probably always true, even if a temp register is required.
+  return true;
+}
+
+// true just means we have fast l2f conversion
+const bool Matcher::convL2FSupported(void) {
+  return true;
+}
+
+// Vector width in bytes.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  // TODO fixme
+  return 0;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+  int max_size = max_vector_size(bt);
+  // Min size which can be loaded into vector is 4 bytes.
+  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
+  return MIN2(size,max_size);
+}
+
+// Vector ideal reg.
+const int Matcher::vector_ideal_reg(int len) {
+  // TODO fixme
+  return Op_RegD;
+}
+
+// Only lowest bits of xmm reg are used for vector shift count.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+  // TODO fixme
+  return Op_RegL;
+}
+
+// AES support not yet implemented
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
+// x86 supports misaligned vectors store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  // TODO fixme
+  // return !AlignVector; // can be changed by flag
+  return false;
+}
+
+// false => size gets scaled to BytesPerLong, ok.
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Threshold size for cleararray.
+const int Matcher::init_array_short_size = 18 * BytesPerLong;
+
+// Use conditional move (CMOVL)
+const int Matcher::long_cmove_cost() {
+  // long cmoves are no more expensive than int cmoves
+  return 0;
+}
+
+const int Matcher::float_cmove_cost() {
+  // float cmoves are no more expensive than int cmoves
+  return 0;
+}
+
+// Does the CPU require late expand (see block.cpp for description of late expand)?
+const bool Matcher::require_postalloc_expand = false;
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?  True for Intel but false for most RISCs
+const bool Matcher::clone_shift_expressions = false;
+
+// Do we need to mask the count passed to shift instructions or does
+// the cpu only look at the lower 5/6 bits anyway?
+const bool Matcher::need_masked_shift_count = false;
+
+// This affects two different things:
+//  - how Decode nodes are matched
+//  - how ImplicitNullCheck opportunities are recognized
+// If true, the matcher will try to remove all Decodes and match them
+// (as operands) into nodes. NullChecks are not prepared to deal with
+// Decodes by final_graph_reshaping().
+// If false, final_graph_reshaping() forces the decode behind the Cmp
+// for a NullCheck. The matcher matches the Decode node into a register.
+// Implicit_null_check optimization moves the Decode along with the
+// memory operation back up before the NullCheck.
+bool Matcher::narrow_oop_use_complex_address() {
+  return Universe::narrow_oop_shift() == 0;
+}
+
+bool Matcher::narrow_klass_use_complex_address() {
+// TODO
+// decide whether we need to set this to true
+  return false;
+}
+
+// Is it better to copy float constants, or load them directly from
+// memory?  Intel can load a float constant from a direct address,
+// requiring no extra registers.  Most RISCs will have to materialize
+// an address into a register first, so they would do better to copy
+// the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If CPU can load and store mis-aligned doubles directly then no
+// fixup is needed.  Else we split the double into 2 integer pieces
+// and move it piece-by-piece.  Only happens when passing doubles into
+// C code as the Java calling convention forces doubles to be aligned.
+const bool Matcher::misaligned_doubles_ok = true;
+
+// No-op on amd64
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
+  Unimplemented();
+}
+
+// Advertise here if the CPU requires explicit rounding operations to
+// implement the UseStrictFP mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Are floats converted to double when stored to stack during
+// deoptimization?
+bool Matcher::float_in_double() { return true; }
+
+// Do ints take an entire long register or just half?
+// The relevant question is how the int is callee-saved:
+// the whole long is written but de-opt'ing will have to extract
+// the relevant 32 bits.
+const bool Matcher::int_in_long = true;
+
+// Return whether or not this register is ever used as an argument.
+// This function is used on startup to build the trampoline stubs in
+// generateOptoStub.  Registers not mentioned will be killed by the VM
+// call in the trampoline, and arguments in those registers not be
+// available to the callee.
+bool Matcher::can_be_java_arg(int reg)
+{
+  return
+    reg ==  R0_num || reg == R0_H_num ||
+    reg ==  R1_num || reg == R1_H_num ||
+    reg ==  R2_num || reg == R2_H_num ||
+    reg ==  R3_num || reg == R3_H_num ||
+    reg ==  R4_num || reg == R4_H_num ||
+    reg ==  R5_num || reg == R5_H_num ||
+    reg ==  R6_num || reg == R6_H_num ||
+    reg ==  R7_num || reg == R7_H_num ||
+    reg ==  V0_num || reg == V0_H_num ||
+    reg ==  V1_num || reg == V1_H_num ||
+    reg ==  V2_num || reg == V2_H_num ||
+    reg ==  V3_num || reg == V3_H_num ||
+    reg ==  V4_num || reg == V4_H_num ||
+    reg ==  V5_num || reg == V5_H_num ||
+    reg ==  V6_num || reg == V6_H_num ||
+    reg ==  V7_num || reg == V7_H_num;
+}
+
+bool Matcher::is_spillable_arg(int reg)
+{
+  return can_be_java_arg(reg);
+}
+
+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
+  return false;
+}
+
+RegMask Matcher::divI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODI projection of divmodI.
+RegMask Matcher::modI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for DIVL projection of divmodL.
+RegMask Matcher::divL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODL projection of divmodL.
+RegMask Matcher::modL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+  return RegMask();
+}
+
+// helper for encoding java_to_runtime calls on sim
+//
+// this is needed to compute the extra arguments required when
+// planting a call to the simulator blrt instruction. the TypeFunc
+// can be queried to identify the counts for integral, and floating
+// arguments and the return type
+
+static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
+{
+  int gps = 0;
+  int fps = 0;
+  const TypeTuple *domain = tf->domain();
+  int max = domain->cnt();
+  for (int i = TypeFunc::Parms; i < max; i++) {
+    const Type *t = domain->field_at(i);
+    switch(t->basic_type()) {
+    case T_FLOAT:
+    case T_DOUBLE:
+      fps++;
+    default:
+      gps++;
+    }
+  }
+  gpcnt = gps;
+  fpcnt = fps;
+  BasicType rt = tf->return_type();
+  switch (rt) {
+  case T_VOID:
+    rtype = MacroAssembler::ret_type_void;
+    break;
+  default:
+    rtype = MacroAssembler::ret_type_integral;
+    break;
+  case T_FLOAT:
+    rtype = MacroAssembler::ret_type_float;
+    break;
+  case T_DOUBLE:
+    rtype = MacroAssembler::ret_type_double;
+    break;
+  }
+}
+
+#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
+  MacroAssembler _masm(&cbuf);                                          \
+  {                                                                     \
+    guarantee(INDEX == -1, "mode not permitted for volatile");          \
+    guarantee(DISP == 0, "mode not permitted for volatile");            \
+    guarantee(SCALE == 0, "mode not permitted for volatile");           \
+    __ INSN(REG, as_Register(BASE));                                    \
+  }
+
+typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
+typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
+
+  // Used for all non-volatile memory accesses.  The use of
+  // $mem->opcode() to discover whether this pattern uses sign-extended
+  // offsets is something of a kludge.
+  static void loadStore(MacroAssembler masm, mem_insn insn,
+                         Register reg, int opcode,
+                         Register base, int index, int size, int disp)
+  {
+    Address::extend scale;
+
+    // Hooboy, this is fugly.  We need a way to communicate to the
+    // encoder that the index needs to be sign extended, so we have to
+    // enumerate all the cases.
+    switch (opcode) {
+    case INDINDEXSCALEDOFFSETI2L:
+    case INDINDEXSCALEDI2L:
+    case INDINDEXSCALEDOFFSETI2LN:
+    case INDINDEXSCALEDI2LN:
+      scale = Address::sxtw(size);
+      break;
+    default:
+      scale = Address::lsl(size);
+    }
+
+    if (index == -1) {
+      (masm.*insn)(reg, Address(base, disp));
+    } else {
+      if (disp == 0) {
+        (masm.*insn)(reg, Address(base, as_Register(index), scale));
+      } else {
+        masm.lea(rscratch1, Address(base, disp));
+        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
+      }
+    }
+  }
+
+  static void loadStore(MacroAssembler masm, mem_float_insn insn,
+                         FloatRegister reg, int opcode,
+                         Register base, int index, int size, int disp)
+  {
+    Address::extend scale;
+
+    switch (opcode) {
+    case INDINDEXSCALEDOFFSETI2L:
+    case INDINDEXSCALEDI2L:
+    case INDINDEXSCALEDOFFSETI2LN:
+    case INDINDEXSCALEDI2LN:
+      scale = Address::sxtw(size);
+      break;
+    default:
+      scale = Address::lsl(size);
+    }
+
+     if (index == -1) {
+      (masm.*insn)(reg, Address(base, disp));
+    } else {
+      if (disp == 0) {
+        (masm.*insn)(reg, Address(base, as_Register(index), scale));
+      } else {
+        masm.lea(rscratch1, Address(base, disp));
+        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
+      }
+    }
+  }
+
+%}
+
+
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to
+// output byte streams.  Encoding classes are parameterized macros
+// used by Machine Instruction Nodes in order to generate the bit
+// encoding of the instruction.  Operands specify their base encoding
+// interface with the interface keyword.  There are currently
+// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
+// COND_INTER.  REG_INTER causes an operand to generate a function
+// which returns its register number when queried.  CONST_INTER causes
+// an operand to generate a function which returns the value of the
+// constant when queried.  MEMORY_INTER causes an operand to generate
+// four functions which return the Base Register, the Index Register,
+// the Scale Value, and the Offset Value of the operand when queried.
+// COND_INTER causes an operand to generate six functions which return
+// the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional
+// instruction.
+//
+// Instructions specify two basic values for encoding.  Again, a
+// function is available to check if the constant displacement is an
+// oop. They use the ins_encode keyword to specify their encoding
+// classes (which must be a sequence of enc_class names, and their
+// parameters, specified in the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode.  Only the opcode sections which a particular
+// instruction needs for encoding need to be specified.
+encode %{
+  // Build emit functions for each basic byte or larger field in the
+  // intel encoding scheme (opcode, rm, sib, immediate), and call them
+  // from C++ code in the enc_class source block.  Emit functions will
+  // live in the main source block for now.  In future, we can
+  // generalize this by adding a syntax that specifies the sizes of
+  // fields in an order, so that the adlc can build the emit functions
+  // automagically
+
+  // catch all for unimplemented encodings
+  enc_class enc_unimplemented %{
+    MacroAssembler _masm(&cbuf);
+    __ unimplemented("C2 catch all");
+  %}
+
+  // BEGIN Non-volatile memory access
+
+  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strb0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strh0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strw0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_str(iRegL src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    // we sometimes get asked to store the stack pointer into the
+    // current thread -- we cannot do that directly on AArch64
+    if (src_reg == r31_sp) {
+      MacroAssembler _masm(&cbuf);
+      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
+      __ mov(rscratch2, sp);
+      src_reg = rscratch2;
+    }
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_str0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
+    FloatRegister src_reg = as_FloatRegister($src$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+
+  // END Non-volatile memory access
+
+  // volatile loads and stores
+
+  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
+    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+                 rscratch1, stlrb);
+  %}
+
+  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
+    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+                 rscratch1, stlrh);
+  %}
+
+  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
+    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+                 rscratch1, stlrw);
+  %}
+
+
+  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarb);
+    __ sxtbw(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarb);
+    __ sxtb(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarb);
+  %}
+
+  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarb);
+  %}
+
+  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarh);
+    __ sxthw(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
+    Register dst_reg = as_Register($dst$$reg);
+    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarh);
+    __ sxth(dst_reg, dst_reg);
+  %}
+
+  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarh);
+  %}
+
+  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarh);
+  %}
+
+  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarw);
+  %}
+
+  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarw);
+  %}
+
+  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
+    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldar);
+  %}
+
+  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
+    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldarw);
+    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
+  %}
+
+  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
+    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+             rscratch1, ldar);
+    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
+  %}
+
+  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
+    Register src_reg = as_Register($src$$reg);
+    // we sometimes get asked to store the stack pointer into the
+    // current thread -- we cannot do that directly on AArch64
+    if (src_reg == r31_sp) {
+        MacroAssembler _masm(&cbuf);
+      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
+      __ mov(rscratch2, sp);
+      src_reg = rscratch2;
+    }
+    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+                 rscratch1, stlr);
+  %}
+
+  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
+    {
+      MacroAssembler _masm(&cbuf);
+      FloatRegister src_reg = as_FloatRegister($src$$reg);
+      __ fmovs(rscratch2, src_reg);
+    }
+    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+                 rscratch1, stlrw);
+  %}
+
+  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
+    {
+      MacroAssembler _masm(&cbuf);
+      FloatRegister src_reg = as_FloatRegister($src$$reg);
+      __ fmovd(rscratch2, src_reg);
+    }
+    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
+                 rscratch1, stlr);
+  %}
+
+  // synchronized read/update encodings
+
+  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {
+        __ lea(rscratch1, Address(base, disp));
+        __ ldaxr(dst_reg, rscratch1);
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        __ ldaxr(dst_reg, base);
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
+        __ ldaxr(dst_reg, rscratch1);
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
+        __ ldaxr(dst_reg, rscratch1);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register src_reg = as_Register($src$$reg);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {
+        __ lea(rscratch2, Address(base, disp));
+        __ stlxr(rscratch1, src_reg, rscratch2);
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        __ stlxr(rscratch1, src_reg, base);
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        __ stlxr(rscratch1, src_reg, rscratch2);
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+        __ stlxr(rscratch1, src_reg, rscratch2);
+      }
+    }
+    __ cmpw(rscratch1, zr);
+  %}
+
+  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
+    MacroAssembler _masm(&cbuf);
+    Register old_reg = as_Register($oldval$$reg);
+    Register new_reg = as_Register($newval$$reg);
+    Register base = as_Register($mem$$base);
+    Register addr_reg;
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {
+        __ lea(rscratch2, Address(base, disp));
+        addr_reg = rscratch2;
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        addr_reg = base;
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      }
+    }
+    Label retry_load, done;
+    __ bind(retry_load);
+    __ ldxr(rscratch1, addr_reg);
+    __ cmp(rscratch1, old_reg);
+    __ br(Assembler::NE, done);
+    __ stlxr(rscratch1, new_reg, addr_reg);
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(done);
+  %}
+
+  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+    MacroAssembler _masm(&cbuf);
+    Register old_reg = as_Register($oldval$$reg);
+    Register new_reg = as_Register($newval$$reg);
+    Register base = as_Register($mem$$base);
+    Register addr_reg;
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {
+        __ lea(rscratch2, Address(base, disp));
+        addr_reg = rscratch2;
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        addr_reg = base;
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      }
+    }
+    Label retry_load, done;
+    __ bind(retry_load);
+    __ ldxrw(rscratch1, addr_reg);
+    __ cmpw(rscratch1, old_reg);
+    __ br(Assembler::NE, done);
+    __ stlxrw(rscratch1, new_reg, addr_reg);
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(done);
+  %}
+
+  // auxiliary used for CompareAndSwapX to set result register
+  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
+    MacroAssembler _masm(&cbuf);
+    Register res_reg = as_Register($res$$reg);
+    __ cset(res_reg, Assembler::EQ);
+  %}
+
+  // prefetch encodings
+
+  enc_class aarch64_enc_prefetchr(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+      __ prfm(Address(base, disp), PLDL1KEEP);
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_prefetchw(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+      __ prfm(Address(base, disp), PSTL1KEEP);
+      __ nop();
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_prefetchnta(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    Register base = as_Register($mem$$base);
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+      __ prfm(Address(base, disp), PSTL1STRM);
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
+        __ nop();
+      } else {
+        __ lea(rscratch1, Address(base, disp));
+	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
+    MacroAssembler _masm(&cbuf);
+    Register cnt_reg = as_Register($cnt$$reg);
+    Register base_reg = as_Register($base$$reg);
+    // base is word aligned
+    // cnt is count of words
+
+    Label loop;
+    Label entry;
+
+//  Algorithm:
+//
+//    scratch1 = cnt & 7;
+//    cnt -= scratch1;
+//    p += scratch1;
+//    switch (scratch1) {
+//      do {
+//        cnt -= 8;
+//          p[-8] = 0;
+//        case 7:
+//          p[-7] = 0;
+//        case 6:
+//          p[-6] = 0;
+//          // ...
+//        case 1:
+//          p[-1] = 0;
+//        case 0:
+//          p += 8;
+//      } while (cnt);
+//    }
+
+    const int unroll = 8; // Number of str(zr) instructions we'll unroll
+
+    __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
+    __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= unroll
+    // base_reg always points to the end of the region we're about to zero
+    __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
+    __ adr(rscratch2, entry);
+    __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
+    __ br(rscratch2);
+    __ bind(loop);
+    __ sub(cnt_reg, cnt_reg, unroll);
+    for (int i = -unroll; i < 0; i++)
+      __ str(zr, Address(base_reg, i * wordSize));
+    __ bind(entry);
+    __ add(base_reg, base_reg, unroll * wordSize);
+    __ cbnz(cnt_reg, loop);
+  %}
+
+  /// mov envcodings
+
+  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
+    MacroAssembler _masm(&cbuf);
+    u_int32_t con = (u_int32_t)$src$$constant;
+    Register dst_reg = as_Register($dst$$reg);
+    if (con == 0) {
+      __ movw(dst_reg, zr);
+    } else {
+      __ movw(dst_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    u_int64_t con = (u_int64_t)$src$$constant;
+    if (con == 0) {
+      __ mov(dst_reg, zr);
+    } else {
+      __ mov(dst_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL || con == (address)1) {
+      ShouldNotReachHere();
+    } else {
+      relocInfo::relocType rtype = $src->constant_reloc();
+      if (rtype == relocInfo::oop_type) {
+        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
+      } else if (rtype == relocInfo::metadata_type) {
+        __ mov_metadata(dst_reg, (Metadata*)con);
+      } else {
+        assert(rtype == relocInfo::none, "unexpected reloc type");
+        if (con < (address)(uintptr_t)os::vm_page_size()) {
+          __ mov(dst_reg, con);
+        } else {
+          unsigned long offset;
+          __ adrp(dst_reg, con, offset);
+          __ add(dst_reg, dst_reg, offset);
+        }
+      }
+    }
+  %}
+
+  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mov(dst_reg, zr);
+  %}
+
+  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mov(dst_reg, (u_int64_t)1);
+  %}
+
+  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
+    MacroAssembler _masm(&cbuf);
+    address page = (address)$src$$constant;
+    Register dst_reg = as_Register($dst$$reg);
+    unsigned long off;
+    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
+    assert(off == 0, "assumed offset == 0");
+  %}
+
+  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
+    MacroAssembler _masm(&cbuf);
+    address page = (address)$src$$constant;
+    Register dst_reg = as_Register($dst$$reg);
+    unsigned long off;
+    __ adrp(dst_reg, ExternalAddress(page), off);
+    assert(off == 0, "assumed offset == 0");
+  %}
+
+  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      ShouldNotReachHere();
+    } else {
+      relocInfo::relocType rtype = $src->constant_reloc();
+      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
+      __ set_narrow_oop(dst_reg, (jobject)con);
+    }
+  %}
+
+  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mov(dst_reg, zr);
+  %}
+
+  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      ShouldNotReachHere();
+    } else {
+      relocInfo::relocType rtype = $src->constant_reloc();
+      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
+      __ set_narrow_klass(dst_reg, (Klass *)con);
+    }
+  %}
+
+  // arithmetic encodings
+
+  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src_reg = as_Register($src1$$reg);
+    int32_t con = (int32_t)$src2$$constant;
+    // add has primary == 0, subtract has primary == 1
+    if ($primary) { con = -con; }
+    if (con < 0) {
+      __ subw(dst_reg, src_reg, -con);
+    } else {
+      __ addw(dst_reg, src_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src_reg = as_Register($src1$$reg);
+    int32_t con = (int32_t)$src2$$constant;
+    // add has primary == 0, subtract has primary == 1
+    if ($primary) { con = -con; }
+    if (con < 0) {
+      __ sub(dst_reg, src_reg, -con);
+    } else {
+      __ add(dst_reg, src_reg, con);
+    }
+  %}
+
+  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+   Register dst_reg = as_Register($dst$$reg);
+   Register src1_reg = as_Register($src1$$reg);
+   Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
+  %}
+
+  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+   Register dst_reg = as_Register($dst$$reg);
+   Register src1_reg = as_Register($src1$$reg);
+   Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
+  %}
+
+  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+   Register dst_reg = as_Register($dst$$reg);
+   Register src1_reg = as_Register($src1$$reg);
+   Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
+  %}
+
+  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+   Register dst_reg = as_Register($dst$$reg);
+   Register src1_reg = as_Register($src1$$reg);
+   Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
+  %}
+
+  // compare instruction encodings
+
+  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmpw(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src1$$reg);
+    int32_t val = $src2$$constant;
+    if (val >= 0) {
+      __ subsw(zr, reg, val);
+    } else {
+      __ addsw(zr, reg, -val);
+    }
+  %}
+
+  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    u_int32_t val = (u_int32_t)$src2$$constant;
+    __ movw(rscratch1, val);
+    __ cmpw(reg1, rscratch1);
+  %}
+
+  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmp(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src1$$reg);
+    int64_t val = $src2$$constant;
+    if (val >= 0) {
+      __ subs(zr, reg, val);
+    } else if (val != -val) {
+      __ adds(zr, reg, -val);
+    } else {
+    // aargh, Long.MIN_VALUE is a special case
+      __ orr(rscratch1, zr, (u_int64_t)val);
+      __ subs(zr, reg, rscratch1);
+    }
+  %}
+
+  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    u_int64_t val = (u_int64_t)$src2$$constant;
+    __ mov(rscratch1, val);
+    __ cmp(reg1, rscratch1);
+  %}
+
+  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmp(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg1 = as_Register($src1$$reg);
+    Register reg2 = as_Register($src2$$reg);
+    __ cmpw(reg1, reg2);
+  %}
+
+  enc_class aarch64_enc_testp(iRegP src) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src$$reg);
+    __ cmp(reg, zr);
+  %}
+
+  enc_class aarch64_enc_testn(iRegN src) %{
+    MacroAssembler _masm(&cbuf);
+    Register reg = as_Register($src$$reg);
+    __ cmpw(reg, zr);
+  %}
+
+  enc_class aarch64_enc_b(label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label *L = $lbl$$label;
+    __ b(*L);
+  %}
+
+  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label *L = $lbl$$label;
+    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
+  %}
+
+  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label *L = $lbl$$label;
+    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
+  %}
+
+  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
+  %{
+     Register sub_reg = as_Register($sub$$reg);
+     Register super_reg = as_Register($super$$reg);
+     Register temp_reg = as_Register($temp$$reg);
+     Register result_reg = as_Register($result$$reg);
+
+     Label miss;
+     MacroAssembler _masm(&cbuf);
+     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
+                                     NULL, &miss,
+                                     /*set_cond_codes:*/ true);
+     if ($primary) {
+       __ mov(result_reg, zr);
+     }
+     __ bind(miss);
+  %}
+
+  enc_class aarch64_enc_java_static_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    address addr = (address)$meth$$method;
+    if (!_method) {
+      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+    } else if (_optimized_virtual) {
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
+    } else {
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
+    }
+
+    if (_method) {
+      // Emit stub for static call
+      CompiledStaticCall::emit_to_interp_stub(cbuf);
+    }
+  %}
+
+  enc_class aarch64_enc_java_handle_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+    relocInfo::relocType reloc;
+
+    // RFP is preserved across all calls, even compiled calls.
+    // Use it to preserve SP.
+    __ mov(rfp, sp);
+
+    const int start_offset = __ offset();
+    address addr = (address)$meth$$method;
+    if (!_method) {
+      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+    } else if (_optimized_virtual) {
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
+    } else {
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
+    }
+
+    if (_method) {
+      // Emit stub for static call
+      CompiledStaticCall::emit_to_interp_stub(cbuf);
+    }
+
+    // now restore sp
+    __ mov(sp, rfp);
+  %}
+
+  enc_class aarch64_enc_java_dynamic_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+    __ ic_call((address)$meth$$method);
+  %}
+
+  enc_class aarch64_enc_call_epilog() %{
+    MacroAssembler _masm(&cbuf);
+    if (VerifyStackAtCalls) {
+      // Check that stack depth is unchanged: find majik cookie on stack
+      __ call_Unimplemented();
+    }
+  %}
+
+  enc_class aarch64_enc_java_to_runtime(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    // some calls to generated routines (arraycopy code) are scheduled
+    // by C2 as runtime calls. if so we can call them using a br (they
+    // will be in a reachable segment) otherwise we have to use a blrt
+    // which loads the absolute address into a register.
+    address entry = (address)$meth$$method;
+    CodeBlob *cb = CodeCache::find_blob(entry);
+    if (cb) {
+      __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
+    } else {
+      int gpcnt;
+      int fpcnt;
+      int rtype;
+      getCallInfo(tf(), gpcnt, fpcnt, rtype);
+      Label retaddr;
+      __ adr(rscratch2, retaddr);
+      __ lea(rscratch1, RuntimeAddress(entry));
+      // Leave a breadcrumb for JavaThread::pd_last_frame().
+      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
+      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
+      __ bind(retaddr);
+      __ add(sp, sp, 2 * wordSize);
+    }
+  %}
+
+  enc_class aarch64_enc_rethrow() %{
+    MacroAssembler _masm(&cbuf);
+    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
+  %}
+
+  enc_class aarch64_enc_ret() %{
+    MacroAssembler _masm(&cbuf);
+    __ ret(lr);
+  %}
+
+  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
+    MacroAssembler _masm(&cbuf);
+    Register target_reg = as_Register($jump_target$$reg);
+    __ br(target_reg);
+  %}
+
+  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
+    MacroAssembler _masm(&cbuf);
+    Register target_reg = as_Register($jump_target$$reg);
+    // exception oop should be in r0
+    // ret addr has been popped into lr
+    // callee expects it in r3
+    __ mov(r3, lr);
+    __ br(target_reg);
+  %}
+
+  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
+    MacroAssembler _masm(&cbuf);
+    Register oop = as_Register($object$$reg);
+    Register box = as_Register($box$$reg);
+    Register disp_hdr = as_Register($tmp$$reg);
+    Register tmp = as_Register($tmp2$$reg);
+    Label cont;
+    Label object_has_monitor;
+    Label cas_failed;
+
+    assert_different_registers(oop, box, tmp, disp_hdr);
+
+    // Load markOop from object into displaced_header.
+    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
+
+    // Always do locking in runtime.
+    if (EmitSync & 0x01) {
+      __ cmp(oop, zr);
+      return;
+    }
+
+    if (UseBiasedLocking) {
+      __ biased_locking_enter(disp_hdr, oop, box, tmp, true, cont);
+    }
+
+    // Handle existing monitor
+    if (EmitSync & 0x02) {
+      // we can use AArch64's bit test and branch here but
+      // markoopDesc does not define a bit index just the bit value
+      // so assert in case the bit pos changes
+#     define __monitor_value_log2 1
+      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
+      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
+#     undef __monitor_value_log2
+    }
+
+    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
+    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
+
+    // Load Compare Value application register.
+
+    // Initialize the box. (Must happen before we update the object mark!)
+    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // Compare object markOop with mark and if equal exchange scratch1
+    // with object markOop.
+    // Note that this is simply a CAS: it does not generate any
+    // barriers.  These are separately generated by
+    // membar_acquire_lock().
+    {
+      Label retry_load;
+      __ bind(retry_load);
+      __ ldxr(tmp, oop);
+      __ cmp(tmp, disp_hdr);
+      __ br(Assembler::NE, cas_failed);
+      // use stlxr to ensure update is immediately visible
+      __ stlxr(tmp, box, oop);
+      __ cbzw(tmp, cont);
+      __ b(retry_load);
+    }
+
+    // Formerly:
+    // __ cmpxchgptr(/*oldv=*/disp_hdr,
+    //               /*newv=*/box,
+    //               /*addr=*/oop,
+    //               /*tmp=*/tmp,
+    //               cont,
+    //               /*fail*/NULL);
+
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+    // If the compare-and-exchange succeeded, then we found an unlocked
+    // object, will have now locked it will continue at label cont
+
+    __ bind(cas_failed);
+    // We did not see an unlocked object so try the fast recursive case.
+
+    // Check if the owner is self by comparing the value in the
+    // markOop of object (disp_hdr) with the stack pointer.
+    __ mov(rscratch1, sp);
+    __ sub(disp_hdr, disp_hdr, rscratch1);
+    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+    // If condition is true we are cont and hence we can store 0 as the
+    // displaced header in the box, which indicates that it is a recursive lock.
+    __ ands(tmp/*==0?*/, disp_hdr, tmp);
+    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ b(cont);
+
+      __ bind(object_has_monitor);
+      // The object's monitor m is unlocked iff m->owner == NULL,
+      // otherwise m->owner may contain a thread or a stack address.
+      //
+      // Try to CAS m->owner from NULL to current thread.
+      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
+      __ mov(disp_hdr, zr);
+
+      {
+        Label retry_load, fail;
+        __ bind(retry_load);
+        __ ldxr(rscratch1, tmp);
+        __ cmp(disp_hdr, rscratch1);
+        __ br(Assembler::NE, fail);
+        // use stlxr to ensure update is immediately visible
+        __ stlxr(rscratch1, rthread, tmp);
+        __ cbnzw(rscratch1, retry_load);
+        __ bind(fail);
+      }
+
+      // Label next;
+      // __ cmpxchgptr(/*oldv=*/disp_hdr,
+      //               /*newv=*/rthread,
+      //               /*addr=*/tmp,
+      //               /*tmp=*/rscratch1,
+      //               /*succeed*/next,
+      //               /*fail*/NULL);
+      // __ bind(next);
+
+      // store a non-null value into the box.
+      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+      // PPC port checks the following invariants
+      // #ifdef ASSERT
+      // bne(flag, cont);
+      // We have acquired the monitor, check some invariants.
+      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
+      // Invariant 1: _recursions should be 0.
+      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
+      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
+      //                        "monitor->_recursions should be 0", -1);
+      // Invariant 2: OwnerIsThread shouldn't be 0.
+      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
+      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
+      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
+      // #endif
+    }
+
+    __ bind(cont);
+    // flag == EQ indicates success
+    // flag == NE indicates failure
+
+  %}
+
+  // TODO
+  // reimplement this with custom cmpxchgptr code
+  // which avoids some of the unnecessary branching
+  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
+    MacroAssembler _masm(&cbuf);
+    Register oop = as_Register($object$$reg);
+    Register box = as_Register($box$$reg);
+    Register disp_hdr = as_Register($tmp$$reg);
+    Register tmp = as_Register($tmp2$$reg);
+    Label cont;
+    Label object_has_monitor;
+    Label cas_failed;
+
+    assert_different_registers(oop, box, tmp, disp_hdr);
+
+    // Always do locking in runtime.
+    if (EmitSync & 0x01) {
+      __ cmp(oop, zr); // Oop can't be 0 here => always false.
+      return;
+    }
+
+    if (UseBiasedLocking) {
+      __ biased_locking_exit(oop, tmp, cont);
+    }
+
+    // Find the lock address and load the displaced header from the stack.
+    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // If the displaced header is 0, we have a recursive unlock.
+    __ cmp(disp_hdr, zr);
+    __ br(Assembler::EQ, cont);
+
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
+      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
+    }
+
+    // Check if it is still a light weight lock, this is is true if we
+    // see the stack address of the basicLock in the markOop of the
+    // object.
+
+      {
+        Label retry_load;
+        __ bind(retry_load);
+        __ ldxr(tmp, oop);
+        __ cmp(box, tmp);
+        __ br(Assembler::NE, cas_failed);
+        // use stlxr to ensure update is immediately visible
+        __ stlxr(tmp, disp_hdr, oop);
+        __ cbzw(tmp, cont);
+        __ b(retry_load);
+      }
+
+    // __ cmpxchgptr(/*compare_value=*/box,
+    //               /*exchange_value=*/disp_hdr,
+    //               /*where=*/oop,
+    //               /*result=*/tmp,
+    //               cont,
+    //               /*cas_failed*/NULL);
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+    __ bind(cas_failed);
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ b(cont);
+
+      __ bind(object_has_monitor);
+      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
+      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
+      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
+      __ cmp(rscratch1, zr);
+      __ br(Assembler::NE, cont);
+
+      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
+      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
+      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
+      __ cmp(rscratch1, zr);
+      __ cbnz(rscratch1, cont);
+      // need a release store here
+      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+      __ stlr(rscratch1, tmp); // rscratch1 is zero
+    }
+
+    __ bind(cont);
+    // flag == EQ indicates success
+    // flag == NE indicates failure
+  %}
+
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+//  S T A C K   L A Y O U T    Allocators stack-slot number
+//                             |   (to get allocators register number
+//  G  Owned by    |        |  v    add OptoReg::stack0())
+//  r   CALLER     |        |
+//  o     |        +--------+      pad to even-align allocators stack-slot
+//  w     V        |  pad0  |        numbers; owned by CALLER
+//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
+//  h     ^        |   in   |  5
+//        |        |  args  |  4   Holes in incoming args owned by SELF
+//  |     |        |        |  3
+//  |     |        +--------+
+//  V     |        | old out|      Empty on Intel, window on Sparc
+//        |    old |preserve|      Must be even aligned.
+//        |     SP-+--------+----> Matcher::_old_SP, even aligned
+//        |        |   in   |  3   area for Intel ret address
+//     Owned by    |preserve|      Empty on Sparc.
+//       SELF      +--------+
+//        |        |  pad2  |  2   pad to align old SP
+//        |        +--------+  1
+//        |        | locks  |  0
+//        |        +--------+----> OptoReg::stack0(), even aligned
+//        |        |  pad1  | 11   pad to align new SP
+//        |        +--------+
+//        |        |        | 10
+//        |        | spills |  9   spills
+//        V        |        |  8   (pad0 slot for callee)
+//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
+//        ^        |  out   |  7
+//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
+//     Owned by    +--------+
+//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
+//        |    new |preserve|      Must be even-aligned.
+//        |     SP-+--------+----> Matcher::_new_SP, even aligned
+//        |        |        |
+//
+// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
+//         known from SELF's arguments and the Java calling convention.
+//         Region 6-7 is determined per call site.
+// Note 2: If the calling convention leaves holes in the incoming argument
+//         area, those holes are owned by SELF.  Holes in the outgoing area
+//         are owned by the CALLEE.  Holes should not be nessecary in the
+//         incoming area, as the Java calling convention is completely under
+//         the control of the AD file.  Doubles can be sorted and packed to
+//         avoid holes.  Holes in the outgoing arguments may be nessecary for
+//         varargs C calling conventions.
+// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
+//         even aligned with pad0 as needed.
+//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
+//           (the latter is true on Intel but is it false on AArch64?)
+//         region 6-11 is even aligned; it may be padded out more so that
+//         the region from SP to FP meets the minimum stack alignment.
+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
+//         alignment.  Region 11, pad1, may be dynamically extended so that
+//         SP meets the minimum alignment.
+
+frame %{
+  // What direction does stack grow in (assumed to be same for C & Java)
+  stack_direction(TOWARDS_LOW);
+
+  // These three registers define part of the calling convention
+  // between compiled code and the interpreter.
+
+  // Inline Cache Register or methodOop for I2C.
+  inline_cache_reg(R12);
+
+  // Method Oop Register when calling interpreter.
+  interpreter_method_oop_reg(R12);
+
+  // Number of stack slots consumed by locking an object
+  sync_stack_slots(2);
+
+  // Compiled code's Frame Pointer
+  frame_pointer(R31);
+
+  // Interpreter stores its frame pointer in a register which is
+  // stored to the stack by I2CAdaptors.
+  // I2CAdaptors convert from interpreted java to compiled java.
+  interpreter_frame_pointer(R29);
+
+  // Stack alignment requirement
+  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
+
+  // Number of stack slots between incoming argument block and the start of
+  // a new frame.  The PROLOG must add this many slots to the stack.  The
+  // EPILOG must remove this many slots. aarch64 needs two slots for
+  // return address and fp.
+  // TODO think this is correct but check
+  in_preserve_stack_slots(4);
+
+  // Number of outgoing stack slots killed above the out_preserve_stack_slots
+  // for calls to C.  Supports the var-args backing area for register parms.
+  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
+
+  // The after-PROLOG location of the return address.  Location of
+  // return address specifies a type (REG or STACK) and a number
+  // representing the register number (i.e. - use a register name) or
+  // stack slot.
+  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
+  // Otherwise, it is above the locks and verification slot and alignment word
+  // TODO this may well be correct but need to check why that - 2 is there
+  // ppc port uses 0 but we definitely need to allow for fixed_slots
+  // which folds in the space used for monitors
+  return_addr(STACK - 2 +
+              round_to((Compile::current()->in_preserve_stack_slots() +
+                        Compile::current()->fixed_slots()),
+                       stack_alignment_in_slots()));
+
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots.  Passed an array
+  // of ideal registers called "sig" and a "length" count.  Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE.  Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+
+  calling_convention
+  %{
+    // No difference between ingoing/outgoing just pass false
+    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
+  %}
+
+  c_calling_convention
+  %{
+    // This is obviously always outgoing
+    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
+  %}
+
+  // Location of compiled Java return values.  Same as C for now.
+  return_value
+  %{
+    // TODO do we allow ideal_reg == Op_RegN???
+    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
+           "only return normal values");
+
+    static const int lo[Op_RegL + 1] = { // enum name
+      0,                                 // Op_Node
+      0,                                 // Op_Set
+      R0_num,                            // Op_RegN
+      R0_num,                            // Op_RegI
+      R0_num,                            // Op_RegP
+      V0_num,                            // Op_RegF
+      V0_num,                            // Op_RegD
+      R0_num                             // Op_RegL
+    };
+
+    static const int hi[Op_RegL + 1] = { // enum name
+      0,                                 // Op_Node
+      0,                                 // Op_Set
+      OptoReg::Bad,                       // Op_RegN
+      OptoReg::Bad,                      // Op_RegI
+      R0_H_num,                          // Op_RegP
+      OptoReg::Bad,                      // Op_RegF
+      V0_H_num,                          // Op_RegD
+      R0_H_num                           // Op_RegL
+    };
+
+    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
+  %}
+%}
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1);        // Required cost attribute
+
+//----------Instruction Attributes---------------------------------------------
+ins_attrib ins_cost(INSN_COST); // Required cost attribute
+ins_attrib ins_size(32);        // Required size attribute (in bits)
+ins_attrib ins_short_branch(0); // Required flag: is this instruction
+                                // a non-matching short branch variant
+                                // of some long branch?
+ins_attrib ins_alignment(4);    // Required alignment attribute (must
+                                // be a power of 2) specifies the
+                                // alignment that some part of the
+                                // instruction (not necessarily the
+                                // start) requires.  If > 1, a
+                                // compute_padding() function must be
+                                // provided for the instruction
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+
+// Integer operands 32 bit
+// 32 bit immediate
+operand immI()
+%{
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit zero
+operand immI0()
+%{
+  predicate(n->get_int() == 0);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unit increment
+operand immI_1()
+%{
+  predicate(n->get_int() == 1);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unit decrement
+operand immI_M1()
+%{
+  predicate(n->get_int() == -1);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_le_4()
+%{
+  predicate(n->get_int() <= 4);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_31()
+%{
+  predicate(n->get_int() == 31);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_8()
+%{
+  predicate(n->get_int() == 8);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_16()
+%{
+  predicate(n->get_int() == 16);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_24()
+%{
+  predicate(n->get_int() == 24);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_32()
+%{
+  predicate(n->get_int() == 32);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_48()
+%{
+  predicate(n->get_int() == 48);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_56()
+%{
+  predicate(n->get_int() == 56);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_64()
+%{
+  predicate(n->get_int() == 64);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_255()
+%{
+  predicate(n->get_int() == 255);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_65535()
+%{
+  predicate(n->get_int() == 65535);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_63()
+%{
+  predicate(n->get_int() == 63);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_255()
+%{
+  predicate(n->get_int() == 255);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_65535()
+%{
+  predicate(n->get_long() == 65535L);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_4294967295()
+%{
+  predicate(n->get_long() == 4294967295L);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_bitmask()
+%{
+  predicate(((n->get_long() & 0xc000000000000000l) == 0)
+            && is_power_of_2(n->get_long() + 1));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_bitmask()
+%{
+  predicate(((n->get_int() & 0xc0000000) == 0)
+            && is_power_of_2(n->get_int() + 1));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Scale values for scaled offset addressing modes (up to long but not quad)
+operand immIScale()
+%{
+  predicate(0 <= n->get_int() && (n->get_int() <= 3));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 26 bit signed offset -- for pc-relative branches
+operand immI26()
+%{
+  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 19 bit signed offset -- for pc-relative loads
+operand immI19()
+%{
+  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 12 bit unsigned offset -- for base plus immediate loads
+operand immIU12()
+%{
+  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immLU12()
+%{
+  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Offset for scaled or unscaled immediate loads and stores
+operand immIOffset()
+%{
+  predicate(Address::offset_ok_for_immed(n->get_int()));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immLoffset()
+%{
+  predicate(Address::offset_ok_for_immed(n->get_long()));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer valid for add sub immediate
+operand immIAddSub()
+%{
+  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unsigned integer valid for logical immediate
+// TODO -- check this is right when e.g the mask is 0x80000000
+operand immILog()
+%{
+  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer operands 64 bit
+// 64 bit immediate
+operand immL()
+%{
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit zero
+operand immL0()
+%{
+  predicate(n->get_long() == 0);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit unit increment
+operand immL_1()
+%{
+  predicate(n->get_long() == 1);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit unit decrement
+operand immL_M1()
+%{
+  predicate(n->get_long() == -1);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit offset of pc in thread anchor
+
+operand immL_pc_off()
+%{
+  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
+                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for add sub immediate
+operand immLAddSub()
+%{
+  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for logical immediate
+operand immLLog()
+%{
+  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask
+operand immL_32bits()
+%{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer operands
+// Pointer Immediate
+operand immP()
+%{
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// NULL Pointer Immediate
+operand immP0()
+%{
+  predicate(n->get_ptr() == 0);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate One
+// this is used in object initialization (initial object header)
+operand immP_1()
+%{
+  predicate(n->get_ptr() == 1);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Polling Page Pointer Immediate
+operand immPollPage()
+%{
+  predicate((address)n->get_ptr() == os::get_polling_page());
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Card Table Byte Map Base
+operand immByteMapBase()
+%{
+  // Get base of card map
+  predicate((jbyte*)n->get_ptr() ==
+        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate Minus One
+// this is used when we want to write the current PC to the thread anchor
+operand immP_M1()
+%{
+  predicate(n->get_ptr() == -1);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate Minus Two
+// this is used when we want to write the current PC to the thread anchor
+operand immP_M2()
+%{
+  predicate(n->get_ptr() == -2);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float and Double operands
+// Double Immediate
+operand immD()
+%{
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// constant 'double +0.0'.
+operand immD0()
+%{
+  predicate((n->getd() == 0) &&
+            (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0));
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// constant 'double +0.0'.
+operand immDPacked()
+%{
+  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
+operand immF()
+%{
+  match(ConF);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// constant 'float +0.0'.
+operand immF0()
+%{
+  predicate((n->getf() == 0) &&
+            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
+  match(ConF);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//
+operand immFPacked()
+%{
+  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
+  match(ConF);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow pointer operands
+// Narrow Pointer Immediate
+operand immN()
+%{
+  match(ConN);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate
+operand immN0()
+%{
+  predicate(n->get_narrowcon() == 0);
+  match(ConN);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immNKlass()
+%{
+  match(ConNKlass);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer 32 bit Register Operands
+// Integer 32 bitRegister (excludes SP)
+operand iRegI()
+%{
+  constraint(ALLOC_IN_RC(any_reg32));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 32 bit Register not Special
+operand iRegINoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg32));
+  match(RegI);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 64 bit Register Operands
+// Integer 64 bit Register (includes SP)
+operand iRegL()
+%{
+  constraint(ALLOC_IN_RC(any_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 64 bit Register not Special
+operand iRegLNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg));
+  match(RegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer Register Operands
+// Pointer Register
+operand iRegP()
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(RegP);
+  match(iRegPNoSp);
+  match(iRegP_R0);
+  //match(iRegP_R2);
+  //match(iRegP_R4);
+  //match(iRegP_R5);
+  match(thread_RegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register not Special
+operand iRegPNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_ptr_reg));
+  match(RegP);
+  // match(iRegP);
+  // match(iRegP_R0);
+  // match(iRegP_R2);
+  // match(iRegP_R4);
+  // match(iRegP_R5);
+  // match(thread_RegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R0 only
+operand iRegP_R0()
+%{
+  constraint(ALLOC_IN_RC(r0_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R1 only
+operand iRegP_R1()
+%{
+  constraint(ALLOC_IN_RC(r1_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R2 only
+operand iRegP_R2()
+%{
+  constraint(ALLOC_IN_RC(r2_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R3 only
+operand iRegP_R3()
+%{
+  constraint(ALLOC_IN_RC(r3_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R4 only
+operand iRegP_R4()
+%{
+  constraint(ALLOC_IN_RC(r4_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R5 only
+operand iRegP_R5()
+%{
+  constraint(ALLOC_IN_RC(r5_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R10 only
+operand iRegP_R10()
+%{
+  constraint(ALLOC_IN_RC(r10_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R11 only
+operand iRegL_R11()
+%{
+  constraint(ALLOC_IN_RC(r11_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register FP only
+operand iRegP_FP()
+%{
+  constraint(ALLOC_IN_RC(fp_reg));
+  match(RegP);
+  // match(iRegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R0 only
+operand iRegI_R0()
+%{
+  constraint(ALLOC_IN_RC(int_r0_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R2 only
+operand iRegI_R2()
+%{
+  constraint(ALLOC_IN_RC(int_r2_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R3 only
+operand iRegI_R3()
+%{
+  constraint(ALLOC_IN_RC(int_r3_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
+// Register R2 only
+operand iRegI_R4()
+%{
+  constraint(ALLOC_IN_RC(int_r4_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
+// Pointer Register Operands
+// Narrow Pointer Register
+operand iRegN()
+%{
+  constraint(ALLOC_IN_RC(any_reg32));
+  match(RegN);
+  match(iRegNNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 64 bit Register not Special
+operand iRegNNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg32));
+  match(RegN);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// heap base register -- used for encoding immN0
+
+operand iRegIHeapbase()
+%{
+  constraint(ALLOC_IN_RC(heapbase_reg));
+  match(RegI);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float Register
+// Float register operands
+operand vRegF()
+%{
+  constraint(ALLOC_IN_RC(float_reg));
+  match(RegF);
+
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Double Register
+// Double register operands
+operand vRegD()
+%{
+  constraint(ALLOC_IN_RC(double_reg));
+  match(RegD);
+
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V0()
+%{
+  constraint(ALLOC_IN_RC(v0_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V1()
+%{
+  constraint(ALLOC_IN_RC(v1_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V2()
+%{
+  constraint(ALLOC_IN_RC(v2_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V3()
+%{
+  constraint(ALLOC_IN_RC(v3_reg));
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Flags register, used as output of signed compare instructions
+
+// note that on AArch64 we also use this register as the output for
+// for floating point compare instructions (CmpF CmpD). this ensures
+// that ordered inequality tests use GT, GE, LT or LE none of which
+// pass through cases where the result is unordered i.e. one or both
+// inputs to the compare is a NaN. this means that the ideal code can
+// replace e.g. a GT with an LE and not end up capturing the NaN case
+// (where the comparison should always fail). EQ and NE tests are
+// always generated in ideal code so that unordered folds into the NE
+// case, matching the behaviour of AArch64 NE.
+//
+// This differs from x86 where the outputs of FP compares use a
+// special FP flags registers and where compares based on this
+// register are distinguished into ordered inequalities (cmpOpUCF) and
+// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
+// to explicitly handle the unordered case in branches. x86 also has
+// to include extra CMoveX rules to accept a cmpOpUCF input.
+
+operand rFlagsReg()
+%{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  op_cost(0);
+  format %{ "RFLAGS" %}
+  interface(REG_INTER);
+%}
+
+// Flags register, used as output of unsigned compare instructions
+operand rFlagsRegU()
+%{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  op_cost(0);
+  format %{ "RFLAGSU" %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Method Register
+operand inline_cache_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
+  match(reg);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand interpreter_method_oop_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
+  match(reg);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Thread Register
+operand thread_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(thread_reg)); // link_reg
+  match(reg);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand lr_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
+  match(reg);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+//----------Memory Operands----------------------------------------------------
+
+operand indirect(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(reg);
+  op_cost(0);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (LShiftL lreg scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $lreg lsl($scale), $off" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (LShiftL lreg scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $lreg lsl($scale), $off" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg (LShiftL (ConvI2L ireg) scale));
+  op_cost(0);
+  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg (LShiftL lreg scale));
+  op_cost(0);
+  format %{ "$reg, $lreg lsl($scale)" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndex(iRegP reg, iRegL lreg)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg lreg);
+  op_cost(0);
+  format %{ "$reg, $lreg" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indOffI(iRegP reg, immIOffset off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(INSN_COST);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffL(iRegP reg, immLoffset off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+
+operand indirectN(iRegN reg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);
+  op_cost(0);
+  format %{ "[$reg]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+  op_cost(0);
+  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
+  op_cost(0);
+  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL lreg scale));
+  op_cost(0);
+  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexN(iRegN reg, iRegL lreg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) lreg);
+  op_cost(0);
+  format %{ "$reg, $lreg\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indOffIN(iRegN reg, immIOffset off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffLN(iRegN reg, immLoffset off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+
+
+// AArch64 opto stubs need to write to the pc slot in the thread anchor
+operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+//----------Special Memory Operands--------------------------------------------
+// Stack Slot Operand - This operand is used for loading and storing temporary
+//                      values on the stack where a match requires a value to
+//                      flow through memory.
+operand stackSlotP(sRegP reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(100);
+  // No match rule because this operand is only generated in matching
+  // match(RegP);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotI(sRegI reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegI);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotF(sRegF reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegF);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotD(sRegD reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegD);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotL(sRegL reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegL);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x1e);  // RSP
+    index(0x0);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+// Operands for expressing Control Flow
+// NOTE: Label is a predefined operand which should not be redefined in
+//       the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op  - This is the operation of the comparison, and is limited to
+//                  the following set of codes:
+//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
+
+// used for signed integral comparisons and fp comparisons
+
+operand cmpOp()
+%{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    not_equal(0x1, "ne");
+    less(0xb, "lt");
+    greater_equal(0xa, "ge");
+    less_equal(0xd, "le");
+    greater(0xc, "gt");
+    overflow(0x6, "vs");
+    no_overflow(0x7, "vc");
+  %}
+%}
+
+// used for unsigned integral comparisons
+
+operand cmpOpU()
+%{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    not_equal(0x1, "ne");
+    less(0x3, "lo");
+    greater_equal(0x2, "hs");
+    less_equal(0x9, "ls");
+    greater(0x8, "hi");
+    overflow(0x6, "vs");
+    no_overflow(0x7, "vc");
+  %}
+%}
+
+// Special operand allowing long args to int ops to be truncated for free
+
+operand iRegL2I(iRegL reg) %{
+
+  op_cost(0);
+
+  match(ConvL2I reg);
+
+  format %{ "l2i($reg)" %}
+
+  interface(REG_INTER)
+%}
+
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used as to simplify
+// instruction definitions by not requiring the AD writer to specify
+// separate instructions for every form of operand when the
+// instruction accepts multiple operand types with the same basic
+// encoding and format. The classic case of this is memory operands.
+
+// memory is used to define read/write location for load/store
+// instruction defs. we can turn a memory op into an Address
+
+opclass memory(indirect, indIndexScaledOffsetI,  indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
+               indirectN, indIndexScaledOffsetIN,  indIndexScaledOffsetLN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
+
+
+// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
+// operations. it allows the src to be either an iRegI or a (ConvL2I
+// iRegL). in the latter case the l2i normally planted for a ConvL2I
+// can be elided because the 32-bit instruction will just employ the
+// lower 32 bits anyway.
+//
+// n.b. this does not elide all L2I conversions. if the truncated
+// value is consumed by more than one operation then the ConvL2I
+// cannot be bundled into the consuming nodes so an l2i gets planted
+// (actually a movw $dst $src) and the downstream instructions consume
+// the result of the l2i as an iRegI input. That's a shame since the
+// movw is actually redundant but its not too costly.
+
+opclass iRegIorL2I(iRegI, iRegL2I);
+
+//----------PIPELINE-----------------------------------------------------------
+// Rules which define the behavior of the target architectures pipeline.
+// Integer ALU reg operation
+pipeline %{
+
+attributes %{
+  // ARM instructions are of fixed length
+  fixed_size_instructions;        // Fixed size instructions TODO does
+  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
+  // ARM instructions come in 32-bit word units
+  instruction_unit_size = 4;         // An instruction is 4 bytes long
+  instruction_fetch_unit_size = 64;  // The processor fetches one line
+  instruction_fetch_units = 1;       // of 64 bytes
+
+  // List of nop instructions
+  nops( MachNop );
+%}
+
+// We don't use an actual pipeline model so don't care about resources
+// or description. we do use pipeline classes to introduce fixed
+// latencies
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine
+
+resources( INS0, INS1, INS01 = INS0 | INS1,
+           ALU0, ALU1, ALU = ALU0 | ALU1,
+           MAC,
+           DIV,
+           BRANCH,
+           LDST,
+           NEON_FP);
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline
+
+pipe_desc(ISS, EX1, EX2, WR);
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+//------- Integer ALU operations --------------------------
+
+// Integer ALU reg-reg operation
+// Operands needed in EX1, result generated in EX2
+// Eg.  ADD     x0, x1, x2
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  src2   : EX1(read);
+  INS01  : ISS; // Dual issue as instruction 0 or 1
+  ALU    : EX2;
+%}
+
+// Integer ALU reg-reg operation with constant shift
+// Shifted register must be available in LATE_ISS instead of EX1
+// Eg.  ADD     x0, x1, x2, LSL #2
+pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg operation with constant shift
+// Eg.  LSL     x0, x1, #shift
+pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg-reg operation with variable shift
+// Both operands must be available in LATE_ISS instead of EX1
+// Result is available in EX1 instead of EX2
+// Eg.  LSLV    x0, x1, x2
+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX1(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX1;
+%}
+
+// Integer ALU reg-reg operation with extract
+// As for _vshift above, but result generated in EX2
+// Eg.  EXTR    x0, x1, x2, #N
+pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS1   : ISS; // Can only dual issue as Instruction 1
+  ALU    : EX1;
+%}
+
+// Integer ALU reg operation
+// Eg.  NEG     x0, x1
+pipe_class ialu_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg mmediate operation
+// Eg.  ADD     x0, x1, #N
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU immediate operation (no source operands)
+// Eg.  MOV     x0, #N
+pipe_class ialu_imm(iRegI dst)
+%{
+  single_instruction;
+  dst    : EX1(write);
+  INS01  : ISS;
+  ALU    : EX1;
+%}
+
+//------- Compare operation -------------------------------
+
+// Compare reg-reg
+// Eg.  CMP     x0, x1
+pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  single_instruction;
+//  fixed_latency(16);
+  cr     : EX2(write);
+  op1    : EX1(read);
+  op2    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Compare reg-reg
+// Eg.  CMP     x0, #N
+pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
+%{
+  single_instruction;
+//  fixed_latency(16);
+  cr     : EX2(write);
+  op1    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+//------- Conditional instructions ------------------------
+
+// Conditional no operands
+// Eg.  CSINC   x0, zr, zr, <cond>
+pipe_class icond_none(iRegI dst, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Conditional 2 operand
+// EG.  CSEL    X0, X1, X2, <cond>
+pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  src1   : EX1(read);
+  src2   : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Conditional 2 operand
+// EG.  CSEL    X0, X1, X2, <cond>
+pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  src    : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+//------- Multiply pipeline operations --------------------
+
+// Multiply reg-reg
+// Eg.  MUL     w0, w1, w2
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Multiply accumulate
+// Eg.  MADD    w0, w1, w2, w3
+pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  src3   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Eg.  MUL     w0, w1, w2
+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(3); // Maximum latency for 64 bit mul
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Multiply accumulate
+// Eg.  MADD    w0, w1, w2, w3
+pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+  single_instruction;
+  fixed_latency(3); // Maximum latency for 64 bit mul
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  src3   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+//------- Divide pipeline operations --------------------
+
+// Eg.  SDIV    w0, w1, w2
+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(8); // Maximum latency for 32 bit divide
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS0   : ISS; // Can only dual issue as instruction 0
+  DIV    : WR;
+%}
+
+// Eg.  SDIV    x0, x1, x2
+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(16); // Maximum latency for 64 bit divide
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS0   : ISS; // Can only dual issue as instruction 0
+  DIV    : WR;
+%}
+
+//------- Load pipeline operations ------------------------
+
+// Load - prefetch
+// Eg.  PFRM    <mem>
+pipe_class iload_prefetch(memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Load - reg, mem
+// Eg.  LDR     x0, <mem>
+pipe_class iload_reg_mem(iRegI dst, memory mem)
+%{
+  single_instruction;
+  dst    : WR(write);
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Load - reg, reg
+// Eg.  LDR     x0, [sp, x1]
+pipe_class iload_reg_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+//------- Store pipeline operations -----------------------
+
+// Store - zr, mem
+// Eg.  STR     zr, <mem>
+pipe_class istore_mem(memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Store - reg, mem
+// Eg.  STR     x0, <mem>
+pipe_class istore_reg_mem(iRegI src, memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  src    : EX2(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Store - reg, reg
+// Eg. STR      x0, [sp, x1]
+pipe_class istore_reg_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : ISS(read);
+  src    : EX2(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+//------- Store pipeline operations -----------------------
+
+// Branch
+pipe_class pipe_branch()
+%{
+  single_instruction;
+  INS01  : ISS;
+  BRANCH : EX1;
+%}
+
+// Conditional branch
+pipe_class pipe_branch_cond(rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  INS01  : ISS;
+  BRANCH : EX1;
+%}
+
+// Compare & Branch
+// EG.  CBZ/CBNZ
+pipe_class pipe_cmp_branch(iRegI op1)
+%{
+  single_instruction;
+  op1    : EX1(read);
+  INS01  : ISS;
+  BRANCH : EX1;
+%}
+
+//------- Synchronisation operations ----------------------
+
+// Any operation requiring serialization.
+// EG.  DMB/Atomic Ops/Load Acquire/Str Release
+pipe_class pipe_serial()
+%{
+  single_instruction;
+  force_serialization;
+  fixed_latency(16);
+  INS01  : ISS(2); // Cannot dual issue with any other instruction
+  LDST   : WR;
+%}
+
+// Generic big/slow expanded idiom - also serialized
+pipe_class pipe_slow()
+%{
+  instruction_count(10);
+  multiple_bundles;
+  force_serialization;
+  fixed_latency(16);
+  INS01  : ISS(2); // Cannot dual issue with any other instruction
+  LDST   : WR;
+%}
+
+// Empty pipeline class
+pipe_class pipe_class_empty()
+%{
+  single_instruction;
+  fixed_latency(0);
+%}
+
+// Default pipeline class.
+pipe_class pipe_class_default()
+%{
+  single_instruction;
+  fixed_latency(2);
+%}
+
+// Pipeline class for compares.
+pipe_class pipe_class_compare()
+%{
+  single_instruction;
+  fixed_latency(16);
+%}
+
+// Pipeline class for memory operations.
+pipe_class pipe_class_memory()
+%{
+  single_instruction;
+  fixed_latency(16);
+%}
+
+// Pipeline class for call.
+pipe_class pipe_class_call()
+%{
+  single_instruction;
+  fixed_latency(100);
+%}
+
+// Define the class for the Nop node.
+define %{
+   MachNop = pipe_class_empty;
+%}
+
+%}
+//----------INSTRUCTIONS-------------------------------------------------------
+//
+// match      -- States which machine-independent subtree may be replaced
+//               by this instruction.
+// ins_cost   -- The estimated cost of this instruction is used by instruction
+//               selection to identify a minimum cost tree of machine
+//               instructions that matches a tree of machine-independent
+//               instructions.
+// format     -- A string providing the disassembly for this instruction.
+//               The value of an instruction's operand may be inserted
+//               by referring to it with a '$' prefix.
+// opcode     -- Three instruction opcodes may be provided.  These are referred
+//               to within an encode class as $primary, $secondary, and $tertiary
+//               rrspectively.  The primary opcode is commonly used to
+//               indicate the type of machine instruction, while secondary
+//               and tertiary are often used for prefix options or addressing
+//               modes.
+// ins_encode -- A list of encode classes with parameters. The encode class
+//               name must have been defined in an 'enc_class' specification
+//               in the encode section of the architecture description.
+
+// ============================================================================
+// Memory (Load/Store) Instructions
+
+// Load Instructions
+
+// Load Byte (8 bit signed)
+instruct loadB(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadB mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsbw  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrsbw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit signed) into long
+instruct loadB2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadB mem)));
+  predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrsb(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned)
+instruct loadUB(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadUB mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrbw  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrb(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned) into long
+instruct loadUB2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUB mem)));
+  predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldrb(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed)
+instruct loadS(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadS mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrshw  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrshw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed) into long
+instruct loadS2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadS mem)));
+  predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrsh(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Char (16 bit unsigned)
+instruct loadUS(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadUS mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrh(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short/Char (16 bit unsigned) into long
+instruct loadUS2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUS mem)));
+  predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldrh(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed)
+instruct loadI(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadI mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed) into long
+instruct loadI2L(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadI mem)));
+  predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrsw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldrsw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit unsigned) into long
+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
+%{
+  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+  predicate(UseBarriersForVolatile || n->in(1)->in(1)->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Long (64 bit signed)
+instruct loadL(iRegLNoSp dst, memory mem)
+%{
+  match(Set dst (LoadL mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldr  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldr(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Range
+instruct loadRange(iRegINoSp dst, memory mem)
+%{
+  match(Set dst (LoadRange mem));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# range" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Pointer
+instruct loadP(iRegPNoSp dst, memory mem)
+%{
+  match(Set dst (LoadP mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldr  $dst, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_ldr(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Compressed Pointer
+instruct loadN(iRegNNoSp dst, memory mem)
+%{
+  match(Set dst (LoadN mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Klass Pointer
+instruct loadKlass(iRegPNoSp dst, memory mem)
+%{
+  match(Set dst (LoadKlass mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldr  $dst, $mem\t# class" %}
+
+  ins_encode(aarch64_enc_ldr(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Narrow Klass Pointer
+instruct loadNKlass(iRegNNoSp dst, memory mem)
+%{
+  match(Set dst (LoadNKlass mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
+
+  ins_encode(aarch64_enc_ldrw(dst, mem));
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Float
+instruct loadF(vRegF dst, memory mem)
+%{
+  match(Set dst (LoadF mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrs  $dst, $mem\t# float" %}
+
+  ins_encode( aarch64_enc_ldrs(dst, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Load Double
+instruct loadD(vRegD dst, memory mem)
+%{
+  match(Set dst (LoadD mem));
+  predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ldrd  $dst, $mem\t# double" %}
+
+  ins_encode( aarch64_enc_ldrd(dst, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+
+// Load Int Constant
+instruct loadConI(iRegINoSp dst, immI src)
+%{
+  match(Set dst src);
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# int" %}
+
+  ins_encode( aarch64_enc_movw_imm(dst, src) );
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Long Constant
+instruct loadConL(iRegLNoSp dst, immL src)
+%{
+  match(Set dst src);
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# long" %}
+
+  ins_encode( aarch64_enc_mov_imm(dst, src) );
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant
+
+instruct loadConP(iRegPNoSp dst, immP con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 4);
+  format %{
+    "mov  $dst, $con\t# ptr\n\t"
+  %}
+
+  ins_encode(aarch64_enc_mov_p(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Null Pointer Constant
+
+instruct loadConP0(iRegPNoSp dst, immP0 con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "mov  $dst, $con\t# NULL ptr" %}
+
+  ins_encode(aarch64_enc_mov_p0(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant One
+
+instruct loadConP1(iRegPNoSp dst, immP_1 con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "mov  $dst, $con\t# NULL ptr" %}
+
+  ins_encode(aarch64_enc_mov_p1(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Poll Page Constant
+
+instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
+
+  ins_encode(aarch64_enc_mov_poll_page(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Byte Map Base Constant
+
+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "adr  $dst, $con\t# Byte Map Base" %}
+
+  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Pointer Constant
+
+instruct loadConN(iRegNNoSp dst, immN con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 4);
+  format %{ "mov  $dst, $con\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_mov_n(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Null Pointer Constant
+
+instruct loadConN0(iRegNNoSp dst, immN0 con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
+
+  ins_encode(aarch64_enc_mov_n0(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Klass Constant
+
+instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
+%{
+  match(Set dst con);
+
+  ins_cost(INSN_COST);
+  format %{ "mov  $dst, $con\t# compressed klass ptr" %}
+
+  ins_encode(aarch64_enc_mov_nk(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Packed Float Constant
+
+instruct loadConF_packed(vRegF dst, immFPacked con) %{
+  match(Set dst con);
+  ins_cost(INSN_COST * 4);
+  format %{ "fmovs  $dst, $con"%}
+  ins_encode %{
+    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Load Float Constant
+
+instruct loadConF(vRegF dst, immF con) %{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 4);
+
+  format %{
+    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
+  %}
+
+  ins_encode %{
+    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Load Packed Double Constant
+
+instruct loadConD_packed(vRegD dst, immDPacked con) %{
+  match(Set dst con);
+  ins_cost(INSN_COST);
+  format %{ "fmovd  $dst, $con"%}
+  ins_encode %{
+    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Load Double Constant
+
+instruct loadConD(vRegD dst, immD con) %{
+  match(Set dst con);
+
+  ins_cost(INSN_COST * 5);
+  format %{
+    "ldrd $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
+  %}
+
+  ins_encode %{
+    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Store Instructions
+
+// Store CMS card-mark Immediate
+instruct storeimmCM0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreCM mem zero));
+
+  ins_cost(INSN_COST);
+  format %{ "strb zr, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_strb0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Byte
+instruct storeB(iRegI src, memory mem)
+%{
+  match(Set mem (StoreB mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strb  $src, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_strb(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+
+instruct storeimmB0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreB mem zero));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strb zr, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_strb0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Char/Short
+instruct storeC(iRegI src, memory mem)
+%{
+  match(Set mem (StoreC mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strh  $src, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_strh(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmC0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreC mem zero));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strh  zr, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_strh0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Integer
+
+instruct storeI(iRegIorL2I src, memory mem)
+%{
+  match(Set mem(StoreI mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strw  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_strw(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmI0(immI0 zero, memory mem)
+%{
+  match(Set mem(StoreI mem zero));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strw  zr, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_strw0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Long (64 bit signed)
+instruct storeL(iRegL src, memory mem)
+%{
+  match(Set mem (StoreL mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "str  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_str(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Long (64 bit signed)
+instruct storeimmL0(immL0 zero, memory mem)
+%{
+  match(Set mem (StoreL mem zero));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "str  zr, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_str0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Pointer
+instruct storeP(iRegP src, memory mem)
+%{
+  match(Set mem (StoreP mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "str  $src, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_str(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Pointer
+instruct storeimmP0(immP0 zero, memory mem)
+%{
+  match(Set mem (StoreP mem zero));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "str zr, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_str0(mem));
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Compressed Pointer
+instruct storeN(iRegN src, memory mem)
+%{
+  match(Set mem (StoreN mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strw  $src, $mem\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_strw(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
+%{
+  match(Set mem (StoreN mem zero));
+  predicate(Universe::narrow_oop_base() == NULL &&
+            Universe::narrow_klass_base() == NULL &&
+            (UseBarriersForVolatile || n->as_Store()->is_unordered()));
+
+  ins_cost(INSN_COST);
+  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
+
+  ins_encode(aarch64_enc_strw(heapbase, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Float
+instruct storeF(vRegF src, memory mem)
+%{
+  match(Set mem (StoreF mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strs  $src, $mem\t# float" %}
+
+  ins_encode( aarch64_enc_strs(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// TODO
+// implement storeImmF0 and storeFImmPacked
+
+// Store Double
+instruct storeD(vRegD src, memory mem)
+%{
+  match(Set mem (StoreD mem src));
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+
+  ins_cost(INSN_COST);
+  format %{ "strd  $src, $mem\t# double" %}
+
+  ins_encode( aarch64_enc_strd(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Compressed Klass Pointer
+instruct storeNKlass(iRegN src, memory mem)
+%{
+  predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
+  match(Set mem (StoreNKlass mem src));
+
+  ins_cost(INSN_COST);
+  format %{ "strw  $src, $mem\t# compressed klass ptr" %}
+
+  ins_encode(aarch64_enc_strw(src, mem));
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// TODO
+// implement storeImmD0 and storeDImmPacked
+
+// prefetch instructions
+// Must be safe to execute with invalid address (cannot fault).
+
+instruct prefetchr( memory mem ) %{
+  match(PrefetchRead mem);
+
+  ins_cost(INSN_COST);
+  format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %}
+
+  ins_encode( aarch64_enc_prefetchr(mem) );
+
+  ins_pipe(iload_prefetch);
+%}
+
+instruct prefetchw( memory mem ) %{
+  match(PrefetchAllocation mem);
+
+  ins_cost(INSN_COST);
+  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
+
+  ins_encode( aarch64_enc_prefetchw(mem) );
+
+  ins_pipe(iload_prefetch);
+%}
+
+instruct prefetchnta( memory mem ) %{
+  match(PrefetchWrite mem);
+
+  ins_cost(INSN_COST);
+  format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %}
+
+  ins_encode( aarch64_enc_prefetchnta(mem) );
+
+  ins_pipe(iload_prefetch);
+%}
+
+//  ---------------- volatile loads and stores ----------------
+
+// Load Byte (8 bit signed)
+instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadB mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarsb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldarsb(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Byte (8 bit signed) into long
+instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (ConvI2L (LoadB mem)));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarsb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldarsb(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Byte (8 bit unsigned)
+instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadUB mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldarb(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Byte (8 bit unsigned) into long
+instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (ConvI2L (LoadUB mem)));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarb  $dst, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_ldarb(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Short (16 bit signed)
+instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadS mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarshw  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldarshw(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadUS mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarhw  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldarhw(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Short/Char (16 bit unsigned) into long
+instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (ConvI2L (LoadUS mem)));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldarh(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Short/Char (16 bit signed) into long
+instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (ConvI2L (LoadS mem)));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarh  $dst, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_ldarsh(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Integer (32 bit signed)
+instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadI mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldarw(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Integer (32 bit unsigned) into long
+instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
+%{
+  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarw  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldarw(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Long (64 bit signed)
+instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadL mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldar  $dst, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_ldar(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Pointer
+instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadP mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldar  $dst, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_ldar(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Compressed Pointer
+instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadN mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_ldarw(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Float
+instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadF mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldars  $dst, $mem\t# float" %}
+
+  ins_encode( aarch64_enc_fldars(dst, mem) );
+
+  ins_pipe(pipe_serial);
+%}
+
+// Load Double
+instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
+%{
+  match(Set dst (LoadD mem));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "ldard  $dst, $mem\t# double" %}
+
+  ins_encode( aarch64_enc_fldard(dst, mem) );
+
+  ins_pipe(pipe_serial);
+%}
+
+// Store Byte
+instruct storeB_volatile(iRegI src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreB mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrb  $src, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_stlrb(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Char/Short
+instruct storeC_volatile(iRegI src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreC mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrh  $src, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_stlrh(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Integer
+
+instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem(StoreI mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrw  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_stlrw(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Long (64 bit signed)
+instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreL mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlr  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_stlr(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Pointer
+instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreP mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlr  $src, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_stlr(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Compressed Pointer
+instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreN mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrw  $src, $mem\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_stlrw(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Float
+instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreF mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrs  $src, $mem\t# float" %}
+
+  ins_encode( aarch64_enc_fstlrs(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// TODO
+// implement storeImmF0 and storeFImmPacked
+
+// Store Double
+instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreD mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrd  $src, $mem\t# double" %}
+
+  ins_encode( aarch64_enc_fstlrd(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+//  ---------------- end of volatile loads and stores ----------------
+
+// ============================================================================
+// BSWAP Instructions
+
+instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesI src));
+
+  ins_cost(INSN_COST);
+  format %{ "revw  $dst, $src" %}
+
+  ins_encode %{
+    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
+  match(Set dst (ReverseBytesL src));
+
+  ins_cost(INSN_COST);
+  format %{ "rev  $dst, $src" %}
+
+  ins_encode %{
+    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesUS src));
+
+  ins_cost(INSN_COST);
+  format %{ "rev16w  $dst, $src" %}
+
+  ins_encode %{
+    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesS src));
+
+  ins_cost(INSN_COST);
+  format %{ "rev16w  $dst, $src\n\t"
+            "sbfmw $dst, $dst, #0, #15" %}
+
+  ins_encode %{
+    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
+    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// ============================================================================
+// Zero Count Instructions
+
+instruct countLeadingZerosI(iRegI dst, iRegI src) %{
+  match(Set dst (CountLeadingZerosI src));
+
+  ins_cost(INSN_COST);
+  format %{ "clzw  $dst, $src" %}
+  ins_encode %{
+    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct countLeadingZerosL(iRegI dst, iRegL src) %{
+  match(Set dst (CountLeadingZerosL src));
+
+  ins_cost(INSN_COST);
+  format %{ "clz   $dst, $src" %}
+  ins_encode %{
+    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI(iRegI dst, iRegI src) %{
+  match(Set dst (CountTrailingZerosI src));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "rbitw  $dst, $src\n\t"
+            "clzw   $dst, $dst" %}
+  ins_encode %{
+    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
+    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL(iRegI dst, iRegL src) %{
+  match(Set dst (CountTrailingZerosL src));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "rbit   $dst, $src\n\t"
+            "clz    $dst, $dst" %}
+  ins_encode %{
+    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
+    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// ============================================================================
+// MemBar Instruction
+
+instruct load_fence() %{
+  match(LoadFence);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "load_fence" %}
+
+  ins_encode %{
+    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct unnecessary_membar_acquire() %{
+  predicate(! UseBarriersForVolatile && preceded_by_ordered_load(n));
+  match(MemBarAcquire);
+  ins_cost(0);
+
+  format %{ "membar_acquire (elided)" %}
+
+  ins_encode %{
+    __ block_comment("membar_acquire (elided)");
+  %}
+
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct membar_acquire() %{
+  match(MemBarAcquire);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "membar_acquire" %}
+
+  ins_encode %{
+    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "membar_acquire_lock" %}
+
+  ins_encode %{
+    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct store_fence() %{
+  match(StoreFence);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "store_fence" %}
+
+  ins_encode %{
+    __ membar(Assembler::LoadStore|Assembler::StoreStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release() %{
+  match(MemBarRelease);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "membar_release" %}
+
+  ins_encode %{
+    __ membar(Assembler::LoadStore|Assembler::StoreStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "MEMBAR-store-store" %}
+
+  ins_encode %{
+    __ membar(Assembler::StoreStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock);
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "membar_release_lock" %}
+
+  ins_encode %{
+    __ membar(Assembler::LoadStore|Assembler::StoreStore);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(VOLATILE_REF_COST*100);
+
+  format %{ "membar_volatile" %}
+
+  ins_encode %{
+    __ membar(Assembler::StoreLoad);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Cast/Convert Instructions
+
+instruct castX2P(iRegPNoSp dst, iRegL src) %{
+  match(Set dst (CastX2P src));
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# long -> ptr" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castP2X(iRegLNoSp dst, iRegP src) %{
+  match(Set dst (CastP2X src));
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# ptr -> long" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert oop into int for vectors alignment masking
+instruct convP2I(iRegINoSp dst, iRegP src) %{
+  match(Set dst (ConvL2I (CastP2X src)));
+
+  ins_cost(INSN_COST);
+  format %{ "movw $dst, $src\t# ptr -> int" %}
+  ins_encode %{
+    __ movw($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(iRegINoSp dst, iRegN src)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+  ins_cost(INSN_COST);
+  format %{ "mov dst, $src\t# compressed ptr -> int" %}
+  ins_encode %{
+    __ movw($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+
+// Convert oop pointer into compressed form
+instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
+  match(Set dst (EncodeP src));
+  effect(KILL cr);
+  ins_cost(INSN_COST * 3);
+  format %{ "encode_heap_oop $dst, $src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ encode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
+  match(Set dst (EncodeP src));
+  ins_cost(INSN_COST * 3);
+  format %{ "encode_heap_oop_not_null $dst, $src" %}
+  ins_encode %{
+    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
+            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
+  match(Set dst (DecodeN src));
+  ins_cost(INSN_COST * 3);
+  format %{ "decode_heap_oop $dst, $src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ decode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
+  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
+            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
+  match(Set dst (DecodeN src));
+  ins_cost(INSN_COST * 3);
+  format %{ "decode_heap_oop_not_null $dst, $src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ decode_heap_oop_not_null(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// n.b. AArch64 implementations of encode_klass_not_null and
+// decode_klass_not_null do not modify the flags register so, unlike
+// Intel, we don't kill CR as a side effect here
+
+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
+  match(Set dst (EncodePKlass src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "encode_klass_not_null $dst,$src" %}
+
+  ins_encode %{
+    Register src_reg = as_Register($src$$reg);
+    Register dst_reg = as_Register($dst$$reg);
+    __ encode_klass_not_null(dst_reg, src_reg);
+  %}
+
+   ins_pipe(ialu_reg);
+%}
+
+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
+  match(Set dst (DecodeNKlass src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "decode_klass_not_null $dst,$src" %}
+
+  ins_encode %{
+    Register src_reg = as_Register($src$$reg);
+    Register dst_reg = as_Register($dst$$reg);
+    if (dst_reg != src_reg) {
+      __ decode_klass_not_null(dst_reg, src_reg);
+    } else {
+      __ decode_klass_not_null(dst_reg);
+    }
+  %}
+
+   ins_pipe(ialu_reg);
+%}
+
+instruct checkCastPP(iRegPNoSp dst)
+%{
+  match(Set dst (CheckCastPP dst));
+
+  size(0);
+  format %{ "# checkcastPP of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castPP(iRegPNoSp dst)
+%{
+  match(Set dst (CastPP dst));
+
+  size(0);
+  format %{ "# castPP of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castII(iRegI dst)
+%{
+  match(Set dst (CastII dst));
+
+  size(0);
+  format %{ "# castII of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+// ============================================================================
+// Atomic operation instructions
+//
+// Intel and SPARC both implement Ideal Node LoadPLocked and
+// Store{PIL}Conditional instructions using a normal load for the
+// LoadPLocked and a CAS for the Store{PIL}Conditional.
+//
+// The ideal code appears only to use LoadPLocked/StorePLocked as a
+// pair to lock object allocations from Eden space when not using
+// TLABs.
+//
+// There does not appear to be a Load{IL}Locked Ideal Node and the
+// Ideal code appears to use Store{IL}Conditional as an alias for CAS
+// and to use StoreIConditional only for 32-bit and StoreLConditional
+// only for 64-bit.
+//
+// We implement LoadPLocked and StorePLocked instructions using,
+// respectively the AArch64 hw load-exclusive and store-conditional
+// instructions. Whereas we must implement each of
+// Store{IL}Conditional using a CAS which employs a pair of
+// instructions comprising a load-exclusive followed by a
+// store-conditional.
+
+
+// Locked-load (linked load) of the current heap-top
+// used when updating the eden heap top
+// implemented using ldaxr on AArch64
+
+instruct loadPLocked(iRegPNoSp dst, indirect mem)
+%{
+  match(Set dst (LoadPLocked mem));
+
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
+
+  ins_encode(aarch64_enc_ldaxr(dst, mem));
+
+  ins_pipe(pipe_serial);
+%}
+
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flag (EQ) on success.
+// implemented using stlxr on AArch64.
+
+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
+%{
+  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
+
+  ins_cost(VOLATILE_REF_COST);
+
+ // TODO
+ // do we need to do a store-conditional release or can we just use a
+ // plain store-conditional?
+
+  format %{
+    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
+    "cmpw rscratch1, zr\t# EQ on successful write"
+  %}
+
+  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
+
+  ins_pipe(pipe_serial);
+%}
+
+// this has to be implemented as a CAS
+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
+%{
+  match(Set cr (StoreLConditional mem (Binary oldval newval)));
+
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{
+    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
+    "cmpw rscratch1, zr\t# EQ on successful write"
+  %}
+
+  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// this has to be implemented as a CAS
+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
+%{
+  match(Set cr (StoreIConditional mem (Binary oldval newval)));
+
+  ins_cost(VOLATILE_REF_COST);
+
+  format %{
+    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
+    "cmpw rscratch1, zr\t# EQ on successful write"
+  %}
+
+  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
+// can't match them
+
+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
+
+  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+
+instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
+  match(Set prev (GetAndSetI mem newv));
+  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
+  match(Set prev (GetAndSetL mem newv));
+  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
+  match(Set prev (GetAndSetN mem newv));
+  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
+  match(Set prev (GetAndSetP mem newv));
+  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
+  ins_encode %{
+    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+
+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
+  match(Set newval (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addL $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addL [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
+  match(Set newval (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addL $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addL [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
+  match(Set newval (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addI $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addI [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
+  match(Set newval (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 10);
+  format %{ "get_and_addI $newval, [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem incr));
+  ins_cost(INSN_COST * 9);
+  format %{ "get_and_addI [$mem], $incr" %}
+  ins_encode %{
+    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+// Manifest a CmpL result in an integer register.
+// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
+%{
+  match(Set dst (CmpL3 src1 src2));
+  effect(KILL flags);
+
+  ins_cost(INSN_COST * 6);
+  format %{
+      "cmp $src1, $src2"
+      "csetw $dst, ne"
+      "cnegw $dst, lt"
+  %}
+  // format %{ "CmpL3 $dst, $src1, $src2" %}
+  ins_encode %{
+    __ cmp($src1$$Register, $src2$$Register);
+    __ csetw($dst$$Register, Assembler::NE);
+    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
+%{
+  match(Set dst (CmpL3 src1 src2));
+  effect(KILL flags);
+
+  ins_cost(INSN_COST * 6);
+  format %{
+      "cmp $src1, $src2"
+      "csetw $dst, ne"
+      "cnegw $dst, lt"
+  %}
+  ins_encode %{
+    int32_t con = (int32_t)$src2$$constant;
+     if (con < 0) {
+      __ adds(zr, $src1$$Register, -con);
+    } else {
+      __ subs(zr, $src1$$Register, con);
+    }
+    __ csetw($dst$$Register, Assembler::NE);
+    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Conditional Move Instructions
+
+// n.b. we have identical rules for both a signed compare op (cmpOp)
+// and an unsigned compare op (cmpOpU). it would be nice if we could
+// define an op class which merged both inputs and use it to type the
+// argument to a single rule. unfortunatelyt his fails because the
+// opclass does not live up to the COND_INTER interface of its
+// component operands. When the generic code tries to negate the
+// operand it ends up running the generci Machoper::negate method
+// which throws a ShouldNotHappen. So, we have to provide two flavours
+// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
+
+instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+// n.b. this is selected in preference to the rule above because it
+// avoids loading constant 0 into a source register
+
+// TODO
+// we ought only to be able to cull one of these variants as the ideal
+// transforms ought always to order the zero consistently (to left/right?)
+
+instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegI src, immI0 zero) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegI src, immI0 zero) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+// special case for creating a boolean 0 or 1
+
+// n.b. this is selected in preference to the rule above because it
+// avoids loading constants 0 and 1 into a source register
+
+instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
+
+  ins_encode %{
+    // equivalently
+    // cset(as_Register($dst$$reg),
+    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
+    __ csincw(as_Register($dst$$reg),
+             zr,
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_none);
+%}
+
+instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
+  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
+
+  ins_encode %{
+    // equivalently
+    // cset(as_Register($dst$$reg),
+    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
+    __ csincw(as_Register($dst$$reg),
+             zr,
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_none);
+%}
+
+instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src2$$reg),
+            as_Register($src1$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            zr,
+            as_Register($src$$reg),
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
+
+  ins_encode %{
+    __ csel(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            zr,
+            (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src2$$reg),
+             as_Register($src1$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg_reg);
+%}
+
+// special cases where one arg is zero
+
+instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             zr,
+             as_Register($src$$reg),
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
+
+  ins_encode %{
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src$$reg),
+             zr,
+             (Assembler::Condition)$cmp$$cmpcode);
+  %}
+
+  ins_pipe(icond_reg);
+%}
+
+instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
+%{
+  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcsels(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
+%{
+  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcsels(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
+%{
+  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcseld(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
+%{
+  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+
+  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
+  ins_encode %{
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ fcseld(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src2$$reg),
+              as_FloatRegister($src1$$reg),
+              cond);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Arithmetic Instructions
+//
+
+// Integer Addition
+
+// TODO
+// these currently employ operations which do not set CR and hence are
+// not flagged as killing CR but we would like to isolate the cases
+// where we want to set flags from those where we don't. need to work
+// out how to do that.
+
+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (AddI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "addw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addI_reg_imm(iRegINoSp dst, iRegI src1, immIAddSub src2) %{
+  match(Set dst (AddI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "addw $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
+  match(Set dst (AddI (ConvL2I src1) src2));
+
+  ins_cost(INSN_COST);
+  format %{ "addw $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Pointer Addition
+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
+  match(Set dst (AddP src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2\t# ptr" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
+  match(Set dst (AddP src1 (ConvI2L src2)));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg), ext::sxtw);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
+  match(Set dst (AddP src1 (LShiftL src2 scale)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
+
+  ins_encode %{
+    __ lea(as_Register($dst$$reg),
+           Address(as_Register($src1$$reg), as_Register($src2$$reg),
+                   Address::lsl($scale$$constant)));
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
+  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
+
+  ins_encode %{
+    __ lea(as_Register($dst$$reg),
+           Address(as_Register($src1$$reg), as_Register($src2$$reg),
+                   Address::sxtw($scale$$constant)));
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
+  match(Set dst (LShiftL (ConvI2L src) scale));
+
+  ins_cost(INSN_COST);
+  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
+
+  ins_encode %{
+    __ sbfiz(as_Register($dst$$reg),
+          as_Register($src$$reg),
+          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Pointer Immediate Addition
+// n.b. this needs to be more expensive than using an indirect memory
+// operand
+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
+  match(Set dst (AddP src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2\t# ptr" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+
+  match(Set dst (AddL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// No constant pool entries requiredLong Immediate Addition.
+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
+  match(Set dst (AddL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Subtraction
+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (SubI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "subw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
+  match(Set dst (SubI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "subw $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is a sub not an add
+  opcode(0x1);
+
+  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Subtraction
+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+
+  match(Set dst (SubL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// No constant pool entries requiredLong Immediate Subtraction.
+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
+  match(Set dst (SubL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "sub$dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is a sub not an add
+  opcode(0x1);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Negation (special case for sub)
+
+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
+  match(Set dst (SubI zero src));
+
+  ins_cost(INSN_COST);
+  format %{ "negw $dst, $src\t# int" %}
+
+  ins_encode %{
+    __ negw(as_Register($dst$$reg),
+            as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Long Negation
+
+instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
+  match(Set dst (SubL zero src));
+
+  ins_cost(INSN_COST);
+  format %{ "neg $dst, $src\t# long" %}
+
+  ins_encode %{
+    __ neg(as_Register($dst$$reg),
+           as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Integer Multiply
+
+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (MulI src1 src2));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "mulw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ mulw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(imul_reg_reg);
+%}
+
+instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "smull  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ smull(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(imul_reg_reg);
+%}
+
+// Long Multiply
+
+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (MulL src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "mul  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ mul(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
+%{
+  match(Set dst (MulHiL src1 src2));
+
+  ins_cost(INSN_COST * 7);
+  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}
+
+  ins_encode %{
+    __ smulh(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+// Combined Integer Multiply & Add/Sub
+
+instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
+  match(Set dst (AddI src3 (MulI src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "madd  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ maddw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             as_Register($src3$$reg));
+  %}
+
+  ins_pipe(imac_reg_reg);
+%}
+
+instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
+  match(Set dst (SubI src3 (MulI src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "msub  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ msubw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             as_Register($src3$$reg));
+  %}
+
+  ins_pipe(imac_reg_reg);
+%}
+
+// Combined Long Multiply & Add/Sub
+
+instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
+  match(Set dst (AddL src3 (MulL src1 src2)));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "madd  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ madd(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg),
+            as_Register($src3$$reg));
+  %}
+
+  ins_pipe(lmac_reg_reg);
+%}
+
+instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
+  match(Set dst (SubL src3 (MulL src1 src2)));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "msub  $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ msub(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg),
+            as_Register($src3$$reg));
+  %}
+
+  ins_pipe(lmac_reg_reg);
+%}
+
+// Integer Divide
+
+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (DivI src1 src2));
+
+  ins_cost(INSN_COST * 19);
+  format %{ "sdivw  $dst, $src1, $src2" %}
+
+  ins_encode(aarch64_enc_divw(dst, src1, src2));
+  ins_pipe(idiv_reg_reg);
+%}
+
+instruct signExtract(iRegINoSp dst, iRegI src1, immI_31 div1, immI_31 div2) %{
+  match(Set dst (URShiftI (RShiftI src1 div1) div2));
+  ins_cost(INSN_COST);
+  format %{ "lsrw $dst, $src1, $div1" %}
+  ins_encode %{
+    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct div2Round(iRegINoSp dst, iRegI src, immI_31 div1, immI_31 div2) %{
+  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
+  ins_cost(INSN_COST);
+  format %{ "addw $dst, $src, LSR $div1" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+              as_Register($src$$reg),
+              as_Register($src$$reg),
+              Assembler::LSR, 31);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Long Divide
+
+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (DivL src1 src2));
+
+  ins_cost(INSN_COST * 35);
+  format %{ "sdiv   $dst, $src1, $src2" %}
+
+  ins_encode(aarch64_enc_div(dst, src1, src2));
+  ins_pipe(ldiv_reg_reg);
+%}
+
+instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
+  match(Set dst (URShiftL (RShiftL src1 div1) div2));
+  ins_cost(INSN_COST);
+  format %{ "lsr $dst, $src1, $div1" %}
+  ins_encode %{
+    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
+  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src, $div1" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+              as_Register($src$$reg),
+              as_Register($src$$reg),
+              Assembler::LSR, 63);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Integer Remainder
+
+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (ModI src1 src2));
+
+  ins_cost(INSN_COST * 22);
+  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
+            "msubw($dst, rscratch1, $src2, $src1" %}
+
+  ins_encode(aarch64_enc_modw(dst, src1, src2));
+  ins_pipe(idiv_reg_reg);
+%}
+
+// Long Remainder
+
+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (ModL src1 src2));
+
+  ins_cost(INSN_COST * 38);
+  format %{ "sdiv   rscratch1, $src1, $src2\n"
+            "msub($dst, rscratch1, $src2, $src1" %}
+
+  ins_encode(aarch64_enc_mod(dst, src1, src2));
+  ins_pipe(ldiv_reg_reg);
+%}
+
+// Integer Shifts
+
+// Shift Left Register
+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lslvw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lslvw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
+
+  ins_encode %{
+    __ lslw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (URShiftI src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lsrvw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lsrvw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (URShiftI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
+
+  ins_encode %{
+    __ lsrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (RShiftI src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "asrvw  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ asrvw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (RShiftI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
+
+  ins_encode %{
+    __ asrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Combined Int Mask and Right Shift (using UBFM)
+// TODO
+
+// Long Shifts
+
+// Shift Left Register
+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lslv  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lslv(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ lsl(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lsrv  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lsrv(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ lsr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// A special-case pattern for card table stores.
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
+  match(Set dst (URShiftL (CastP2X src1) src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ lsr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "asrv  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ asrv(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
+
+  ins_encode %{
+    __ asr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+instruct regL_not_reg(iRegLNoSp dst,
+                         iRegL src1, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorL src1 m1));
+  ins_cost(INSN_COST);
+  format %{ "eon  $dst, $src1, zr" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              zr,
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+instruct regI_not_reg(iRegINoSp dst,
+                         iRegI src1, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorI src1 m1));
+  ins_cost(INSN_COST);
+  format %{ "eonw  $dst, $src1, zr" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              zr,
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct AndI_reg_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "bic  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AndL_reg_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "bic  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct OrI_reg_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "orn  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct OrL_reg_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL src2 m1)));
+  ins_cost(INSN_COST);
+  format %{ "orn  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct XorI_reg_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2, immI_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorI m1 (XorI src2 src1)));
+  ins_cost(INSN_COST);
+  format %{ "eon  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct XorL_reg_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2, immL_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (XorL m1 (XorL src2 src1)));
+  ins_cost(INSN_COST);
+  format %{ "eon  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ bicw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ bicw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ bicw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ bic(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eonw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eon(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ ornw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ ornw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, immI_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ ornw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, immL_M1 src4, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ orn(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_URShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_RShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndI_reg_LShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AndL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AndL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_URShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_RShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorI_reg_LShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct XorL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (XorL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_URShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_RShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrI_reg_LShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct OrL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (OrL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddI_reg_URShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddI_reg_RShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddI_reg_LShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct AddL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (AddL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "add  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubI_reg_URShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubI src1 (URShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubL_reg_URShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubL src1 (URShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubI_reg_RShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubI src1 (RShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubL_reg_RShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubL src1 (RShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::ASR,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubI_reg_LShift_reg(iRegINoSp dst,
+                         iRegI src1, iRegI src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubI src1 (LShiftI src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+instruct SubL_reg_LShift_reg(iRegLNoSp dst,
+                         iRegL src1, iRegL src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst (SubL src1 (LShiftL src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}
+
+
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what sbfm can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 63
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 63 - lshift;
+    int r = (rshift - lshift) & 63;
+    __ sbfm(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct sbfmwI(iRegINoSp dst, iRegI src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what sbfmw can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 31
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 31 - lshift;
+    int r = (rshift - lshift) & 31;
+    __ sbfmw(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what ubfm can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 63
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 63 - lshift;
+    int r = (rshift - lshift) & 63;
+    __ ubfm(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct ubfmwI(iRegINoSp dst, iRegI src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
+  // Make sure we are not going to exceed what ubfmw can do.
+  predicate((unsigned int)n->in(2)->get_int() <= 31
+            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = 31 - lshift;
+    int r = (rshift - lshift) & 31;
+    __ ubfmw(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+// Bitfield extract with shift & mask
+
+instruct ubfxwI(iRegINoSp dst, iRegI src, immI rshift, immI_bitmask mask)
+%{
+  match(Set dst (AndI (URShiftI src rshift) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "ubfxw $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfxw(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
+%{
+  match(Set dst (AndL (URShiftL src rshift) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "ubfx $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfx(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// We can use ubfx when extending an And with a mask when we know mask
+// is positive.  We know that because immI_bitmask guarantees it.
+instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
+%{
+  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfx $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfx(as_Register($dst$$reg),
+            as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Rotations
+
+instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 63);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+
+instruct extrOrI(iRegINoSp dst, iRegI src1, iRegI src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 31);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+
+instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 63);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+
+instruct extrAddI(iRegINoSp dst, iRegI src1, iRegI src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & 31);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+
+
+// rol expander
+
+instruct rolL_rReg(iRegL dst, iRegL src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "rol    $dst, $src, $shift" %}
+  ins_cost(INSN_COST * 3);
+  ins_encode %{
+    __ subw(rscratch1, zr, as_Register($shift$$reg));
+    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
+            rscratch1);
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// rol expander
+
+instruct rolI_rReg(iRegI dst, iRegI src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "rol    $dst, $src, $shift" %}
+  ins_cost(INSN_COST * 3);
+  ins_encode %{
+    __ subw(rscratch1, zr, as_Register($shift$$reg));
+    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
+            rscratch1);
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+instruct rolL_rReg_Var_C_64(iRegL dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
+%{
+  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
+
+  expand %{
+    rolL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rolL_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
+
+  expand %{
+    rolL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rolI_rReg_Var_C_32(iRegL dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr)
+%{
+  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
+
+  expand %{
+    rolL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rolI_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
+
+  expand %{
+    rolL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+// ror expander
+
+instruct rorL_rReg(iRegL dst, iRegL src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "ror    $dst, $src, $shift" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
+            as_Register($shift$$reg));
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// ror expander
+
+instruct rorI_rReg(iRegI dst, iRegI src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "ror    $dst, $src, $shift" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
+            as_Register($shift$$reg));
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+instruct rorL_rReg_Var_C_64(iRegL dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
+%{
+  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
+
+  expand %{
+    rorL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rorL_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
+
+  expand %{
+    rorL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rorI_rReg_Var_C_32(iRegL dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr)
+%{
+  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
+
+  expand %{
+    rorL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rorI_rReg_Var_C0(iRegL dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
+%{
+  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
+
+  expand %{
+    rorL_rReg(dst, src, shift, cr);
+  %}
+%}
+
+// Add/subtract (extended)
+
+instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (ConvI2L src2)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtw $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%};
+
+instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (ConvI2L src2)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, sxtw $src2" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%};
+
+
+instruct AddExtI_sxth(iRegINoSp dst, iRegI src1, iRegI src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxth $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtI_sxtb(iRegINoSp dst, iRegI src1, iRegI src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtI_uxtb(iRegINoSp dst, iRegI src1, iRegI src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, uxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxth $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtw $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, sxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::sxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, uxtb $src2" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+
+instruct AddExtI_uxtb_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtI_uxth_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "addw  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
+%{
+  match(Set dst (AddL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "add  $dst, $src1, $src2, uxtw" %}
+
+   ins_encode %{
+     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtI_uxtb_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtI_uxth_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubI src1 (AndI src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "subw  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, uxtb" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtb);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, uxth" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxth);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
+%{
+  match(Set dst (SubL src1 (AndL src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "sub  $dst, $src1, $src2, uxtw" %}
+
+   ins_encode %{
+     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::uxtw);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// END This section of the file is automatically generated. Do not edit --------------
+
+// ============================================================================
+// Floating Point Arithmetic Instructions
+
+instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (AddF src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fadds   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fadds(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (AddD src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "faddd   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ faddd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (SubF src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fsubs   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fsubs(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (SubD src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fsubd   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fsubd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (MulF src1 src2));
+
+  ins_cost(INSN_COST * 6);
+  format %{ "fmuls   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fmuls(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (MulD src1 src2));
+
+  ins_cost(INSN_COST * 6);
+  format %{ "fmuld   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fmuld(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// We cannot use these fused mul w add/sub ops because they don't
+// produce the same result as the equivalent separated ops
+// (essentially they don't round the intermediate result). that's a
+// shame. leaving them here in case we can idenitfy cases where it is
+// legitimate to use them
+
+
+// instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+//   match(Set dst (AddF (MulF src1 src2) src3));
+
+//   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmadds(as_FloatRegister($dst$$reg),
+//              as_FloatRegister($src1$$reg),
+//              as_FloatRegister($src2$$reg),
+//              as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+//   match(Set dst (AddD (MulD src1 src2) src3));
+
+//   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmaddd(as_FloatRegister($dst$$reg),
+//              as_FloatRegister($src1$$reg),
+//              as_FloatRegister($src2$$reg),
+//              as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+//   match(Set dst (AddF (MulF (NegF src1) src2) src3));
+//   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
+
+//   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmsubs(as_FloatRegister($dst$$reg),
+//               as_FloatRegister($src1$$reg),
+//               as_FloatRegister($src2$$reg),
+//              as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+//   match(Set dst (AddD (MulD (NegD src1) src2) src3));
+//   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
+
+//   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fmsubd(as_FloatRegister($dst$$reg),
+//               as_FloatRegister($src1$$reg),
+//               as_FloatRegister($src2$$reg),
+//               as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+//   match(Set dst (SubF (MulF (NegF src1) src2) src3));
+//   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
+
+//   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fnmadds(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
+//   match(Set dst (SubD (MulD (NegD src1) src2) src3));
+//   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
+
+//   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fnmaddd(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
+//   match(Set dst (SubF (MulF src1 src2) src3));
+
+//   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//     __ fnmsubs(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+// instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
+//   match(Set dst (SubD (MulD src1 src2) src3));
+
+//   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
+
+//   ins_encode %{
+//   // n.b. insn name should be fnmsubd
+//     __ fnmsub(as_FloatRegister($dst$$reg),
+//                as_FloatRegister($src1$$reg),
+//                as_FloatRegister($src2$$reg),
+//                as_FloatRegister($src3$$reg));
+//   %}
+
+//   ins_pipe(pipe_class_default);
+// %}
+
+
+instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (DivF src1  src2));
+
+  ins_cost(INSN_COST * 18);
+  format %{ "fdivs   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fdivs(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
+  match(Set dst (DivD src1  src2));
+
+  ins_cost(INSN_COST * 32);
+  format %{ "fdivd   $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ fdivd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src1$$reg),
+             as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct negF_reg_reg(vRegF dst, vRegF src) %{
+  match(Set dst (NegF src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg   $dst, $src" %}
+
+  ins_encode %{
+    __ fnegs(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct negD_reg_reg(vRegD dst, vRegD src) %{
+  match(Set dst (NegD src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fnegd   $dst, $src" %}
+
+  ins_encode %{
+    __ fnegd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct absF_reg(vRegF dst, vRegF src) %{
+  match(Set dst (AbsF src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fabss   $dst, $src" %}
+  ins_encode %{
+    __ fabss(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct absD_reg(vRegD dst, vRegD src) %{
+  match(Set dst (AbsD src));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "fabsd   $dst, $src" %}
+  ins_encode %{
+    __ fabsd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct sqrtD_reg(vRegD dst, vRegD src) %{
+  match(Set dst (SqrtD src));
+
+  ins_cost(INSN_COST * 50);
+  format %{ "fsqrtd  $dst, $src" %}
+  ins_encode %{
+    __ fsqrtd(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct sqrtF_reg(vRegF dst, vRegF src) %{
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+
+  ins_cost(INSN_COST * 50);
+  format %{ "fsqrts  $dst, $src" %}
+  ins_encode %{
+    __ fsqrts(as_FloatRegister($dst$$reg),
+             as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Logical Instructions
+
+// Integer Logical Instructions
+
+// And Instructions
+
+
+instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andsw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Or Instructions
+
+instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orrw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orrw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orrw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Xor Instructions
+
+instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "eorw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "eorw  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ eorw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Logical Instructions
+// TODO
+
+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "and  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "and  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ andr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Or Instructions
+
+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "orr  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "orr  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Xor Instructions
+
+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (XorL src1 src2));
+
+  format %{ "eor  $dst, $src1, $src2\t# int" %}
+
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
+  match(Set dst (XorL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "eor  $dst, $src1, $src2\t# int" %}
+
+  ins_encode %{
+    __ eor(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (unsigned long)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
+%{
+  match(Set dst (ConvI2L src));
+
+  ins_cost(INSN_COST);
+  format %{ "sxtw  $dst, $src\t# i2l" %}
+  ins_encode %{
+    __ sbfm($dst$$Register, $src$$Register, 0, 31);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// this pattern occurs in bigmath arithmetic
+instruct convUI2L_reg_reg(iRegLNoSp dst, iRegI src, immL_32bits mask)
+%{
+  match(Set dst (AndL (ConvI2L src) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
+  ins_encode %{
+    __ ubfm($dst$$Register, $src$$Register, 0, 31);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
+  match(Set dst (ConvL2I src));
+
+  ins_cost(INSN_COST);
+  format %{ "movw  $dst, $src \t// l2i" %}
+
+  ins_encode %{
+    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct convI2B(iRegINoSp dst, iRegI src, rFlagsReg cr)
+%{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+
+  format %{
+    "cmpw $src, zr\n\t"
+    "cset $dst, ne"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($src$$reg), zr);
+    __ cset(as_Register($dst$$reg), Assembler::NE);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
+%{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+
+  format %{
+    "cmp  $src, zr\n\t"
+    "cset $dst, ne"
+  %}
+
+  ins_encode %{
+    __ cmp(as_Register($src$$reg), zr);
+    __ cset(as_Register($dst$$reg), Assembler::NE);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct convD2F_reg(vRegF dst, vRegD src) %{
+  match(Set dst (ConvD2F src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtd  $dst, $src \t// d2f" %}
+
+  ins_encode %{
+    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convF2D_reg(vRegD dst, vRegF src) %{
+  match(Set dst (ConvF2D src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvts  $dst, $src \t// f2d" %}
+
+  ins_encode %{
+    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
+  match(Set dst (ConvF2I src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzsw  $dst, $src \t// f2i" %}
+
+  ins_encode %{
+    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
+  match(Set dst (ConvF2L src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzs  $dst, $src \t// f2l" %}
+
+  ins_encode %{
+    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convI2F_reg_reg(vRegF dst, iRegI src) %{
+  match(Set dst (ConvI2F src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfws  $dst, $src \t// i2f" %}
+
+  ins_encode %{
+    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
+  match(Set dst (ConvL2F src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfs  $dst, $src \t// l2f" %}
+
+  ins_encode %{
+    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
+  match(Set dst (ConvD2I src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzdw  $dst, $src \t// d2i" %}
+
+  ins_encode %{
+    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
+  match(Set dst (ConvD2L src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "fcvtzd  $dst, $src \t// d2l" %}
+
+  ins_encode %{
+    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convI2D_reg_reg(vRegD dst, iRegI src) %{
+  match(Set dst (ConvI2D src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfwd  $dst, $src \t// i2d" %}
+
+  ins_encode %{
+    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
+  match(Set dst (ConvL2D src));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "scvtfd  $dst, $src \t// l2d" %}
+
+  ins_encode %{
+    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// stack <-> reg and reg <-> reg shuffles with no conversion
+
+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
+
+  ins_encode %{
+    __ ldrw($dst$$Register, Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
+
+  ins_encode %{
+    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
+
+  ins_encode %{
+    __ ldr($dst$$Register, Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(4 * INSN_COST);
+
+  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
+
+  ins_encode %{
+    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
+
+  ins_encode %{
+    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
+
+  ins_encode %{
+    __ strw($src$$Register, Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "strd $dst, $src\t# MoveD2L_reg_stack" %}
+
+  ins_encode %{
+    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
+
+  ins_encode %{
+    __ str($src$$Register, Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
+
+  ins_encode %{
+    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
+
+  ins_encode %{
+    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
+
+  ins_encode %{
+    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(INSN_COST);
+
+  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
+
+  ins_encode %{
+    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+// ============================================================================
+// clearing of an array
+
+instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
+%{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base);
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ClearArray $cnt, $base" %}
+
+  ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// ============================================================================
+// Overflow Math Instructions
+
+instruct overflowAddI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+
+  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmnw($op1$$Register, $op2$$Register);
+  %}
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct overflowAddI_reg_imm(rFlagsReg cr, iRegI op1, immIAddSub op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+
+  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmnw($op1$$Register, $op2$$constant);
+  %}
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
+%{
+  match(Set cr (OverflowAddL op1 op2));
+
+  format %{ "cmn   $op1, $op2\t# overflow check long" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmn($op1$$Register, $op2$$Register);
+  %}
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
+%{
+  match(Set cr (OverflowAddL op1 op2));
+
+  format %{ "cmn   $op1, $op2\t# overflow check long" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmn($op1$$Register, $op2$$constant);
+  %}
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct overflowSubI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmpw($op1$$Register, $op2$$Register);
+  %}
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct overflowSubI_reg_imm(rFlagsReg cr, iRegI op1, immIAddSub op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmpw($op1$$Register, $op2$$constant);
+  %}
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
+%{
+  match(Set cr (OverflowSubL op1 op2));
+
+  format %{ "cmp   $op1, $op2\t# overflow check long" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$Register);
+  %}
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
+%{
+  match(Set cr (OverflowSubL op1 op2));
+
+  format %{ "cmp   $op1, $op2\t# overflow check long" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$constant);
+  %}
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegI op1)
+%{
+  match(Set cr (OverflowSubI zero op1));
+
+  format %{ "cmpw  zr, $op1\t# overflow check int" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmpw(zr, $op1$$Register);
+  %}
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
+%{
+  match(Set cr (OverflowSubL zero op1));
+
+  format %{ "cmp   zr, $op1\t# overflow check long" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmp(zr, $op1$$Register);
+  %}
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct overflowMulI_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+
+  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
+            "cmp   rscratch1, rscratch1, sxtw\n\t"
+            "movw  rscratch1, #0x80000000\n\t"
+            "cselw rscratch1, rscratch1, zr, NE\n\t"
+            "cmpw  rscratch1, #1" %}
+  ins_cost(5 * INSN_COST);
+  ins_encode %{
+    __ smull(rscratch1, $op1$$Register, $op2$$Register);
+    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
+    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
+    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
+    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct overflowMulI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, rFlagsReg cr)
+%{
+  match(If cmp (OverflowMulI op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
+            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
+  effect(USE labl, KILL cr);
+
+  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
+            "cmp   rscratch1, rscratch1, sxtw\n\t"
+            "b$cmp   $labl" %}
+  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ smull(rscratch1, $op1$$Register, $op2$$Register);
+    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
+    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
+%{
+  match(Set cr (OverflowMulL op1 op2));
+
+  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
+            "smulh rscratch2, $op1, $op2\n\t"
+            "cmp   rscratch2, rscratch1, ASR #31\n\t"
+            "movw  rscratch1, #0x80000000\n\t"
+            "cselw rscratch1, rscratch1, zr, NE\n\t"
+            "cmpw  rscratch1, #1" %}
+  ins_cost(6 * INSN_COST);
+  ins_encode %{
+    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
+    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
+    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
+    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
+    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
+    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
+%{
+  match(If cmp (OverflowMulL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
+            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
+  effect(USE labl, KILL cr);
+
+  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
+            "smulh rscratch2, $op1, $op2\n\t"
+            "cmp   rscratch2, rscratch1, ASR #31\n\t"
+            "b$cmp $labl" %}
+  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
+    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
+    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
+    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Compare Instructions
+
+instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (CmpI op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmpw(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
+%{
+  match(Set cr (CmpI op1 zero));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw $op1, 0" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
+%{
+  match(Set cr (CmpI op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
+%{
+  match(Set cr (CmpI op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cmpw  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+// Unsigned compare Instructions; really, same as signed compare
+// except it should only be used to feed an If or a CMovI which takes a
+// cmpOpU.
+
+instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
+%{
+  match(Set cr (CmpU op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
+%{
+  match(Set cr (CmpU op1 zero));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw $op1, #0\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
+%{
+  match(Set cr (CmpU op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmpw  $op1, $op2\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
+%{
+  match(Set cr (CmpU op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cmpw  $op1, $op2\t# unsigned" %}
+
+  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
+%{
+  match(Set cr (CmpL op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
+%{
+  match(Set cr (CmpL op1 zero));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "tst  $op1" %}
+
+  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
+%{
+  match(Set cr (CmpL op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
+%{
+  match(Set cr (CmpL op1 op2));
+
+  effect(DEF cr, USE op1);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "cmp  $op1, $op2" %}
+
+  ins_encode(aarch64_enc_cmp_imm(op1, op2));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
+%{
+  match(Set cr (CmpP op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2\t // ptr" %}
+
+  ins_encode(aarch64_enc_cmpp(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
+%{
+  match(Set cr (CmpN op1 op2));
+
+  effect(DEF cr, USE op1, USE op2);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, $op2\t // compressed ptr" %}
+
+  ins_encode(aarch64_enc_cmpn(op1, op2));
+
+  ins_pipe(icmp_reg_reg);
+%}
+
+instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
+%{
+  match(Set cr (CmpP op1 zero));
+
+  effect(DEF cr, USE op1, USE zero);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, 0\t // ptr" %}
+
+  ins_encode(aarch64_enc_testp(op1));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
+%{
+  match(Set cr (CmpN op1 zero));
+
+  effect(DEF cr, USE op1, USE zero);
+
+  ins_cost(INSN_COST);
+  format %{ "cmp  $op1, 0\t // compressed ptr" %}
+
+  ins_encode(aarch64_enc_testn(op1));
+
+  ins_pipe(icmp_reg_imm);
+%}
+
+// FP comparisons
+//
+// n.b. CmpF/CmpD set a normal flags reg which then gets compared
+// using normal cmpOp. See declaration of rFlagsReg for details.
+
+instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
+%{
+  match(Set cr (CmpF src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmps $src1, $src2" %}
+
+  ins_encode %{
+    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
+%{
+  match(Set cr (CmpF src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmps $src1, 0.0" %}
+
+  ins_encode %{
+    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+// FROM HERE
+
+instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
+%{
+  match(Set cr (CmpD src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmpd $src1, $src2" %}
+
+  ins_encode %{
+    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
+%{
+  match(Set cr (CmpD src1 src2));
+
+  ins_cost(3 * INSN_COST);
+  format %{ "fcmpd $src1, 0.0" %}
+
+  ins_encode %{
+    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
+%{
+  match(Set dst (CmpF3 src1 src2));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmps $src1, $src2\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    FloatRegister s2 = as_FloatRegister($src2$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmps(s1, s2);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+
+  ins_pipe(pipe_class_default);
+
+%}
+
+instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
+%{
+  match(Set dst (CmpD3 src1 src2));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmpd $src1, $src2\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    FloatRegister s2 = as_FloatRegister($src2$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmpd(s1, s2);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+
+%}
+
+instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
+%{
+  match(Set dst (CmpF3 src1 zero));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmps $src1, 0.0\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmps(s1, 0.0D);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+
+  ins_pipe(pipe_class_default);
+
+%}
+
+instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
+%{
+  match(Set dst (CmpD3 src1 zero));
+  effect(KILL cr);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fcmpd $src1, 0.0\n\t"
+            "csinvw($dst, zr, zr, eq\n\t"
+            "csnegw($dst, $dst, $dst, lt)"
+  %}
+
+  ins_encode %{
+    Label done;
+    FloatRegister s1 = as_FloatRegister($src1$$reg);
+    Register d = as_Register($dst$$reg);
+    __ fcmpd(s1, 0.0D);
+    // installs 0 if EQ else -1
+    __ csinvw(d, zr, zr, Assembler::EQ);
+    // keeps -1 if less or unordered else installs 1
+    __ csnegw(d, d, d, Assembler::LT);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+
+%}
+
+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q, rFlagsReg cr)
+%{
+  match(Set dst (CmpLTMask p q));
+  effect(KILL cr);
+
+  ins_cost(3 * INSN_COST);
+
+  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
+            "csetw $dst, lt\n\t"
+            "subw $dst, zr, $dst"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
+    __ csetw(as_Register($dst$$reg), Assembler::LT);
+    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegI src, immI0 zero, rFlagsReg cr)
+%{
+  match(Set dst (CmpLTMask src zero));
+  effect(KILL cr);
+
+  ins_cost(INSN_COST);
+
+  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
+
+  ins_encode %{
+    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// ============================================================================
+// Max and Min
+
+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+  match(Set dst (MinI src1 src2));
+
+  effect(DEF dst, USE src1, USE src2, KILL cr);
+  size(8);
+
+  ins_cost(INSN_COST * 3);
+  format %{
+    "cmpw $src1 $src2\t signed int\n\t"
+    "cselw $dst, $src1, $src2 lt\t"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($src1$$reg),
+            as_Register($src2$$reg));
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             Assembler::LT);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+// FROM HERE
+
+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+  match(Set dst (MaxI src1 src2));
+
+  effect(DEF dst, USE src1, USE src2, KILL cr);
+  size(8);
+
+  ins_cost(INSN_COST * 3);
+  format %{
+    "cmpw $src1 $src2\t signed int\n\t"
+    "cselw $dst, $src1, $src2 gt\t"
+  %}
+
+  ins_encode %{
+    __ cmpw(as_Register($src1$$reg),
+            as_Register($src2$$reg));
+    __ cselw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             Assembler::GT);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// ============================================================================
+// Branch Instructions
+
+// Direct Branch.
+instruct branch(label lbl)
+%{
+  match(Goto);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b  $lbl" %}
+
+  ins_encode(aarch64_enc_b(lbl));
+
+  ins_pipe(pipe_branch);
+%}
+
+// Conditional Near Branch
+instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
+%{
+  // Same match rule as `branchConFar'.
+  match(If cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // If set to 1 this indicates that the current instruction is a
+  // short variant of a long branch. This avoids using this
+  // instruction in first-pass matching. It will then only be used in
+  // the `Shorten_branches' pass.
+  // ins_short_branch(1);
+  format %{ "b$cmp  $lbl" %}
+
+  ins_encode(aarch64_enc_br_con(cmp, lbl));
+
+  ins_pipe(pipe_branch_cond);
+%}
+
+// Conditional Near Branch Unsigned
+instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
+%{
+  // Same match rule as `branchConFar'.
+  match(If cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // If set to 1 this indicates that the current instruction is a
+  // short variant of a long branch. This avoids using this
+  // instruction in first-pass matching. It will then only be used in
+  // the `Shorten_branches' pass.
+  // ins_short_branch(1);
+  format %{ "b$cmp  $lbl\t# unsigned" %}
+
+  ins_encode(aarch64_enc_br_conU(cmp, lbl));
+
+  ins_pipe(pipe_branch_cond);
+%}
+
+// Make use of CBZ and CBNZ.  These instructions, as well as being
+// shorter than (cmp; branch), have the additional benefit of not
+// killing the flags.
+
+instruct cmpI_imm0_branch(cmpOp cmp, iRegI op1, immI0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpI op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cbw$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbzw($op1$$Register, *L);
+    else
+      __ cbnzw($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbz($op1$$Register, *L);
+    else
+      __ cbnz($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpP op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbz($op1$$Register, *L);
+    else
+      __ cbnz($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+// Conditional Far Branch
+// Conditional Far Branch Unsigned
+// TODO: fixme
+
+// counted loop end branch near
+instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
+%{
+  match(CountedLoopEnd cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // short variant.
+  // ins_short_branch(1);
+  format %{ "b$cmp $lbl \t// counted loop end" %}
+
+  ins_encode(aarch64_enc_br_con(cmp, lbl));
+
+  ins_pipe(pipe_branch);
+%}
+
+// counted loop end branch near Unsigned
+instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
+%{
+  match(CountedLoopEnd cmp cr);
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  // short variant.
+  // ins_short_branch(1);
+  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
+
+  ins_encode(aarch64_enc_br_conU(cmp, lbl));
+
+  ins_pipe(pipe_branch);
+%}
+
+// counted loop end branch far
+// counted loop end branch far unsigned
+// TODO: fixme
+
+// ============================================================================
+// inlined locking and unlocking
+
+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
+%{
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP tmp2);
+
+  // TODO
+  // identify correct cost
+  ins_cost(5 * INSN_COST);
+  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
+
+  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
+%{
+  match(Set cr (FastUnlock object box));
+  effect(TEMP tmp, TEMP tmp2);
+
+  ins_cost(5 * INSN_COST);
+  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
+
+  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
+
+  ins_pipe(pipe_serial);
+%}
+
+
+// ============================================================================
+// Safepoint Instructions
+
+// TODO
+// provide a near and far version of this code
+
+instruct safePoint(iRegP poll)
+%{
+  match(SafePoint poll);
+
+  format %{
+    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
+  %}
+  ins_encode %{
+    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
+  %}
+  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
+%}
+
+
+// ============================================================================
+// Procedure Call/Return Instructions
+
+// Call Java Static Instruction
+
+instruct CallStaticJavaDirect(method meth)
+%{
+  match(CallStaticJava);
+
+  effect(USE meth);
+
+  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
+
+  ins_cost(CALL_COST);
+
+  format %{ "call,static $meth \t// ==> " %}
+
+  ins_encode( aarch64_enc_java_static_call(meth),
+              aarch64_enc_call_epilog );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// TO HERE
+
+// Call Java Static Instruction (method handle version)
+
+instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
+%{
+  match(CallStaticJava);
+
+  effect(USE meth);
+
+  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
+
+  ins_cost(CALL_COST);
+
+  format %{ "call,static $meth \t// (methodhandle) ==> " %}
+
+  ins_encode( aarch64_enc_java_handle_call(meth),
+              aarch64_enc_call_epilog );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Java Dynamic Instruction
+instruct CallDynamicJavaDirect(method meth)
+%{
+  match(CallDynamicJava);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL,dynamic $meth \t// ==> " %}
+
+  ins_encode( aarch64_enc_java_dynamic_call(meth),
+               aarch64_enc_call_epilog );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallRuntimeDirect(method meth)
+%{
+  match(CallRuntime);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL, runtime $meth" %}
+
+  ins_encode( aarch64_enc_java_to_runtime(meth) );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallLeafDirect(method meth)
+%{
+  match(CallLeaf);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL, runtime leaf $meth" %}
+
+  ins_encode( aarch64_enc_java_to_runtime(meth) );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+instruct CallLeafNoFPDirect(method meth)
+%{
+  match(CallLeafNoFP);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL, runtime leaf nofp $meth" %}
+
+  ins_encode( aarch64_enc_java_to_runtime(meth) );
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
+%{
+  match(TailCall jump_target method_oop);
+
+  ins_cost(CALL_COST);
+
+  format %{ "br $jump_target\t# $method_oop holds method oop" %}
+
+  ins_encode(aarch64_enc_tail_call(jump_target));
+
+  ins_pipe(pipe_class_call);
+%}
+
+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
+%{
+  match(TailJump jump_target ex_oop);
+
+  ins_cost(CALL_COST);
+
+  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
+
+  ins_encode(aarch64_enc_tail_jmp(jump_target));
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+// TODO check
+// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
+instruct CreateException(iRegP_R0 ex_oop)
+%{
+  match(Set ex_oop (CreateEx));
+
+  format %{ " -- \t// exception oop; no code emitted" %}
+
+  size(0);
+
+  ins_encode( /*empty*/ );
+
+  ins_pipe(pipe_class_empty);
+%}
+
+// Rethrow exception: The exception oop will come in the first
+// argument position. Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException() %{
+  match(Rethrow);
+  ins_cost(CALL_COST);
+
+  format %{ "b rethrow_stub" %}
+
+  ins_encode( aarch64_enc_rethrow() );
+
+  ins_pipe(pipe_class_call);
+%}
+
+
+// Return Instruction
+// epilog node loads ret address into lr as part of frame pop
+instruct Ret()
+%{
+  match(Return);
+
+  format %{ "ret\t// return register" %}
+
+  ins_encode( aarch64_enc_ret() );
+
+  ins_pipe(pipe_branch);
+%}
+
+// Die now.
+instruct ShouldNotReachHere() %{
+  match(Halt);
+
+  ins_cost(CALL_COST);
+  format %{ "ShouldNotReachHere" %}
+
+  ins_encode %{
+    // TODO
+    // implement proper trap call here
+    __ brk(999);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Partial Subtype Check
+//
+// superklass array for an instance of the superklass.  Set a hidden
+// internal cache on a hit (cache is checked with exposed code in
+// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
+// encoding ALSO sets flags.
+
+instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
+%{
+  match(Set result (PartialSubtypeCheck sub super));
+  effect(KILL cr, KILL temp);
+
+  ins_cost(1100);  // slightly larger than the next version
+  format %{ "partialSubtypeCheck $result, $sub, $super" %}
+
+  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
+
+  opcode(0x1); // Force zero of result reg on hit
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
+%{
+  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
+  effect(KILL temp, KILL result);
+
+  ins_cost(1100);  // slightly larger than the next version
+  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
+
+  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
+
+  opcode(0x0); // Don't zero result reg on hit
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
+                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
+%{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
+       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}
+
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      -1, $result$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
+                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
+                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}
+
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, zr,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      icnt2, $result$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
+                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
+%{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
+
+  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
+  ins_encode %{
+    __ string_equals($str1$$Register, $str2$$Register,
+                      $cnt$$Register, $result$$Register,
+                      $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
+                      iRegP_R10 tmp, rFlagsReg cr)
+%{
+  match(Set result (AryEq ary1 ary2));
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
+
+  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
+  ins_encode %{
+    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
+                          $result$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// encode char[] to byte[] in ISO_8859_1
+instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
+                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
+                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
+                          iRegI_R0 result, rFlagsReg cr)
+%{
+  match(Set result (EncodeISOArray src (Binary dst len)));
+  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
+         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
+
+  format %{ "Encode array $src,$dst,$len -> $result" %}
+  ins_encode %{
+    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
+         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
+         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
+  %}
+  ins_pipe( pipe_class_memory );
+%}
+
+// ============================================================================
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
+instruct tlsLoadP(thread_RegP dst)
+%{
+  match(Set dst (ThreadLocal));
+
+  ins_cost(0);
+
+  format %{ " -- \t// $dst=Thread::current(), empty" %}
+
+  size(0);
+
+  ins_encode( /*empty*/ );
+
+  ins_pipe(pipe_class_empty);
+%}
+
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+//  [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser.  An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == RAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(iRegINoSp dst, iRegI src)
+// %{
+//   match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
+// %{
+//   match(Set dst (AddI dst src));
+//   effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+//   // increment preceeded by register-register move
+//   peepmatch ( incI_iReg movI );
+//   // require that the destination register of the increment
+//   // match the destination register of the move
+//   peepconstraint ( 0.dst == 1.dst );
+//   // construct a replacement instruction that sets
+//   // the destination to ( move's source register + one )
+//   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
+// %}
+//
+
+// Implementation no longer uses movX instructions since
+// machine-independent system no longer uses CopyX nodes.
+//
+// peephole
+// %{
+//   peepmatch (incI_iReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (decI_iReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (addI_iReg_imm movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (incL_iReg movL);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (decL_iReg movL);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (addL_iReg_imm movL);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
+// %}
+
+// peephole
+// %{
+//   peepmatch (addP_iReg_imm movP);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
+// %}
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, iRegI src)
+// %{
+//   match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(iRegINoSp dst, memory mem)
+// %{
+//   match(Set dst (LoadI mem));
+// %}
+//
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+
+// Local Variables:
+// mode: c++
+// End:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64_ad.m4	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,365 @@
+dnl Copyright (c) 2014, Red Hat Inc. All rights reserved.
+dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+dnl
+dnl This code is free software; you can redistribute it and/or modify it
+dnl under the terms of the GNU General Public License version 2 only, as
+dnl published by the Free Software Foundation.
+dnl
+dnl This code is distributed in the hope that it will be useful, but WITHOUT
+dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+dnl FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl version 2 for more details (a copy is included in the LICENSE file that
+dnl accompanied this code).
+dnl
+dnl You should have received a copy of the GNU General Public License version
+dnl 2 along with this work; if not, write to the Free Software Foundation,
+dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+dnl
+dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+dnl or visit www.oracle.com if you need additional information or have any
+dnl questions.
+dnl
+dnl 
+dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic
+dnl and shift patterns patterns used in aarch64.ad.
+dnl
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+define(`BASE_SHIFT_INSN',
+`
+instruct $2$1_reg_$4_reg(iReg$1NoSp dst,
+                         iReg$1 src1, iReg$1 src2,
+                         immI src3, rFlagsReg cr) %{
+  match(Set dst ($2$1 src1 ($4$1 src2 src3)));
+
+  ins_cost(1.9 * INSN_COST);
+  format %{ "$3  $dst, $src1, $src2, $5 $src3" %}
+
+  ins_encode %{
+    __ $3(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::$5,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}')dnl
+define(`BASE_INVERTED_INSN',
+`
+instruct $2$1_reg_not_reg(iReg$1NoSp dst,
+                         iReg$1 src1, iReg$1 src2, imm$1_M1 m1,
+                         rFlagsReg cr) %{
+dnl This ifelse is because hotspot reassociates (xor (xor ..)..)
+dnl into this canonical form.
+  ifelse($2,Xor,
+    match(Set dst (Xor$1 m1 (Xor$1 src2 src1)));,
+    match(Set dst ($2$1 src1 (Xor$1 src2 m1)));)
+  ins_cost(INSN_COST);
+  format %{ "$3  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ $3(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}')dnl
+define(`INVERTED_SHIFT_INSN',
+`
+instruct $2$1_reg_$4_not_reg(iReg$1NoSp dst,
+                         iReg$1 src1, iReg$1 src2,
+                         immI src3, imm$1_M1 src4, rFlagsReg cr) %{
+dnl This ifelse is because hotspot reassociates (xor (xor ..)..)
+dnl into this canonical form.
+  ifelse($2,Xor,
+    match(Set dst ($2$1 src4 (Xor$1($4$1 src2 src3) src1)));,
+    match(Set dst ($2$1 src1 (Xor$1($4$1 src2 src3) src4)));)
+  ins_cost(1.9 * INSN_COST);
+  format %{ "$3  $dst, $src1, $src2, $5 $src3" %}
+
+  ins_encode %{
+    __ $3(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              as_Register($src2$$reg),
+              Assembler::$5,
+              $src3$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_reg_shift);
+%}')dnl
+define(`NOT_INSN',
+`instruct reg$1_not_reg(iReg$1NoSp dst,
+                         iReg$1 src1, imm$1_M1 m1,
+                         rFlagsReg cr) %{
+  match(Set dst (Xor$1 src1 m1));
+  ins_cost(INSN_COST);
+  format %{ "$2  $dst, $src1, zr" %}
+
+  ins_encode %{
+    __ $2(as_Register($dst$$reg),
+              as_Register($src1$$reg),
+              zr,
+              Assembler::LSL, 0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}')dnl
+dnl
+define(`BOTH_SHIFT_INSNS',
+`BASE_SHIFT_INSN(I, $1, ifelse($2,andr,andw,$2w), $3, $4)
+BASE_SHIFT_INSN(L, $1, $2, $3, $4)')dnl
+dnl
+define(`BOTH_INVERTED_INSNS',
+`BASE_INVERTED_INSN(I, $1, $2, $3, $4)
+BASE_INVERTED_INSN(L, $1, $2, $3, $4)')dnl
+dnl
+define(`BOTH_INVERTED_SHIFT_INSNS',
+`INVERTED_SHIFT_INSN(I, $1, $2w, $3, $4, ~0, int)
+INVERTED_SHIFT_INSN(L, $1, $2, $3, $4, ~0l, long)')dnl
+dnl
+define(`ALL_SHIFT_KINDS',
+`BOTH_SHIFT_INSNS($1, $2, URShift, LSR)
+BOTH_SHIFT_INSNS($1, $2, RShift, ASR)
+BOTH_SHIFT_INSNS($1, $2, LShift, LSL)')dnl
+dnl
+define(`ALL_INVERTED_SHIFT_KINDS',
+`BOTH_INVERTED_SHIFT_INSNS($1, $2, URShift, LSR)
+BOTH_INVERTED_SHIFT_INSNS($1, $2, RShift, ASR)
+BOTH_INVERTED_SHIFT_INSNS($1, $2, LShift, LSL)')dnl
+dnl
+NOT_INSN(L, eon)
+NOT_INSN(I, eonw)
+BOTH_INVERTED_INSNS(And, bic)
+BOTH_INVERTED_INSNS(Or, orn)
+BOTH_INVERTED_INSNS(Xor, eon)
+ALL_INVERTED_SHIFT_KINDS(And, bic)
+ALL_INVERTED_SHIFT_KINDS(Xor, eon)
+ALL_INVERTED_SHIFT_KINDS(Or, orn)
+ALL_SHIFT_KINDS(And, andr)
+ALL_SHIFT_KINDS(Xor, eor)
+ALL_SHIFT_KINDS(Or, orr)
+ALL_SHIFT_KINDS(Add, add)
+ALL_SHIFT_KINDS(Sub, sub)
+dnl
+dnl EXTEND mode, rshift_op, src, lshift_count, rshift_count
+define(`EXTEND', `($2$1 (LShift$1 $3 $4) $5)')
+define(`BFM_INSN',`
+// Shift Left followed by Shift Right.
+// This idiom is used by the compiler for the i2b bytecode etc.
+instruct $4$1(iReg$1NoSp dst, iReg$1 src, immI lshift_count, immI rshift_count)
+%{
+  match(Set dst EXTEND($1, $3, src, lshift_count, rshift_count));
+  // Make sure we are not going to exceed what $4 can do.
+  predicate((unsigned int)n->in(2)->get_int() <= $2
+            && (unsigned int)n->in(1)->in(2)->get_int() <= $2);
+
+  ins_cost(INSN_COST * 2);
+  format %{ "$4  $dst, $src, $rshift_count - $lshift_count, #$2 - $lshift_count" %}
+  ins_encode %{
+    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
+    int s = $2 - lshift;
+    int r = (rshift - lshift) & $2;
+    __ $4(as_Register($dst$$reg),
+	    as_Register($src$$reg),
+	    r, s);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}')
+BFM_INSN(L, 63, RShift, sbfm)
+BFM_INSN(I, 31, RShift, sbfmw)
+BFM_INSN(L, 63, URShift, ubfm)
+BFM_INSN(I, 31, URShift, ubfmw)
+dnl
+// Bitfield extract with shift & mask
+define(`BFX_INSN',
+`instruct $3$1(iReg$1NoSp dst, iReg$1 src, immI rshift, imm$1_bitmask mask)
+%{
+  match(Set dst (And$1 ($2$1 src rshift) mask));
+
+  ins_cost(INSN_COST);
+  format %{ "$3 $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ $3(as_Register($dst$$reg),
+	    as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}')
+BFX_INSN(I,URShift,ubfxw)
+BFX_INSN(L,URShift,ubfx)
+
+// We can use ubfx when extending an And with a mask when we know mask
+// is positive.  We know that because immI_bitmask guarantees it.
+instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
+%{
+  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "ubfx $dst, $src, $mask" %}
+  ins_encode %{
+    int rshift = $rshift$$constant;
+    long mask = $mask$$constant;
+    int width = exact_log2(mask+1);
+    __ ubfx(as_Register($dst$$reg),
+	    as_Register($src$$reg), rshift, width);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Rotations
+
+define(`EXTRACT_INSN',
+`instruct extr$3$1(iReg$1NoSp dst, iReg$1 src1, iReg$1 src2, immI lshift, immI rshift, rFlagsReg cr)
+%{
+  match(Set dst ($3$1 (LShift$1 src1 lshift) (URShift$1 src2 rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & $2));
+
+  ins_cost(INSN_COST);
+  format %{ "extr $dst, $src1, $src2, #$rshift" %}
+
+  ins_encode %{
+    __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
+            $rshift$$constant & $2);
+  %}
+  ins_pipe(ialu_reg_reg_extr);
+%}
+')dnl
+EXTRACT_INSN(L, 63, Or, extr)
+EXTRACT_INSN(I, 31, Or, extrw)
+EXTRACT_INSN(L, 63, Add, extr)
+EXTRACT_INSN(I, 31, Add, extrw)
+define(`ROL_EXPAND', `
+// $2 expander
+
+instruct $2$1_rReg(iReg$1 dst, iReg$1 src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "$2    $dst, $src, $shift" %}
+  ins_cost(INSN_COST * 3);
+  ins_encode %{
+    __ subw(rscratch1, zr, as_Register($shift$$reg));
+    __ $3(as_Register($dst$$reg), as_Register($src$$reg),
+	    rscratch1);
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}')dnl
+define(`ROR_EXPAND', `
+// $2 expander
+
+instruct $2$1_rReg(iReg$1 dst, iReg$1 src, iRegI shift, rFlagsReg cr)
+%{
+  effect(DEF dst, USE src, USE shift);
+
+  format %{ "$2    $dst, $src, $shift" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ $3(as_Register($dst$$reg), as_Register($src$$reg),
+	    as_Register($shift$$reg));
+    %}
+  ins_pipe(ialu_reg_reg_vshift);
+%}')dnl
+define(ROL_INSN, `
+instruct $3$1_rReg_Var_C$2(iRegL dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr)
+%{
+  match(Set dst (Or$1 (LShift$1 src shift) (URShift$1 src (SubI c$2 shift))));
+
+  expand %{
+    $3L_rReg(dst, src, shift, cr);
+  %}
+%}')dnl
+define(ROR_INSN, `
+instruct $3$1_rReg_Var_C$2(iRegL dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr)
+%{
+  match(Set dst (Or$1 (URShift$1 src shift) (LShift$1 src (SubI c$2 shift))));
+
+  expand %{
+    $3L_rReg(dst, src, shift, cr);
+  %}
+%}')dnl
+ROL_EXPAND(L, rol, rorv)
+ROL_EXPAND(I, rol, rorvw)
+ROL_INSN(L, _64, rol)
+ROL_INSN(L, 0, rol)
+ROL_INSN(I, _32, rol)
+ROL_INSN(I, 0, rol)
+ROR_EXPAND(L, ror, rorv)
+ROR_EXPAND(I, ror, rorvw)
+ROR_INSN(L, _64, ror)
+ROR_INSN(L, 0, ror)
+ROR_INSN(I, _32, ror)
+ROR_INSN(I, 0, ror)
+
+// Add/subtract (extended)
+dnl ADD_SUB_EXTENDED(mode, size, add node, shift node, insn, shift type, wordsize
+define(`ADD_SUB_CONV', `
+instruct $3Ext$1(iReg$2NoSp dst, iReg$2 src1, iReg$1orL2I src2, rFlagsReg cr)
+%{
+  match(Set dst ($3$2 src1 (ConvI2L src2)));
+  ins_cost(INSN_COST);
+  format %{ "$4  $dst, $src1, $5 $src2" %}
+
+   ins_encode %{
+     __ $4(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::$5);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}')dnl
+ADD_SUB_CONV(I,L,Add,add,sxtw);
+ADD_SUB_CONV(I,L,Sub,sub,sxtw);
+dnl
+define(`ADD_SUB_EXTENDED', `
+instruct $3Ext$1_$6(iReg$1NoSp dst, iReg$1 src1, iReg$1 src2, immI_`'eval($7-$2) lshift, immI_`'eval($7-$2) rshift, rFlagsReg cr)
+%{
+  match(Set dst ($3$1 src1 EXTEND($1, $4, src2, lshift, rshift)));
+  ins_cost(INSN_COST);
+  format %{ "$5  $dst, $src1, $6 $src2" %}
+
+   ins_encode %{
+     __ $5(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::$6);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}')
+ADD_SUB_EXTENDED(I,16,Add,RShift,add,sxth,32)
+ADD_SUB_EXTENDED(I,8,Add,RShift,add,sxtb,32)
+ADD_SUB_EXTENDED(I,8,Add,URShift,add,uxtb,32)
+ADD_SUB_EXTENDED(L,16,Add,RShift,add,sxth,64)
+ADD_SUB_EXTENDED(L,32,Add,RShift,add,sxtw,64)
+ADD_SUB_EXTENDED(L,8,Add,RShift,add,sxtb,64)
+ADD_SUB_EXTENDED(L,8,Add,URShift,add,uxtb,64)
+dnl
+dnl ADD_SUB_ZERO_EXTEND(mode, size, add node, insn, shift type)
+define(`ADD_SUB_ZERO_EXTEND', `
+instruct $3Ext$1_$5_and(iReg$1NoSp dst, iReg$1 src1, iReg$1 src2, imm$1_$2 mask, rFlagsReg cr)
+%{
+  match(Set dst ($3$1 src1 (And$1 src2 mask)));
+  ins_cost(INSN_COST);
+  format %{ "$4  $dst, $src1, $src2, $5" %}
+
+   ins_encode %{
+     __ $4(as_Register($dst$$reg), as_Register($src1$$reg),
+            as_Register($src2$$reg), ext::$5);
+   %}
+  ins_pipe(ialu_reg_reg);
+%}')
+dnl
+ADD_SUB_ZERO_EXTEND(I,255,Add,addw,uxtb)
+ADD_SUB_ZERO_EXTEND(I,65535,Add,addw,uxth)
+ADD_SUB_ZERO_EXTEND(L,255,Add,add,uxtb)
+ADD_SUB_ZERO_EXTEND(L,65535,Add,add,uxth)
+ADD_SUB_ZERO_EXTEND(L,4294967295,Add,add,uxtw)
+dnl
+ADD_SUB_ZERO_EXTEND(I,255,Sub,subw,uxtb)
+ADD_SUB_ZERO_EXTEND(I,65535,Sub,subw,uxth)
+ADD_SUB_ZERO_EXTEND(L,255,Sub,sub,uxtb)
+ADD_SUB_ZERO_EXTEND(L,65535,Sub,sub,uxth)
+ADD_SUB_ZERO_EXTEND(L,4294967295,Sub,sub,uxtw)
+
+// END This section of the file is automatically generated. Do not edit --------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/ad_encode.m4	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,98 @@
+dnl Copyright (c) 2014, Red Hat Inc. All rights reserved.
+dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+dnl
+dnl This code is free software; you can redistribute it and/or modify it
+dnl under the terms of the GNU General Public License version 2 only, as
+dnl published by the Free Software Foundation.
+dnl
+dnl This code is distributed in the hope that it will be useful, but WITHOUT
+dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+dnl FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl version 2 for more details (a copy is included in the LICENSE file that
+dnl accompanied this code).
+dnl
+dnl You should have received a copy of the GNU General Public License version
+dnl 2 along with this work; if not, write to the Free Software Foundation,
+dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+dnl
+dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+dnl or visit www.oracle.com if you need additional information or have any
+dnl questions.
+dnl
+dnl 
+dnl Process this file with m4 ad_encode.m4 to generate the load/store
+dnl patterns used in aarch64.ad.
+dnl
+define(choose, `loadStore($1, &MacroAssembler::$3, $2, $4,
+               $5, $6, $7, $8);dnl
+
+  %}')dnl
+define(access, `
+    $3Register $1_reg = as_$3Register($$1$$reg);
+    $4choose(MacroAssembler(&cbuf), $1_reg,$2,$mem->opcode(),
+        as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl
+define(load,`
+  enc_class aarch64_enc_$2($1 dst, memory mem) %{dnl
+access(dst,$2,$3)')dnl
+load(iRegI,ldrsbw)
+load(iRegI,ldrsb)
+load(iRegI,ldrb)
+load(iRegL,ldrb)
+load(iRegI,ldrshw)
+load(iRegI,ldrsh)
+load(iRegI,ldrh)
+load(iRegL,ldrh)
+load(iRegI,ldrw)
+load(iRegL,ldrw)
+load(iRegL,ldrsw)
+load(iRegL,ldr)
+load(vRegF,ldrs,Float)
+load(vRegD,ldrd,Float)
+define(STORE,`
+  enc_class aarch64_enc_$2($1 src, memory mem) %{dnl
+access(src,$2,$3,$4)')dnl
+define(STORE0,`
+  enc_class aarch64_enc_$2`'0(memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    choose(_masm,zr,$2,$mem->opcode(),
+        as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl
+STORE(iRegI,strb)
+STORE0(iRegI,strb)
+STORE(iRegI,strh)
+STORE0(iRegI,strh)
+STORE(iRegI,strw)
+STORE0(iRegI,strw)
+STORE(iRegL,str,,
+`// we sometimes get asked to store the stack pointer into the
+    // current thread -- we cannot do that directly on AArch64
+    if (src_reg == r31_sp) {
+      MacroAssembler _masm(&cbuf);
+      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
+      __ mov(rscratch2, sp);
+      src_reg = rscratch2;
+    }
+    ')
+STORE0(iRegL,str)
+STORE(vRegF,strs,Float)
+STORE(vRegD,strd,Float)
+
+  enc_class aarch64_enc_strw_immn(immN src, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    address con = (address)$src$$constant;
+    // need to do this the hard way until we can manage relocs
+    // for 32 bit constants
+    __ movoop(rscratch2, (jobject)con);
+    if (con) __ encode_heap_oop_not_null(rscratch2);
+    choose(_masm,rscratch2,strw,$mem->opcode(),
+        as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)
+
+  enc_class aarch64_enc_strw_immnk(immN src, memory mem) %{
+    MacroAssembler _masm(&cbuf);
+    address con = (address)$src$$constant;
+    // need to do this the hard way until we can manage relocs
+    // for 32 bit constants
+    __ movoop(rscratch2, (jobject)con);
+    __ encode_klass_not_null(rscratch2);
+    choose(_masm,rscratch2,strw,$mem->opcode(),
+        as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+
+#define __ ce->masm()->
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  ce->store_parameter(_method->as_register(), 1);
+  ce->store_parameter(_bci, 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+                               bool throw_index_out_of_bounds_exception)
+  : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
+  , _index(index)
+{
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_info->deoptimize_on_exception()) {
+    address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+    __ far_call(RuntimeAddress(a));
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ should_not_reach_here());
+    return;
+  }
+
+  if (_index->is_cpu_register()) {
+    __ mov(rscratch1, _index->as_register());
+  } else {
+    __ mov(rscratch1, _index->as_jint());
+  }
+  Runtime1::StubID stub_id;
+  if (_throw_index_out_of_bounds_exception) {
+    stub_id = Runtime1::throw_index_exception_id;
+  } else {
+    stub_id = Runtime1::throw_range_check_failed_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id)), NULL, rscratch2);
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+  _info = new CodeEmitInfo(info);
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  __ far_call(RuntimeAddress(a));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) {
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+#ifdef ASSERT
+  __ should_not_reach_here();
+#endif
+}
+
+
+
+// Implementation of NewInstanceStub
+
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id                 ||
+         stub_id == Runtime1::fast_new_instance_id            ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id   = stub_id;
+}
+
+
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  __ mov(r3, _klass_reg->as_register());
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == r0, "result must in r0,");
+  __ b(_continuation);
+}
+
+
+// Implementation of NewTypeArrayStub
+
+// Implementation of NewTypeArrayStub
+
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == r19, "length must in r19,");
+  assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == r0, "result must in r0");
+  __ b(_continuation);
+}
+
+
+// Implementation of NewObjectArrayStub
+
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _result = result;
+  _length = length;
+  _info = new CodeEmitInfo(info);
+}
+
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == r19, "length must in r19,");
+  assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == r0, "result must in r0");
+  __ b(_continuation);
+}
+// Implementation of MonitorAccessStubs
+
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+: MonitorAccessStub(obj_reg, lock_reg)
+{
+  _info = new CodeEmitInfo(info);
+}
+
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  ce->store_parameter(_obj_reg->as_register(),  1);
+  ce->store_parameter(_lock_reg->as_register(), 0);
+  Runtime1::StubID enter_id;
+  if (ce->compilation()->has_fpu_code()) {
+    enter_id = Runtime1::monitorenter_id;
+  } else {
+    enter_id = Runtime1::monitorenter_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_compute_lock) {
+    // lock_reg was destroyed by fast unlocking attempt => recompute it
+    ce->monitor_address(_monitor_ix, _lock_reg);
+  }
+  ce->store_parameter(_lock_reg->as_register(), 0);
+  // note: non-blocking leaf routine => no call info needed
+  Runtime1::StubID exit_id;
+  if (ce->compilation()->has_fpu_code()) {
+    exit_id = Runtime1::monitorexit_id;
+  } else {
+    exit_id = Runtime1::monitorexit_nofpu_id;
+  }
+  __ adr(lr, _continuation);
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
+}
+
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes)
+// - Replace original code with a call to the stub
+// At Runtime:
+// - call to stub, jump to runtime
+// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object)
+// - in runtime: after initializing class, restore original code, reexecute instruction
+
+int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;
+
+void PatchingStub::align_patch_site(MacroAssembler* masm) {
+}
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+  assert(false, "AArch64 should not use C1 runtime patching");
+}
+
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+  ce->add_call_info_here(_info);
+  DEBUG_ONLY(__ should_not_reach_here());
+}
+
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  address a;
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+    a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  } else {
+    a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+  }
+
+  ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  __ bind(_entry);
+  __ far_call(RuntimeAddress(a));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+
+  __ bind(_entry);
+  // pass the object in a scratch register because all other registers
+  // must be preserved
+  if (_obj->is_cpu_register()) {
+    __ mov(rscratch1, _obj->as_register());
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, rscratch2);
+  ce->add_call_info_here(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+  //---------------slow case: call to native-----------------
+  __ bind(_entry);
+  // Figure out where the args should go
+  // This should really convert the IntrinsicID to the Method* and signature
+  // but I don't know how to do that.
+  //
+  VMRegPair args[5];
+  BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT};
+  SharedRuntime::java_calling_convention(signature, args, 5, true);
+
+  // push parameters
+  // (src, src_pos, dest, destPos, length)
+  Register r[5];
+  r[0] = src()->as_register();
+  r[1] = src_pos()->as_register();
+  r[2] = dst()->as_register();
+  r[3] = dst_pos()->as_register();
+  r[4] = length()->as_register();
+
+  // next registers will get stored on the stack
+  for (int i = 0; i < 5 ; i++ ) {
+    VMReg r_1 = args[i].first();
+    if (r_1->is_stack()) {
+      int st_off = r_1->reg2stack() * wordSize;
+      __ str (r[i], Address(sp, st_off));
+    } else {
+      assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg ");
+    }
+  }
+
+  ce->align_call(lir_static_call);
+
+  ce->emit_static_call_stub();
+  Address resolve(SharedRuntime::get_resolve_static_call_stub(),
+                  relocInfo::static_call_type);
+  __ trampoline_call(resolve);
+  ce->add_call_info_here(info());
+
+#ifndef PRODUCT
+  __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
+  __ incrementw(Address(rscratch2));
+#endif
+
+  __ b(_continuation);
+}
+
+
+/////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+
+  __ bind(_entry);
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+  __ cbz(pre_val_reg, _continuation);
+  ce->store_parameter(pre_val()->as_register(), 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ b(_continuation);
+}
+
+jbyte* G1PostBarrierStub::_byte_map_base = NULL;
+
+jbyte* G1PostBarrierStub::byte_map_base_slow() {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+         "Must be if we're using this.");
+  return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+}
+
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register new_val_reg = new_val()->as_register();
+  __ cbz(new_val_reg, _continuation);
+  ce->store_parameter(addr()->as_pointer_register(), 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
+  __ b(_continuation);
+}
+
+#endif // INCLUDE_ALL_GCS
+/////////////////////////////////////////////////////////////////////////////
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_Defs_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP
+
+// native word offsets from memory address (little endian)
+enum {
+  pd_lo_word_offset_in_bytes = 0,
+  pd_hi_word_offset_in_bytes = BytesPerWord
+};
+
+// explicit rounding operations are required to implement the strictFP mode
+enum {
+  pd_strict_fp_requires_explicit_rounding = false
+};
+
+// FIXME: There are no callee-saved
+
+// registers
+enum {
+  pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers,       // number of registers used during code emission
+  pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers,  // number of registers used during code emission
+
+  pd_nof_caller_save_cpu_regs_frame_map = 19 - 2,  // number of registers killed by calls
+  pd_nof_caller_save_fpu_regs_frame_map = 32,  // number of registers killed by calls
+
+  pd_first_callee_saved_reg = 19 - 2,
+  pd_last_callee_saved_reg = 26 - 2,
+
+  pd_last_allocatable_cpu_reg = 16,
+
+  pd_nof_cpu_regs_reg_alloc
+    = pd_last_allocatable_cpu_reg + 1,  // number of registers that are visible to register allocator
+  pd_nof_fpu_regs_reg_alloc = 8,  // number of registers that are visible to register allocator
+
+  pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan
+  pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan
+  pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these
+  pd_first_cpu_reg = 0,
+  pd_last_cpu_reg = 16,
+  pd_first_byte_reg = 0,
+  pd_last_byte_reg = 16,
+  pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
+  pd_last_fpu_reg =  pd_first_fpu_reg + 31,
+
+  pd_first_callee_saved_fpu_reg = 8 + pd_first_fpu_reg,
+  pd_last_callee_saved_fpu_reg = 15 + pd_first_fpu_reg,
+};
+
+
+// Encoding of float value in debug info.  This is true on x86 where
+// floats are extended to doubles when stored in the stack, false for
+// AArch64 where floats and doubles are stored in their native form.
+enum {
+  pd_float_saved_as_double = false
+};
+
+#endif // CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FpuStackSim.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "utilities/array.hpp"
+#include "utilities/ostream.hpp"
+
+//--------------------------------------------------------
+//               FpuStackSim
+//--------------------------------------------------------
+
+// No FPU stack on AARCH64
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_FPUSTACKSIM_HPP
+#define CPU_AARCH64_VM_C1_FPUSTACKSIM_HPP
+
+// No FPU stack on AARCH64
+class FpuStackSim;
+
+#endif // CPU_AARCH64_VM_C1_FPUSTACKSIM_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_FrameMap_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {
+  LIR_Opr opr = LIR_OprFact::illegalOpr;
+  VMReg r_1 = reg->first();
+  VMReg r_2 = reg->second();
+  if (r_1->is_stack()) {
+    // Convert stack slot to an SP offset
+    // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
+    // so we must add it in here.
+    int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+    opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type));
+  } else if (r_1->is_Register()) {
+    Register reg = r_1->as_Register();
+    if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+      Register reg2 = r_2->as_Register();
+      assert(reg2 == reg, "must be same register");
+      opr = as_long_opr(reg);
+    } else if (type == T_OBJECT || type == T_ARRAY) {
+      opr = as_oop_opr(reg);
+    } else if (type == T_METADATA) {
+      opr = as_metadata_opr(reg);
+    } else {
+      opr = as_opr(reg);
+    }
+  } else if (r_1->is_FloatRegister()) {
+    assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+    int num = r_1->as_FloatRegister()->encoding();
+    if (type == T_FLOAT) {
+      opr = LIR_OprFact::single_fpu(num);
+    } else {
+      opr = LIR_OprFact::double_fpu(num);
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+  return opr;
+}
+
+LIR_Opr FrameMap::r0_opr;
+LIR_Opr FrameMap::r1_opr;
+LIR_Opr FrameMap::r2_opr;
+LIR_Opr FrameMap::r3_opr;
+LIR_Opr FrameMap::r4_opr;
+LIR_Opr FrameMap::r5_opr;
+LIR_Opr FrameMap::r6_opr;
+LIR_Opr FrameMap::r7_opr;
+LIR_Opr FrameMap::r8_opr;
+LIR_Opr FrameMap::r9_opr;
+LIR_Opr FrameMap::r10_opr;
+LIR_Opr FrameMap::r11_opr;
+LIR_Opr FrameMap::r12_opr;
+LIR_Opr FrameMap::r13_opr;
+LIR_Opr FrameMap::r14_opr;
+LIR_Opr FrameMap::r15_opr;
+LIR_Opr FrameMap::r16_opr;
+LIR_Opr FrameMap::r17_opr;
+LIR_Opr FrameMap::r18_opr;
+LIR_Opr FrameMap::r19_opr;
+LIR_Opr FrameMap::r20_opr;
+LIR_Opr FrameMap::r21_opr;
+LIR_Opr FrameMap::r22_opr;
+LIR_Opr FrameMap::r23_opr;
+LIR_Opr FrameMap::r24_opr;
+LIR_Opr FrameMap::r25_opr;
+LIR_Opr FrameMap::r26_opr;
+LIR_Opr FrameMap::r27_opr;
+LIR_Opr FrameMap::r28_opr;
+LIR_Opr FrameMap::r29_opr;
+LIR_Opr FrameMap::r30_opr;
+
+LIR_Opr FrameMap::rfp_opr;
+LIR_Opr FrameMap::sp_opr;
+
+LIR_Opr FrameMap::receiver_opr;
+
+LIR_Opr FrameMap::r0_oop_opr;
+LIR_Opr FrameMap::r1_oop_opr;
+LIR_Opr FrameMap::r2_oop_opr;
+LIR_Opr FrameMap::r3_oop_opr;
+LIR_Opr FrameMap::r4_oop_opr;
+LIR_Opr FrameMap::r5_oop_opr;
+LIR_Opr FrameMap::r6_oop_opr;
+LIR_Opr FrameMap::r7_oop_opr;
+LIR_Opr FrameMap::r8_oop_opr;
+LIR_Opr FrameMap::r9_oop_opr;
+LIR_Opr FrameMap::r10_oop_opr;
+LIR_Opr FrameMap::r11_oop_opr;
+LIR_Opr FrameMap::r12_oop_opr;
+LIR_Opr FrameMap::r13_oop_opr;
+LIR_Opr FrameMap::r14_oop_opr;
+LIR_Opr FrameMap::r15_oop_opr;
+LIR_Opr FrameMap::r16_oop_opr;
+LIR_Opr FrameMap::r17_oop_opr;
+LIR_Opr FrameMap::r18_oop_opr;
+LIR_Opr FrameMap::r19_oop_opr;
+LIR_Opr FrameMap::r20_oop_opr;
+LIR_Opr FrameMap::r21_oop_opr;
+LIR_Opr FrameMap::r22_oop_opr;
+LIR_Opr FrameMap::r23_oop_opr;
+LIR_Opr FrameMap::r24_oop_opr;
+LIR_Opr FrameMap::r25_oop_opr;
+LIR_Opr FrameMap::r26_oop_opr;
+LIR_Opr FrameMap::r27_oop_opr;
+LIR_Opr FrameMap::r28_oop_opr;
+LIR_Opr FrameMap::r29_oop_opr;
+LIR_Opr FrameMap::r30_oop_opr;
+
+LIR_Opr FrameMap::rscratch1_opr;
+LIR_Opr FrameMap::rscratch2_opr;
+LIR_Opr FrameMap::rscratch1_long_opr;
+LIR_Opr FrameMap::rscratch2_long_opr;
+
+LIR_Opr FrameMap::r0_metadata_opr;
+LIR_Opr FrameMap::r1_metadata_opr;
+LIR_Opr FrameMap::r2_metadata_opr;
+LIR_Opr FrameMap::r3_metadata_opr;
+LIR_Opr FrameMap::r4_metadata_opr;
+LIR_Opr FrameMap::r5_metadata_opr;
+
+LIR_Opr FrameMap::long0_opr;
+LIR_Opr FrameMap::long1_opr;
+LIR_Opr FrameMap::fpu0_float_opr;
+LIR_Opr FrameMap::fpu0_double_opr;
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+//--------------------------------------------------------
+//               FrameMap
+//--------------------------------------------------------
+
+void FrameMap::initialize() {
+  assert(!_init_done, "once");
+
+  int i=0;
+  map_register(i, r0); r0_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r1); r1_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r2); r2_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r3); r3_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r4); r4_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r5); r5_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r6); r6_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r7); r7_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r10); r10_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r11); r11_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r12); r12_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r13); r13_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r14); r14_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r15); r15_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r16); r16_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r17); r17_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r18); r18_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r19); r19_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r20); r20_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r21); r21_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r22); r22_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r23); r23_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r24); r24_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r25); r25_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++;
+
+  map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase
+  map_register(i, r28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread
+  map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp
+  map_register(i, r30); r30_opr = LIR_OprFact::single_cpu(i); i++; // lr
+  map_register(i, r31_sp); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp
+  map_register(i, r8); r8_opr = LIR_OprFact::single_cpu(i); i++;   // rscratch1
+  map_register(i, r9); r9_opr = LIR_OprFact::single_cpu(i); i++;   // rscratch2
+
+  rscratch1_opr = r8_opr;
+  rscratch2_opr = r9_opr;
+  rscratch1_long_opr = LIR_OprFact::double_cpu(r8_opr->cpu_regnr(), r8_opr->cpu_regnr());
+  rscratch2_long_opr = LIR_OprFact::double_cpu(r9_opr->cpu_regnr(), r9_opr->cpu_regnr());
+
+  long0_opr = LIR_OprFact::double_cpu(0, 0);
+  long1_opr = LIR_OprFact::double_cpu(1, 1);
+
+  fpu0_float_opr   = LIR_OprFact::single_fpu(0);
+  fpu0_double_opr  = LIR_OprFact::double_fpu(0);
+
+  _caller_save_cpu_regs[0] = r0_opr;
+  _caller_save_cpu_regs[1] = r1_opr;
+  _caller_save_cpu_regs[2] = r2_opr;
+  _caller_save_cpu_regs[3] = r3_opr;
+  _caller_save_cpu_regs[4] = r4_opr;
+  _caller_save_cpu_regs[5] = r5_opr;
+  _caller_save_cpu_regs[6]  = r6_opr;
+  _caller_save_cpu_regs[7]  = r7_opr;
+  // rscratch1, rscratch 2 not included
+  _caller_save_cpu_regs[8] = r10_opr;
+  _caller_save_cpu_regs[9] = r11_opr;
+  _caller_save_cpu_regs[10] = r12_opr;
+  _caller_save_cpu_regs[11] = r13_opr;
+  _caller_save_cpu_regs[12] = r14_opr;
+  _caller_save_cpu_regs[13] = r15_opr;
+  _caller_save_cpu_regs[14] = r16_opr;
+  _caller_save_cpu_regs[15] = r17_opr;
+  _caller_save_cpu_regs[16] = r18_opr;
+
+  for (int i = 0; i < 8; i++) {
+    _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
+  }
+
+  _init_done = true;
+
+  r0_oop_opr = as_oop_opr(r0);
+  r1_oop_opr = as_oop_opr(r1);
+  r2_oop_opr = as_oop_opr(r2);
+  r3_oop_opr = as_oop_opr(r3);
+  r4_oop_opr = as_oop_opr(r4);
+  r5_oop_opr = as_oop_opr(r5);
+  r6_oop_opr = as_oop_opr(r6);
+  r7_oop_opr = as_oop_opr(r7);
+  r8_oop_opr = as_oop_opr(r8);
+  r9_oop_opr = as_oop_opr(r9);
+  r10_oop_opr = as_oop_opr(r10);
+  r11_oop_opr = as_oop_opr(r11);
+  r12_oop_opr = as_oop_opr(r12);
+  r13_oop_opr = as_oop_opr(r13);
+  r14_oop_opr = as_oop_opr(r14);
+  r15_oop_opr = as_oop_opr(r15);
+  r16_oop_opr = as_oop_opr(r16);
+  r17_oop_opr = as_oop_opr(r17);
+  r18_oop_opr = as_oop_opr(r18);
+  r19_oop_opr = as_oop_opr(r19);
+  r20_oop_opr = as_oop_opr(r20);
+  r21_oop_opr = as_oop_opr(r21);
+  r22_oop_opr = as_oop_opr(r22);
+  r23_oop_opr = as_oop_opr(r23);
+  r24_oop_opr = as_oop_opr(r24);
+  r25_oop_opr = as_oop_opr(r25);
+  r26_oop_opr = as_oop_opr(r26);
+  r27_oop_opr = as_oop_opr(r27);
+  r28_oop_opr = as_oop_opr(r28);
+  r29_oop_opr = as_oop_opr(r29);
+  r30_oop_opr = as_oop_opr(r30);
+
+  r0_metadata_opr = as_metadata_opr(r0);
+  r1_metadata_opr = as_metadata_opr(r1);
+  r2_metadata_opr = as_metadata_opr(r2);
+  r3_metadata_opr = as_metadata_opr(r3);
+  r4_metadata_opr = as_metadata_opr(r4);
+  r5_metadata_opr = as_metadata_opr(r5);
+
+  sp_opr = as_pointer_opr(r31_sp);
+  rfp_opr = as_pointer_opr(rfp);
+
+  VMRegPair regs;
+  BasicType sig_bt = T_OBJECT;
+  SharedRuntime::java_calling_convention(&sig_bt, &regs, 1, true);
+  receiver_opr = as_oop_opr(regs.first()->as_Register());
+
+  for (int i = 0; i < nof_caller_save_fpu_regs; i++) {
+    _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
+  }
+}
+
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+  // for rbp, based address use this:
+  // return Address(rbp, in_bytes(sp_offset) - (framesize() - 2) * 4);
+  return Address(sp, in_bytes(sp_offset));
+}
+
+
+// ----------------mapping-----------------------
+// all mapping is based on rfp addressing, except for simple leaf methods where we access
+// the locals sp based (and no frame is built)
+
+
+// Frame for simple leaf methods (quick entries)
+//
+//   +----------+
+//   | ret addr |   <- TOS
+//   +----------+
+//   | args     |
+//   | ......   |
+
+// Frame for standard methods
+//
+//   | .........|  <- TOS
+//   | locals   |
+//   +----------+
+//   |  old fp, |  <- RFP
+//   +----------+
+//   | ret addr |
+//   +----------+
+//   |  args    |
+//   | .........|
+
+
+// For OopMaps, map a local variable or spill index to an VMRegImpl name.
+// This is the offset from sp() in the frame of the slot for the index,
+// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.)
+//
+//           framesize +
+//           stack0         stack0          0  <- VMReg
+//             |              | <registers> |
+//  ...........|..............|.............|
+//      0 1 2 3 x x 4 5 6 ... |                <- local indices
+//      ^           ^        sp()                 ( x x indicate link
+//      |           |                               and return addr)
+//  arguments   non-argument locals
+
+
+VMReg FrameMap::fpu_regname (int n) {
+  // Return the OptoReg name for the fpu stack slot "n"
+  // A spilled fpu stack slot comprises to two single-word OptoReg's.
+  return as_FloatRegister(n)->as_VMReg();
+}
+
+LIR_Opr FrameMap::stack_pointer() {
+  return FrameMap::sp_opr;
+}
+
+
+// JSR 292
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+  // assert(rfp == rbp_mh_SP_save, "must be same register");
+  return rfp_opr;
+}
+
+
+bool FrameMap::validate_frame() {
+  return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_FrameMap_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP
+
+//  On AArch64 the frame looks as follows:
+//
+//  +-----------------------------+---------+----------------------------------------+----------------+-----------
+//  | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling .
+//  +-----------------------------+---------+----------------------------------------+----------------+-----------
+
+ public:
+  static const int pd_c_runtime_reserved_arg_size;
+
+  enum {
+    first_available_sp_in_frame = 0,
+    frame_pad_in_bytes = 16,
+    nof_reg_args = 8
+  };
+
+ public:
+  static LIR_Opr receiver_opr;
+
+  static LIR_Opr r0_opr;
+  static LIR_Opr r1_opr;
+  static LIR_Opr r2_opr;
+  static LIR_Opr r3_opr;
+  static LIR_Opr r4_opr;
+  static LIR_Opr r5_opr;
+  static LIR_Opr r6_opr;
+  static LIR_Opr r7_opr;
+  static LIR_Opr r8_opr;
+  static LIR_Opr r9_opr;
+  static LIR_Opr r10_opr;
+  static LIR_Opr r11_opr;
+  static LIR_Opr r12_opr;
+  static LIR_Opr r13_opr;
+  static LIR_Opr r14_opr;
+  static LIR_Opr r15_opr;
+  static LIR_Opr r16_opr;
+  static LIR_Opr r17_opr;
+  static LIR_Opr r18_opr;
+  static LIR_Opr r19_opr;
+  static LIR_Opr r20_opr;
+  static LIR_Opr r21_opr;
+  static LIR_Opr r22_opr;
+  static LIR_Opr r23_opr;
+  static LIR_Opr r24_opr;
+  static LIR_Opr r25_opr;
+  static LIR_Opr r26_opr;
+  static LIR_Opr r27_opr;
+  static LIR_Opr r28_opr;
+  static LIR_Opr r29_opr;
+  static LIR_Opr r30_opr;
+  static LIR_Opr rfp_opr;
+  static LIR_Opr sp_opr;
+
+  static LIR_Opr r0_oop_opr;
+  static LIR_Opr r1_oop_opr;
+  static LIR_Opr r2_oop_opr;
+  static LIR_Opr r3_oop_opr;
+  static LIR_Opr r4_oop_opr;
+  static LIR_Opr r5_oop_opr;
+  static LIR_Opr r6_oop_opr;
+  static LIR_Opr r7_oop_opr;
+  static LIR_Opr r8_oop_opr;
+  static LIR_Opr r9_oop_opr;
+  static LIR_Opr r10_oop_opr;
+  static LIR_Opr r11_oop_opr;
+  static LIR_Opr r12_oop_opr;
+  static LIR_Opr r13_oop_opr;
+  static LIR_Opr r14_oop_opr;
+  static LIR_Opr r15_oop_opr;
+  static LIR_Opr r16_oop_opr;
+  static LIR_Opr r17_oop_opr;
+  static LIR_Opr r18_oop_opr;
+  static LIR_Opr r19_oop_opr;
+  static LIR_Opr r20_oop_opr;
+  static LIR_Opr r21_oop_opr;
+  static LIR_Opr r22_oop_opr;
+  static LIR_Opr r23_oop_opr;
+  static LIR_Opr r24_oop_opr;
+  static LIR_Opr r25_oop_opr;
+  static LIR_Opr r26_oop_opr;
+  static LIR_Opr r27_oop_opr;
+  static LIR_Opr r28_oop_opr;
+  static LIR_Opr r29_oop_opr;
+  static LIR_Opr r30_oop_opr;
+
+  static LIR_Opr rscratch1_opr;
+  static LIR_Opr rscratch2_opr;
+  static LIR_Opr rscratch1_long_opr;
+  static LIR_Opr rscratch2_long_opr;
+
+  static LIR_Opr r0_metadata_opr;
+  static LIR_Opr r1_metadata_opr;
+  static LIR_Opr r2_metadata_opr;
+  static LIR_Opr r3_metadata_opr;
+  static LIR_Opr r4_metadata_opr;
+  static LIR_Opr r5_metadata_opr;
+
+  static LIR_Opr long0_opr;
+  static LIR_Opr long1_opr;
+  static LIR_Opr fpu0_float_opr;
+  static LIR_Opr fpu0_double_opr;
+
+  static LIR_Opr as_long_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+  static LIR_Opr as_pointer_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+
+  // VMReg name for spilled physical FPU stack slot n
+  static VMReg fpu_regname (int n);
+
+  static bool is_caller_save_register (LIR_Opr opr) { return true; }
+  static bool is_caller_save_register (Register r) { return true; }
+
+  static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
+  static int last_cpu_reg()             { return pd_last_cpu_reg;  }
+  static int last_byte_reg()            { return pd_last_byte_reg; }
+
+#endif // CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,3186 @@
+/*
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "gc_interface/collectedHeap.hpp"
+#include "memory/barrierSet.hpp"
+#include "memory/cardTableModRefBS.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+
+
+
+#ifndef PRODUCT
+#define COMMENT(x)   do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+NEEDS_CLEANUP // remove this definitions ?
+const Register IC_Klass    = rscratch2;   // where the IC klass is cached
+const Register SYNC_header = r0;   // synchronization header
+const Register SHIFT_count = r0;   // where count for shift operations must be
+
+#define __ _masm->
+
+
+static void select_different_registers(Register preserve,
+                                       Register extra,
+                                       Register &tmp1,
+                                       Register &tmp2) {
+  if (tmp1 == preserve) {
+    assert_different_registers(tmp1, tmp2, extra);
+    tmp1 = extra;
+  } else if (tmp2 == preserve) {
+    assert_different_registers(tmp1, tmp2, extra);
+    tmp2 = extra;
+  }
+  assert_different_registers(preserve, tmp1, tmp2);
+}
+
+
+
+static void select_different_registers(Register preserve,
+                                       Register extra,
+                                       Register &tmp1,
+                                       Register &tmp2,
+                                       Register &tmp3) {
+  if (tmp1 == preserve) {
+    assert_different_registers(tmp1, tmp2, tmp3, extra);
+    tmp1 = extra;
+  } else if (tmp2 == preserve) {
+    assert_different_registers(tmp1, tmp2, tmp3, extra);
+    tmp2 = extra;
+  } else if (tmp3 == preserve) {
+    assert_different_registers(tmp1, tmp2, tmp3, extra);
+    tmp3 = extra;
+  }
+  assert_different_registers(preserve, tmp1, tmp2, tmp3);
+}
+
+
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; }
+
+
+LIR_Opr LIR_Assembler::receiverOpr() {
+  return FrameMap::receiver_opr;
+}
+
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+  return FrameMap::as_pointer_opr(receiverOpr()->as_register());
+}
+
+//--------------fpu register translations-----------------------
+
+
+address LIR_Assembler::float_constant(float f) {
+  address const_addr = __ float_constant(f);
+  if (const_addr == NULL) {
+    bailout("const section overflow");
+    return __ code()->consts()->start();
+  } else {
+    return const_addr;
+  }
+}
+
+
+address LIR_Assembler::double_constant(double d) {
+  address const_addr = __ double_constant(d);
+  if (const_addr == NULL) {
+    bailout("const section overflow");
+    return __ code()->consts()->start();
+  } else {
+    return const_addr;
+  }
+}
+
+address LIR_Assembler::int_constant(jlong n) {
+  address const_addr = __ long_constant(n);
+  if (const_addr == NULL) {
+    bailout("const section overflow");
+    return __ code()->consts()->start();
+  } else {
+    return const_addr;
+  }
+}
+
+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); }
+
+void LIR_Assembler::reset_FPU() { Unimplemented(); }
+
+void LIR_Assembler::fpop() { Unimplemented(); }
+
+void LIR_Assembler::fxch(int i) { Unimplemented(); }
+
+void LIR_Assembler::fld(int i) { Unimplemented(); }
+
+void LIR_Assembler::ffree(int i) { Unimplemented(); }
+
+void LIR_Assembler::breakpoint() { Unimplemented(); }
+
+void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); }
+
+void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); }
+
+bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; }
+//-------------------------------------------
+
+static Register as_reg(LIR_Opr op) {
+  return op->is_double_cpu() ? op->as_register_lo() : op->as_register();
+}
+
+static jlong as_long(LIR_Opr data) {
+  jlong result;
+  switch (data->type()) {
+  case T_INT:
+    result = (data->as_jint());
+    break;
+  case T_LONG:
+    result = (data->as_jlong());
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  return result;
+}
+
+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
+  Register base = addr->base()->as_pointer_register();
+  LIR_Opr opr = addr->index();
+  if (opr->is_cpu_register()) {
+    Register index;
+    if (opr->is_single_cpu())
+      index = opr->as_register();
+    else
+      index = opr->as_register_lo();
+    assert(addr->disp() == 0, "must be");
+    switch(opr->type()) {
+      case T_INT:
+        return Address(base, index, Address::sxtw(addr->scale()));
+      case T_LONG:
+        return Address(base, index, Address::lsl(addr->scale()));
+      default:
+        ShouldNotReachHere();
+      }
+  } else  {
+    intptr_t addr_offset = intptr_t(addr->disp());
+    if (Address::offset_ok_for_immed(addr_offset, addr->scale()))
+      return Address(base, addr_offset, Address::lsl(addr->scale()));
+    else {
+      __ mov(tmp, addr_offset);
+      return Address(base, tmp, Address::lsl(addr->scale()));
+    }
+  }
+  return Address();
+}
+
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
+  ShouldNotReachHere();
+  return Address();
+}
+
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
+  return as_Address(addr, rscratch1);
+}
+
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
+  return as_Address(addr, rscratch1);  // Ouch
+  // FIXME: This needs to be much more clever.  See x86.
+}
+
+
+void LIR_Assembler::osr_entry() {
+  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
+  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+  ValueStack* entry_state = osr_entry->state();
+  int number_of_locks = entry_state->locks_size();
+
+  // we jump here if osr happens with the interpreter
+  // state set up to continue at the beginning of the
+  // loop that triggered osr - in particular, we have
+  // the following registers setup:
+  //
+  // r2: osr buffer
+  //
+
+  // build frame
+  ciMethod* m = compilation()->method();
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+  // OSR buffer is
+  //
+  // locals[nlocals-1..0]
+  // monitors[0..number_of_locks]
+  //
+  // locals is a direct copy of the interpreter frame so in the osr buffer
+  // so first slot in the local array is the last local from the interpreter
+  // and last slot is local[0] (receiver) from the interpreter
+  //
+  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
+  // in the interpreter frame (the method lock if a sync method)
+
+  // Initialize monitors in the compiled activation.
+  //   r2: pointer to osr buffer
+  //
+  // All other registers are dead at this point and the locals will be
+  // copied into place by code emitted in the IR.
+
+  Register OSR_buf = osrBufferPointer()->as_pointer_register();
+  { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+    int monitor_offset = BytesPerWord * method()->max_locals() +
+      (2 * BytesPerWord) * (number_of_locks - 1);
+    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
+    // the OSR buffer using 2 word entries: first the lock and then
+    // the oop.
+    for (int i = 0; i < number_of_locks; i++) {
+      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
+#ifdef ASSERT
+      // verify the interpreter's monitor has a non-null object
+      {
+        Label L;
+        __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord));
+        __ cbnz(rscratch1, L);
+        __ stop("locked object is NULL");
+        __ bind(L);
+      }
+#endif
+      __ ldr(r19, Address(OSR_buf, slot_offset + 0));
+      __ str(r19, frame_map()->address_for_monitor_lock(i));
+      __ ldr(r19, Address(OSR_buf, slot_offset + 1*BytesPerWord));
+      __ str(r19, frame_map()->address_for_monitor_object(i));
+    }
+  }
+}
+
+
+// inline cache check; done before the frame is built.
+int LIR_Assembler::check_icache() {
+  Register receiver = FrameMap::receiver_opr->as_register();
+  Register ic_klass = IC_Klass;
+  int start_offset = __ offset();
+  __ inline_cache_check(receiver, ic_klass);
+
+  // if icache check fails, then jump to runtime routine
+  // Note: RECEIVER must still contain the receiver!
+  Label dont;
+  __ br(Assembler::EQ, dont);
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+  // We align the verified entry point unless the method body
+  // (including its inline cache check) will fit in a single 64-byte
+  // icache line.
+  if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) {
+    // force alignment after the cache check.
+    __ align(CodeEntryAlignment);
+  }
+
+  __ bind(dont);
+  return start_offset;
+}
+
+
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+  if (o == NULL) {
+    __ mov(reg, zr);
+  } else {
+    __ movoop(reg, o, /*immediate*/true);
+  }
+}
+
+void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
+
+  switch (patching_id(info)) {
+  case PatchingStub::access_field_id:
+    target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+    reloc_type = relocInfo::section_word_type;
+    break;
+  case PatchingStub::load_klass_id:
+    target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+    reloc_type = relocInfo::metadata_type;
+    break;
+  case PatchingStub::load_mirror_id:
+    target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  case PatchingStub::load_appendix_id:
+    target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  default: ShouldNotReachHere();
+  }
+
+  __ far_call(RuntimeAddress(target));
+  add_call_info_here(info);
+}
+
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  deoptimize_trap(info);
+}
+
+
+// This specifies the rsp decrement needed to build the frame
+int LIR_Assembler::initial_frame_size_in_bytes() const {
+  // if rounding, must let FrameMap know!
+
+  // The frame_map records size in slots (32bit word)
+
+  // subtract two words to account for return address and link
+  return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word))  * VMRegImpl::stack_slot_size;
+}
+
+
+int LIR_Assembler::emit_exception_handler() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // generate code for exception handler
+  address handler_base = __ start_a_stub(exception_handler_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("exception handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
+  // the exception oop and pc are in r0, and r3
+  // no other registers need to be preserved, so invalidate them
+  __ invalidate_registers(false, true, true, false, true, true);
+
+  // check that there is really an exception
+  __ verify_not_null_oop(r0);
+
+  // search an exception handler (r0: exception oop, r3: throwing pc)
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));  __ should_not_reach_here();
+  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path.
+int LIR_Assembler::emit_unwind_handler() {
+#ifndef PRODUCT
+  if (CommentedAssembly) {
+    _masm->block_comment("Unwind handler");
+  }
+#endif
+
+  int offset = code_offset();
+
+  // Fetch the exception from TLS and clear out exception related thread state
+  __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
+
+  __ bind(_unwind_handler_entry);
+  __ verify_not_null_oop(r0);
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mov(r19, r0);  // Preserve the exception
+  }
+
+  // Preform needed unlocking
+  MonitorExitStub* stub = NULL;
+  if (method()->is_synchronized()) {
+    monitor_address(0, FrameMap::r0_opr);
+    stub = new MonitorExitStub(FrameMap::r0_opr, true, 0);
+    __ unlock_object(r5, r4, r0, *stub->entry());
+    __ bind(*stub->continuation());
+  }
+
+  if (compilation()->env()->dtrace_method_probes()) {
+    __ call_Unimplemented();
+#if 0
+    __ movptr(Address(rsp, 0), rax);
+    __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding());
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit)));
+#endif
+  }
+
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mov(r0, r19);  // Restore the exception
+  }
+
+  // remove the activation and dispatch to the unwind handler
+  __ block_comment("remove_frame and dispatch to the unwind handler");
+  __ remove_frame(initial_frame_size_in_bytes());
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+
+  // Emit the slow path assembly
+  if (stub != NULL) {
+    stub->emit_code(this);
+  }
+
+  return offset;
+}
+
+
+int LIR_Assembler::emit_deopt_handler() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // generate code for exception handler
+  address handler_base = __ start_a_stub(deopt_handler_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("deopt handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
+  __ adr(lr, pc());
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+
+// This is the fast version of java.lang.String.compare; it has not
+// OSR-entry and therefore, we generate a slow version for OSR's
+void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info)  {
+  __ mov(r2, (address)__FUNCTION__);
+  __ call_Unimplemented();
+}
+
+
+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
+  _masm->code_section()->relocate(adr, relocInfo::poll_type);
+  int pc_offset = code_offset();
+  flush_debug_info(pc_offset);
+  info->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
+  if (info->exception_handlers() != NULL) {
+    compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers());
+  }
+}
+
+// Rather than take a segfault when the polling page is protected,
+// explicitly check for a safepoint in progress and if there is one,
+// fake a call to the handler as if a segfault had been caught.
+void LIR_Assembler::poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info) {
+  __ mov(rscratch1, SafepointSynchronize::address_of_state());
+  __ ldrb(rscratch1, Address(rscratch1));
+  Label nope, poll;
+  __ cbz(rscratch1, nope);
+  __ block_comment("safepoint");
+  __ enter();
+  __ push(0x3, sp);                // r0 & r1
+  __ push(0x3ffffffc, sp);         // integer registers except lr & sp & r0 & r1
+  __ adr(r0, poll);
+  __ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset()));
+  __ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub));
+  __ blrt(rscratch1, 1, 0, 1);
+  __ maybe_isb();
+  __ pop(0x3ffffffc, sp);          // integer registers except lr & sp & r0 & r1
+  __ mov(rscratch1, r0);
+  __ pop(0x3, sp);                 // r0 & r1
+  __ leave();
+  __ br(rscratch1);
+  address polling_page(os::get_polling_page());
+  assert(os::is_poll_address(polling_page), "should be");
+  unsigned long off;
+  __ adrp(rscratch1, Address(polling_page, rtype), off);
+  __ bind(poll);
+  if (info)
+    add_debug_info_for_branch(info);  // This isn't just debug info:
+                                      // it's the oop map
+  else
+    __ code_section()->relocate(pc(), rtype);
+  __ ldrw(zr, Address(rscratch1, off));
+  __ bind(nope);
+}
+
+void LIR_Assembler::return_op(LIR_Opr result) {
+  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,");
+  // Pop the stack before the safepoint code
+  __ remove_frame(initial_frame_size_in_bytes());
+  address polling_page(os::get_polling_page());
+  __ read_polling_page(rscratch1, polling_page, relocInfo::poll_return_type);
+  __ ret(lr);
+}
+
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  address polling_page(os::get_polling_page());
+  guarantee(info != NULL, "Shouldn't be NULL");
+  assert(os::is_poll_address(polling_page), "should be");
+  unsigned long off;
+  __ adrp(rscratch1, Address(polling_page, relocInfo::poll_type), off);
+  assert(off == 0, "must be");
+  add_debug_info_for_branch(info);  // This isn't just debug info:
+  // it's the oop map
+  __ read_polling_page(rscratch1, relocInfo::poll_type);
+  return __ offset();
+}
+
+
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
+  if (from_reg == r31_sp)
+    from_reg = sp;
+  if (to_reg == r31_sp)
+    to_reg = sp;
+  __ mov(to_reg, from_reg);
+}
+
+void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); }
+
+
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+
+  switch (c->type()) {
+    case T_INT: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ movw(dest->as_register(), c->as_jint());
+      break;
+    }
+
+    case T_ADDRESS: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ mov(dest->as_register(), c->as_jint());
+      break;
+    }
+
+    case T_LONG: {
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ mov(dest->as_register_lo(), (intptr_t)c->as_jlong());
+      break;
+    }
+
+    case T_OBJECT: {
+        if (patch_code == lir_patch_none) {
+          jobject2reg(c->as_jobject(), dest->as_register());
+        } else {
+          jobject2reg_with_patching(dest->as_register(), info);
+        }
+      break;
+    }
+
+    case T_METADATA: {
+      if (patch_code != lir_patch_none) {
+        klass2reg_with_patching(dest->as_register(), info);
+      } else {
+        __ mov_metadata(dest->as_register(), c->as_metadata());
+      }
+      break;
+    }
+
+    case T_FLOAT: {
+      if (__ operand_valid_for_float_immediate(c->as_jfloat())) {
+        __ fmovs(dest->as_float_reg(), (c->as_jfloat()));
+      } else {
+        __ adr(rscratch1, InternalAddress(float_constant(c->as_jfloat())));
+        __ ldrs(dest->as_float_reg(), Address(rscratch1));
+      }
+      break;
+    }
+
+    case T_DOUBLE: {
+      if (__ operand_valid_for_float_immediate(c->as_jdouble())) {
+        __ fmovd(dest->as_double_reg(), (c->as_jdouble()));
+      } else {
+        __ adr(rscratch1, InternalAddress(double_constant(c->as_jdouble())));
+        __ ldrd(dest->as_double_reg(), Address(rscratch1));
+      }
+      break;
+    }
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
+  LIR_Const* c = src->as_constant_ptr();
+  switch (c->type()) {
+  case T_OBJECT:
+    {
+      if (! c->as_jobject())
+        __ str(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
+      else {
+        const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL);
+        reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false);
+      }
+    }
+    break;
+  case T_ADDRESS:
+    {
+      const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL);
+      reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false);
+    }
+  case T_INT:
+  case T_FLOAT:
+    {
+      Register reg = zr;
+      if (c->as_jint_bits() == 0)
+        __ strw(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
+      else {
+        __ movw(rscratch1, c->as_jint_bits());
+        __ strw(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix()));
+      }
+    }
+    break;
+  case T_LONG:
+  case T_DOUBLE:
+    {
+      Register reg = zr;
+      if (c->as_jlong_bits() == 0)
+        __ str(zr, frame_map()->address_for_slot(dest->double_stack_ix(),
+                                                 lo_word_offset_in_bytes));
+      else {
+        __ mov(rscratch1, (intptr_t)c->as_jlong_bits());
+        __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(),
+                                                        lo_word_offset_in_bytes));
+      }
+    }
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
+  assert(src->is_constant(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+  LIR_Address* to_addr = dest->as_address_ptr();
+
+  void (Assembler::* insn)(Register Rt, const Address &adr);
+
+  switch (type) {
+  case T_ADDRESS:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::str;
+    break;
+  case T_LONG:
+    assert(c->as_jlong() == 0, "should be");
+    insn = &Assembler::str;
+    break;
+  case T_INT:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::strw;
+    break;
+  case T_OBJECT:
+  case T_ARRAY:
+    assert(c->as_jobject() == 0, "should be");
+    if (UseCompressedOops && !wide) {
+      insn = &Assembler::strw;
+    } else {
+      insn = &Assembler::str;
+    }
+    break;
+  case T_CHAR:
+  case T_SHORT:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::strh;
+    break;
+  case T_BOOLEAN:
+  case T_BYTE:
+    assert(c->as_jint() == 0, "should be");
+    insn = &Assembler::strb;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  if (info) add_debug_info_for_null_check_here(info);
+  (_masm->*insn)(zr, as_Address(to_addr, rscratch1));
+}
+
+void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  // move between cpu-registers
+  if (dest->is_single_cpu()) {
+    if (src->type() == T_LONG) {
+      // Can do LONG -> OBJECT
+      move_regs(src->as_register_lo(), dest->as_register());
+      return;
+    }
+    assert(src->is_single_cpu(), "must match");
+    if (src->type() == T_OBJECT) {
+      __ verify_oop(src->as_register());
+    }
+    move_regs(src->as_register(), dest->as_register());
+
+  } else if (dest->is_double_cpu()) {
+    if (src->type() == T_OBJECT || src->type() == T_ARRAY) {
+      // Surprising to me but we can see move of a long to t_object
+      __ verify_oop(src->as_register());
+      move_regs(src->as_register(), dest->as_register_lo());
+      return;
+    }
+    assert(src->is_double_cpu(), "must match");
+    Register f_lo = src->as_register_lo();
+    Register f_hi = src->as_register_hi();
+    Register t_lo = dest->as_register_lo();
+    Register t_hi = dest->as_register_hi();
+    assert(f_hi == f_lo, "must be same");
+    assert(t_hi == t_lo, "must be same");
+    move_regs(f_lo, t_lo);
+
+  } else if (dest->is_single_fpu()) {
+    __ fmovs(dest->as_float_reg(), src->as_float_reg());
+
+  } else if (dest->is_double_fpu()) {
+    __ fmovd(dest->as_double_reg(), src->as_double_reg());
+
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
+  if (src->is_single_cpu()) {
+    if (type == T_ARRAY || type == T_OBJECT) {
+      __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix()));
+      __ verify_oop(src->as_register());
+    } else if (type == T_METADATA || type == T_DOUBLE) {
+      __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix()));
+    } else {
+      __ strw(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix()));
+    }
+
+  } else if (src->is_double_cpu()) {
+    Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
+    __ str(src->as_register_lo(), dest_addr_LO);
+
+  } else if (src->is_single_fpu()) {
+    Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix());
+    __ strs(src->as_float_reg(), dest_addr);
+
+  } else if (src->is_double_fpu()) {
+    Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix());
+    __ strd(src->as_double_reg(), dest_addr);
+
+  } else {
+    ShouldNotReachHere();
+  }
+
+}
+
+
+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) {
+  LIR_Address* to_addr = dest->as_address_ptr();
+  PatchingStub* patch = NULL;
+  Register compressed_src = rscratch1;
+
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
+  if (type == T_ARRAY || type == T_OBJECT) {
+    __ verify_oop(src->as_register());
+
+    if (UseCompressedOops && !wide) {
+      __ encode_heap_oop(compressed_src, src->as_register());
+    } else {
+      compressed_src = src->as_register();
+    }
+  }
+
+  int null_check_here = code_offset();
+  switch (type) {
+    case T_FLOAT: {
+      __ strs(src->as_float_reg(), as_Address(to_addr));
+      break;
+    }
+
+    case T_DOUBLE: {
+      __ strd(src->as_double_reg(), as_Address(to_addr));
+      break;
+    }
+
+    case T_ARRAY:   // fall through
+    case T_OBJECT:  // fall through
+      if (UseCompressedOops && !wide) {
+        __ strw(compressed_src, as_Address(to_addr, rscratch2));
+      } else {
+         __ str(compressed_src, as_Address(to_addr));
+      }
+      break;
+    case T_METADATA:
+      // We get here to store a method pointer to the stack to pass to
+      // a dtrace runtime call. This can't work on 64 bit with
+      // compressed klass ptrs: T_METADATA can be a compressed klass
+      // ptr or a 64 bit method pointer.
+      ShouldNotReachHere();
+      __ str(src->as_register(), as_Address(to_addr));
+      break;
+    case T_ADDRESS:
+      __ str(src->as_register(), as_Address(to_addr));
+      break;
+    case T_INT:
+      __ strw(src->as_register(), as_Address(to_addr));
+      break;
+
+    case T_LONG: {
+      __ str(src->as_register_lo(), as_Address_lo(to_addr));
+      break;
+    }
+
+    case T_BYTE:    // fall through
+    case T_BOOLEAN: {
+      __ strb(src->as_register(), as_Address(to_addr));
+      break;
+    }
+
+    case T_CHAR:    // fall through
+    case T_SHORT:
+      __ strh(src->as_register(), as_Address(to_addr));
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+  if (info != NULL) {
+    add_debug_info_for_null_check(null_check_here, info);
+  }
+}
+
+
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  assert(src->is_stack(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  if (dest->is_single_cpu()) {
+    if (type == T_ARRAY || type == T_OBJECT) {
+      __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+      __ verify_oop(dest->as_register());
+    } else if (type == T_METADATA) {
+      __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+    } else {
+      __ ldrw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+    }
+
+  } else if (dest->is_double_cpu()) {
+    Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
+    __ ldr(dest->as_register_lo(), src_addr_LO);
+
+  } else if (dest->is_single_fpu()) {
+    Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
+    __ ldrs(dest->as_float_reg(), src_addr);
+
+  } else if (dest->is_double_fpu()) {
+    Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
+    __ ldrd(dest->as_double_reg(), src_addr);
+
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) {
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
+
+  switch (patching_id(info)) {
+  case PatchingStub::access_field_id:
+    target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+    reloc_type = relocInfo::section_word_type;
+    break;
+  case PatchingStub::load_klass_id:
+    target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+    reloc_type = relocInfo::metadata_type;
+    break;
+  case PatchingStub::load_mirror_id:
+    target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  case PatchingStub::load_appendix_id:
+    target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  default: ShouldNotReachHere();
+  }
+
+  __ far_call(RuntimeAddress(target));
+  add_call_info_here(info);
+}
+
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+
+  LIR_Opr temp;
+  if (type == T_LONG || type == T_DOUBLE)
+    temp = FrameMap::rscratch1_long_opr;
+  else
+    temp = FrameMap::rscratch1_opr;
+
+  stack2reg(src, temp, src->type());
+  reg2stack(temp, dest, dest->type(), false);
+}
+
+
+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) {
+  LIR_Address* addr = src->as_address_ptr();
+  LIR_Address* from_addr = src->as_address_ptr();
+
+  if (addr->base()->type() == T_OBJECT) {
+    __ verify_oop(addr->base()->as_pointer_register());
+  }
+
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
+  if (info != NULL) {
+    add_debug_info_for_null_check_here(info);
+  }
+  int null_check_here = code_offset();
+  switch (type) {
+    case T_FLOAT: {
+      __ ldrs(dest->as_float_reg(), as_Address(from_addr));
+      break;
+    }
+
+    case T_DOUBLE: {
+      __ ldrd(dest->as_double_reg(), as_Address(from_addr));
+      break;
+    }
+
+    case T_ARRAY:   // fall through
+    case T_OBJECT:  // fall through
+      if (UseCompressedOops && !wide) {
+        __ ldrw(dest->as_register(), as_Address(from_addr));
+      } else {
+         __ ldr(dest->as_register(), as_Address(from_addr));
+      }
+      break;
+    case T_METADATA:
+      // We get here to store a method pointer to the stack to pass to
+      // a dtrace runtime call. This can't work on 64 bit with
+      // compressed klass ptrs: T_METADATA can be a compressed klass
+      // ptr or a 64 bit method pointer.
+      ShouldNotReachHere();
+      __ ldr(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_ADDRESS:
+      // FIXME: OMG this is a horrible kludge.  Any offset from an
+      // address that matches klass_offset_in_bytes() will be loaded
+      // as a word, not a long.
+      if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
+        __ ldrw(dest->as_register(), as_Address(from_addr));
+      } else {
+        __ ldr(dest->as_register(), as_Address(from_addr));
+      }
+      break;
+    case T_INT:
+      __ ldrw(dest->as_register(), as_Address(from_addr));
+      break;
+
+    case T_LONG: {
+      __ ldr(dest->as_register_lo(), as_Address_lo(from_addr));
+      break;
+    }
+
+    case T_BYTE:
+      __ ldrsb(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_BOOLEAN: {
+      __ ldrb(dest->as_register(), as_Address(from_addr));
+      break;
+    }
+
+    case T_CHAR:
+      __ ldrh(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_SHORT:
+      __ ldrsh(dest->as_register(), as_Address(from_addr));
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+
+  if (type == T_ARRAY || type == T_OBJECT) {
+    if (UseCompressedOops && !wide) {
+      __ decode_heap_oop(dest->as_register());
+    }
+    __ verify_oop(dest->as_register());
+  } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) {
+    if (UseCompressedClassPointers) {
+      __ decode_klass_not_null(dest->as_register());
+    }
+  }
+}
+
+
+void LIR_Assembler::prefetchr(LIR_Opr src) { Unimplemented(); }
+
+
+void LIR_Assembler::prefetchw(LIR_Opr src) { Unimplemented(); }
+
+
+int LIR_Assembler::array_element_size(BasicType type) const {
+  int elem_size = type2aelembytes(type);
+  return exact_log2(elem_size);
+}
+
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  Register Rdividend = op->in_opr1()->as_register();
+  Register Rdivisor  = op->in_opr2()->as_register();
+  Register Rscratch  = op->in_opr3()->as_register();
+  Register Rresult   = op->result_opr()->as_register();
+  int divisor = -1;
+
+  /*
+  TODO: For some reason, using the Rscratch that gets passed in is
+  not possible because the register allocator does not see the tmp reg
+  as used, and assignes it the same register as Rdividend. We use rscratch1
+   instead.
+
+  assert(Rdividend != Rscratch, "");
+  assert(Rdivisor  != Rscratch, "");
+  */
+
+  if (Rdivisor == noreg && is_power_of_2(divisor)) {
+    // convert division by a power of two into some shifts and logical operations
+  }
+
+  if (op->code() == lir_irem) {
+    __ corrected_idivl(Rresult, Rdividend, Rdivisor, true, rscratch1);
+   } else if (op->code() == lir_idiv) {
+    __ corrected_idivl(Rresult, Rdividend, Rdivisor, false, rscratch1);
+  } else
+    ShouldNotReachHere();
+}
+
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+#ifdef ASSERT
+  assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+  if (op->block() != NULL)  _branch_target_blocks.append(op->block());
+  if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
+#endif
+
+  if (op->cond() == lir_cond_always) {
+    if (op->info() != NULL) add_debug_info_for_branch(op->info());
+    __ b(*(op->label()));
+  } else {
+    Assembler::Condition acond;
+    if (op->code() == lir_cond_float_branch) {
+      bool is_unordered = (op->ublock() == op->block());
+      // Assembler::EQ does not permit unordered branches, so we add
+      // another branch here.  Likewise, Assembler::NE does not permit
+      // ordered branches.
+      if (is_unordered && op->cond() == lir_cond_equal
+          || !is_unordered && op->cond() == lir_cond_notEqual)
+        __ br(Assembler::VS, *(op->ublock()->label()));
+      switch(op->cond()) {
+      case lir_cond_equal:        acond = Assembler::EQ; break;
+      case lir_cond_notEqual:     acond = Assembler::NE; break;
+      case lir_cond_less:         acond = (is_unordered ? Assembler::LT : Assembler::LO); break;
+      case lir_cond_lessEqual:    acond = (is_unordered ? Assembler::LE : Assembler::LS); break;
+      case lir_cond_greaterEqual: acond = (is_unordered ? Assembler::HS : Assembler::GE); break;
+      case lir_cond_greater:      acond = (is_unordered ? Assembler::HI : Assembler::GT); break;
+      default:                    ShouldNotReachHere();
+      }
+    } else {
+      switch (op->cond()) {
+        case lir_cond_equal:        acond = Assembler::EQ; break;
+        case lir_cond_notEqual:     acond = Assembler::NE; break;
+        case lir_cond_less:         acond = Assembler::LT; break;
+        case lir_cond_lessEqual:    acond = Assembler::LE; break;
+        case lir_cond_greaterEqual: acond = Assembler::GE; break;
+        case lir_cond_greater:      acond = Assembler::GT; break;
+        case lir_cond_belowEqual:   acond = Assembler::LS; break;
+        case lir_cond_aboveEqual:   acond = Assembler::HS; break;
+        default:                         ShouldNotReachHere();
+      }
+    }
+    __ br(acond,*(op->label()));
+  }
+}
+
+
+
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+  LIR_Opr src  = op->in_opr();
+  LIR_Opr dest = op->result_opr();
+
+  switch (op->bytecode()) {
+    case Bytecodes::_i2f:
+      {
+        __ scvtfws(dest->as_float_reg(), src->as_register());
+        break;
+      }
+    case Bytecodes::_i2d:
+      {
+        __ scvtfwd(dest->as_double_reg(), src->as_register());
+        break;
+      }
+    case Bytecodes::_l2d:
+      {
+        __ scvtfd(dest->as_double_reg(), src->as_register_lo());
+        break;
+      }
+    case Bytecodes::_l2f:
+      {
+        __ scvtfs(dest->as_float_reg(), src->as_register_lo());
+        break;
+      }
+    case Bytecodes::_f2d:
+      {
+        __ fcvts(dest->as_double_reg(), src->as_float_reg());
+        break;
+      }
+    case Bytecodes::_d2f:
+      {
+        __ fcvtd(dest->as_float_reg(), src->as_double_reg());
+        break;
+      }
+    case Bytecodes::_i2c:
+      {
+        __ ubfx(dest->as_register(), src->as_register(), 0, 16);
+        break;
+      }
+    case Bytecodes::_i2l:
+      {
+        __ sxtw(dest->as_register_lo(), src->as_register());
+        break;
+      }
+    case Bytecodes::_i2s:
+      {
+        __ sxth(dest->as_register(), src->as_register());
+        break;
+      }
+    case Bytecodes::_i2b:
+      {
+        __ sxtb(dest->as_register(), src->as_register());
+        break;
+      }
+    case Bytecodes::_l2i:
+      {
+        _masm->block_comment("FIXME: This could be a no-op");
+        __ uxtw(dest->as_register(), src->as_register_lo());
+        break;
+      }
+    case Bytecodes::_d2l:
+      {
+        __ fcvtzd(dest->as_register_lo(), src->as_double_reg());
+        break;
+      }
+    case Bytecodes::_f2i:
+      {
+        __ fcvtzsw(dest->as_register(), src->as_float_reg());
+        break;
+      }
+    case Bytecodes::_f2l:
+      {
+        __ fcvtzs(dest->as_register_lo(), src->as_float_reg());
+        break;
+      }
+    case Bytecodes::_d2i:
+      {
+        __ fcvtzdw(dest->as_register(), src->as_double_reg());
+        break;
+      }
+    default: ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  if (op->init_check()) {
+    __ ldrb(rscratch1, Address(op->klass()->as_register(),
+                               InstanceKlass::init_state_offset()));
+    __ cmpw(rscratch1, InstanceKlass::fully_initialized);
+    add_debug_info_for_null_check_here(op->stub()->info());
+    __ br(Assembler::NE, *op->stub()->entry());
+  }
+  __ allocate_object(op->obj()->as_register(),
+                     op->tmp1()->as_register(),
+                     op->tmp2()->as_register(),
+                     op->header_size(),
+                     op->object_size(),
+                     op->klass()->as_register(),
+                     *op->stub()->entry());
+  __ bind(*op->stub()->continuation());
+}
+
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  Register len =  op->len()->as_register();
+  __ uxtw(len, len);
+
+  if (UseSlowPath ||
+      (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
+      (!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
+    __ b(*op->stub()->entry());
+  } else {
+    Register tmp1 = op->tmp1()->as_register();
+    Register tmp2 = op->tmp2()->as_register();
+    Register tmp3 = op->tmp3()->as_register();
+    if (len == tmp1) {
+      tmp1 = tmp3;
+    } else if (len == tmp2) {
+      tmp2 = tmp3;
+    } else if (len == tmp3) {
+      // everything is ok
+    } else {
+      __ mov(tmp3, len);
+    }
+    __ allocate_array(op->obj()->as_register(),
+                      len,
+                      tmp1,
+                      tmp2,
+                      arrayOopDesc::header_size(op->type()),
+                      array_element_size(op->type()),
+                      op->klass()->as_register(),
+                      *op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+void LIR_Assembler::type_profile_helper(Register mdo,
+                                        ciMethodData *md, ciProfileData *data,
+                                        Register recv, Label* update_done) {
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    // See if the receiver is receiver[n].
+    __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
+    __ ldr(rscratch1, Address(rscratch2));
+    __ cmp(recv, rscratch1);
+    __ br(Assembler::NE, next_test);
+    Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
+    __ addptr(data_addr, DataLayout::counter_increment);
+    __ b(*update_done);
+    __ bind(next_test);
+  }
+
+  // Didn't find receiver; find next empty slot and fill it in
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    __ lea(rscratch2,
+           Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
+    Address recv_addr(rscratch2);
+    __ ldr(rscratch1, recv_addr);
+    __ cbnz(rscratch1, next_test);
+    __ str(recv, recv_addr);
+    __ mov(rscratch1, DataLayout::counter_increment);
+    __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))));
+    __ str(rscratch1, Address(rscratch2));
+    __ b(*update_done);
+    __ bind(next_test);
+  }
+}
+
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
+  // we always need a stub for the failure case.
+  CodeStub* stub = op->stub();
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register dst = op->result_opr()->as_register();
+  ciKlass* k = op->klass();
+  Register Rtmp1 = noreg;
+
+  // check if it needs to be profiled
+  ciMethodData* md;
+  ciProfileData* data;
+
+  if (op->should_profile()) {
+    ciMethod* method = op->profiled_method();
+    assert(method != NULL, "Should have method");
+    int bci = op->profiled_bci();
+    md = method->method_data_or_null();
+    assert(md != NULL, "Sanity");
+    data = md->bci_to_data(bci);
+    assert(data != NULL,                "need data for type check");
+    assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+  }
+  Label profile_cast_success, profile_cast_failure;
+  Label *success_target = op->should_profile() ? &profile_cast_success : success;
+  Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
+
+  if (obj == k_RInfo) {
+    k_RInfo = dst;
+  } else if (obj == klass_RInfo) {
+    klass_RInfo = dst;
+  }
+  if (k->is_loaded() && !UseCompressedClassPointers) {
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo);
+  } else {
+    Rtmp1 = op->tmp3()->as_register();
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
+  }
+
+  assert_different_registers(obj, k_RInfo, klass_RInfo);
+
+    if (op->should_profile()) {
+      Label not_null;
+      __ cbnz(obj, not_null);
+      // Object is null; update MDO and exit
+      Register mdo  = klass_RInfo;
+      __ mov_metadata(mdo, md->constant_encoding());
+      Address data_addr
+        = __ form_address(rscratch2, mdo,
+                          md->byte_offset_of_slot(data, DataLayout::DataLayout::header_offset()),
+                          LogBytesPerWord);
+      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+      __ ldr(rscratch1, data_addr);
+      __ orr(rscratch1, rscratch1, header_bits);
+      __ str(rscratch1, data_addr);
+      __ b(*obj_is_null);
+      __ bind(not_null);
+    } else {
+      __ cbz(obj, *obj_is_null);
+    }
+
+  if (!k->is_loaded()) {
+    klass2reg_with_patching(k_RInfo, op->info_for_patch());
+  } else {
+    __ mov_metadata(k_RInfo, k->constant_encoding());
+  }
+  __ verify_oop(obj);
+
+  if (op->fast_check()) {
+    // get object class
+    // not a safepoint as obj null check happens earlier
+    __ load_klass(rscratch1, obj);
+    __ cmp( rscratch1, k_RInfo);
+
+    __ br(Assembler::NE, *failure_target);
+    // successful cast, fall through to profile or jump
+  } else {
+    // get object class
+    // not a safepoint as obj null check happens earlier
+    __ load_klass(klass_RInfo, obj);
+    if (k->is_loaded()) {
+      // See if we get an immediate positive hit
+      __ ldr(rscratch1, Address(klass_RInfo, long(k->super_check_offset())));
+      __ cmp(k_RInfo, rscratch1);
+      if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
+        __ br(Assembler::NE, *failure_target);
+        // successful cast, fall through to profile or jump
+      } else {
+        // See if we get an immediate positive hit
+        __ br(Assembler::EQ, *success_target);
+        // check for self
+        __ cmp(klass_RInfo, k_RInfo);
+        __ br(Assembler::EQ, *success_target);
+
+        __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
+        __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+        __ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize)));
+        // result is a boolean
+        __ cbzw(klass_RInfo, *failure_target);
+        // successful cast, fall through to profile or jump
+      }
+    } else {
+      // perform the fast part of the checking logic
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+      // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+      __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
+      // result is a boolean
+      __ cbz(k_RInfo, *failure_target);
+      // successful cast, fall through to profile or jump
+    }
+  }
+  if (op->should_profile()) {
+    Register mdo  = klass_RInfo, recv = k_RInfo;
+    __ bind(profile_cast_success);
+    __ mov_metadata(mdo, md->constant_encoding());
+    __ load_klass(recv, obj);
+    Label update_done;
+    type_profile_helper(mdo, md, data, recv, success);
+    __ b(*success);
+
+    __ bind(profile_cast_failure);
+    __ mov_metadata(mdo, md->constant_encoding());
+    Address counter_addr
+      = __ form_address(rscratch2, mdo,
+                        md->byte_offset_of_slot(data, CounterData::count_offset()),
+                        LogBytesPerWord);
+    __ ldr(rscratch1, counter_addr);
+    __ sub(rscratch1, rscratch1, DataLayout::counter_increment);
+    __ str(rscratch1, counter_addr);
+    __ b(*failure);
+  }
+  __ b(*success);
+}
+
+
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+  LIR_Code code = op->code();
+  if (code == lir_store_check) {
+    Register value = op->object()->as_register();
+    Register array = op->array()->as_register();
+    Register k_RInfo = op->tmp1()->as_register();
+    Register klass_RInfo = op->tmp2()->as_register();
+    Register Rtmp1 = op->tmp3()->as_register();
+
+    CodeStub* stub = op->stub();
+
+    // check if it needs to be profiled
+    ciMethodData* md;
+    ciProfileData* data;
+
+    if (op->should_profile()) {
+      ciMethod* method = op->profiled_method();
+      assert(method != NULL, "Should have method");
+      int bci = op->profiled_bci();
+      md = method->method_data_or_null();
+      assert(md != NULL, "Sanity");
+      data = md->bci_to_data(bci);
+      assert(data != NULL,                "need data for type check");
+      assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+    }
+    Label profile_cast_success, profile_cast_failure, done;
+    Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+    Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+    if (op->should_profile()) {
+      Label not_null;
+      __ cbnz(value, not_null);
+      // Object is null; update MDO and exit
+      Register mdo  = klass_RInfo;
+      __ mov_metadata(mdo, md->constant_encoding());
+      Address data_addr
+        = __ form_address(rscratch2, mdo,
+                          md->byte_offset_of_slot(data, DataLayout::header_offset()),
+                          LogBytesPerInt);
+      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+      __ ldrw(rscratch1, data_addr);
+      __ orrw(rscratch1, rscratch1, header_bits);
+      __ strw(rscratch1, data_addr);
+      __ b(done);
+      __ bind(not_null);
+    } else {
+      __ cbz(value, done);
+    }
+
+    add_debug_info_for_null_check_here(op->info_for_exception());
+    __ load_klass(k_RInfo, array);
+    __ load_klass(klass_RInfo, value);
+
+    // get instance klass (it's already uncompressed)
+    __ ldr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+    __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
+    // result is a boolean
+    __ cbzw(k_RInfo, *failure_target);
+    // fall through to the success case
+
+    if (op->should_profile()) {
+      Register mdo  = klass_RInfo, recv = k_RInfo;
+      __ bind(profile_cast_success);
+      __ mov_metadata(mdo, md->constant_encoding());
+      __ load_klass(recv, value);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &done);
+      __ b(done);
+
+      __ bind(profile_cast_failure);
+      __ mov_metadata(mdo, md->constant_encoding());
+      Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+      __ lea(rscratch2, counter_addr);
+      __ ldr(rscratch1, Address(rscratch2));
+      __ sub(rscratch1, rscratch1, DataLayout::counter_increment);
+      __ str(rscratch1, Address(rscratch2));
+      __ b(*stub->entry());
+    }
+
+    __ bind(done);
+  } else if (code == lir_checkcast) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success;
+    emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
+    __ bind(success);
+    if (dst != obj) {
+      __ mov(dst, obj);
+    }
+  } else if (code == lir_instanceof) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success, failure, done;
+    emit_typecheck_helper(op, &success, &failure, &failure);
+    __ bind(failure);
+    __ mov(dst, zr);
+    __ b(done);
+    __ bind(success);
+    __ mov(dst, 1);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
+  Label retry_load, nope;
+  // flush and load exclusive from the memory location
+  // and fail if it is not what we expect
+  __ bind(retry_load);
+  __ ldaxrw(rscratch1, addr);
+  __ cmpw(rscratch1, cmpval);
+  __ cset(rscratch1, Assembler::NE);
+  __ br(Assembler::NE, nope);
+  // if we store+flush with no intervening write rscratch1 wil be zero
+  __ stlxrw(rscratch1, newval, addr);
+  // retry so we only ever return after a load fails to compare
+  // ensures we don't return a stale value after a failed write.
+  __ cbnzw(rscratch1, retry_load);
+  __ bind(nope);
+  __ membar(__ AnyAny);
+}
+
+void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
+  Label retry_load, nope;
+  // flush and load exclusive from the memory location
+  // and fail if it is not what we expect
+  __ bind(retry_load);
+  __ ldaxr(rscratch1, addr);
+  __ cmp(rscratch1, cmpval);
+  __ cset(rscratch1, Assembler::NE);
+  __ br(Assembler::NE, nope);
+  // if we store+flush with no intervening write rscratch1 wil be zero
+  __ stlxr(rscratch1, newval, addr);
+  // retry so we only ever return after a load fails to compare
+  // ensures we don't return a stale value after a failed write.
+  __ cbnz(rscratch1, retry_load);
+  __ bind(nope);
+  __ membar(__ AnyAny);
+}
+
+
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  assert(VM_Version::supports_cx8(), "wrong machine");
+  Register addr = as_reg(op->addr());
+  Register newval = as_reg(op->new_value());
+  Register cmpval = as_reg(op->cmp_value());
+  Label succeed, fail, around;
+
+  if (op->code() == lir_cas_obj) {
+    if (UseCompressedOops) {
+      Register t1 = op->tmp1()->as_register();
+      assert(op->tmp1()->is_valid(), "must be");
+      __ encode_heap_oop(t1, cmpval);
+      cmpval = t1;
+      __ encode_heap_oop(rscratch2, newval);
+      newval = rscratch2;
+      casw(addr, newval, cmpval);
+    } else {
+      casl(addr, newval, cmpval);
+    }
+  } else if (op->code() == lir_cas_int) {
+    casw(addr, newval, cmpval);
+  } else {
+    casl(addr, newval, cmpval);
+  }
+}
+
+
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+
+  Assembler::Condition acond, ncond;
+  switch (condition) {
+  case lir_cond_equal:        acond = Assembler::EQ; ncond = Assembler::NE; break;
+  case lir_cond_notEqual:     acond = Assembler::NE; ncond = Assembler::EQ; break;
+  case lir_cond_less:         acond = Assembler::LT; ncond = Assembler::GE; break;
+  case lir_cond_lessEqual:    acond = Assembler::LE; ncond = Assembler::GT; break;
+  case lir_cond_greaterEqual: acond = Assembler::GE; ncond = Assembler::LT; break;
+  case lir_cond_greater:      acond = Assembler::GT; ncond = Assembler::LE; break;
+  case lir_cond_belowEqual:   Unimplemented(); break;
+  case lir_cond_aboveEqual:   Unimplemented(); break;
+  default:                    ShouldNotReachHere();
+  }
+
+  assert(result->is_single_cpu() || result->is_double_cpu(),
+         "expect single register for result");
+  if (opr1->is_constant() && opr2->is_constant()
+      && opr1->type() == T_INT && opr2->type() == T_INT) {
+    jint val1 = opr1->as_jint();
+    jint val2 = opr2->as_jint();
+    if (val1 == 0 && val2 == 1) {
+      __ cset(result->as_register(), ncond);
+      return;
+    } else if (val1 == 1 && val2 == 0) {
+      __ cset(result->as_register(), acond);
+      return;
+    }
+  }
+
+  if (opr1->is_constant() && opr2->is_constant()
+      && opr1->type() == T_LONG && opr2->type() == T_LONG) {
+    jlong val1 = opr1->as_jlong();
+    jlong val2 = opr2->as_jlong();
+    if (val1 == 0 && val2 == 1) {
+      __ cset(result->as_register_lo(), ncond);
+      return;
+    } else if (val1 == 1 && val2 == 0) {
+      __ cset(result->as_register_lo(), acond);
+      return;
+    }
+  }
+
+  if (opr1->is_stack()) {
+    stack2reg(opr1, FrameMap::rscratch1_opr, result->type());
+    opr1 = FrameMap::rscratch1_opr;
+  } else if (opr1->is_constant()) {
+    LIR_Opr tmp
+      = opr1->type() == T_LONG ? FrameMap::rscratch1_long_opr : FrameMap::rscratch1_opr;
+    const2reg(opr1, tmp, lir_patch_none, NULL);
+    opr1 = tmp;
+  }
+
+  if (opr2->is_stack()) {
+    stack2reg(opr2, FrameMap::rscratch2_opr, result->type());
+    opr2 = FrameMap::rscratch2_opr;
+  } else if (opr2->is_constant()) {
+    LIR_Opr tmp
+      = opr2->type() == T_LONG ? FrameMap::rscratch2_long_opr : FrameMap::rscratch2_opr;
+    const2reg(opr2, tmp, lir_patch_none, NULL);
+    opr2 = tmp;
+  }
+
+  if (result->type() == T_LONG)
+    __ csel(result->as_register_lo(), opr1->as_register_lo(), opr2->as_register_lo(), acond);
+  else
+    __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond);
+}
+
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
+  assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
+
+  if (left->is_single_cpu()) {
+    Register lreg = left->as_register();
+    Register dreg = as_reg(dest);
+
+    if (right->is_single_cpu()) {
+      // cpu register - cpu register
+
+      assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT,
+             "should be");
+      Register rreg = right->as_register();
+      switch (code) {
+      case lir_add: __ addw (dest->as_register(), lreg, rreg); break;
+      case lir_sub: __ subw (dest->as_register(), lreg, rreg); break;
+      case lir_mul: __ mulw (dest->as_register(), lreg, rreg); break;
+      default:      ShouldNotReachHere();
+      }
+
+    } else if (right->is_double_cpu()) {
+      Register rreg = right->as_register_lo();
+      // single_cpu + double_cpu: can happen with obj+long
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      switch (code) {
+      case lir_add: __ add(dreg, lreg, rreg); break;
+      case lir_sub: __ sub(dreg, lreg, rreg); break;
+      default: ShouldNotReachHere();
+      }
+    } else if (right->is_constant()) {
+      // cpu register - constant
+      jlong c;
+
+      // FIXME.  This is fugly: we really need to factor all this logic.
+      switch(right->type()) {
+      case T_LONG:
+        c = right->as_constant_ptr()->as_jlong();
+        break;
+      case T_INT:
+      case T_ADDRESS:
+        c = right->as_constant_ptr()->as_jint();
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      if (c == 0 && dreg == lreg) {
+        COMMENT("effective nop elided");
+        return;
+      }
+      switch(left->type()) {
+      case T_INT:
+        switch (code) {
+        case lir_add: __ addw(dreg, lreg, c); break;
+        case lir_sub: __ subw(dreg, lreg, c); break;
+        default: ShouldNotReachHere();
+        }
+        break;
+      case T_OBJECT:
+      case T_ADDRESS:
+        switch (code) {
+        case lir_add: __ add(dreg, lreg, c); break;
+        case lir_sub: __ sub(dreg, lreg, c); break;
+        default: ShouldNotReachHere();
+        }
+        break;
+        ShouldNotReachHere();
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (left->is_double_cpu()) {
+    Register lreg_lo = left->as_register_lo();
+
+    if (right->is_double_cpu()) {
+      // cpu register - cpu register
+      Register rreg_lo = right->as_register_lo();
+      switch (code) {
+      case lir_add: __ add (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_sub: __ sub (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_mul: __ mul (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false, rscratch1); break;
+      case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true, rscratch1); break;
+      default:
+        ShouldNotReachHere();
+      }
+
+    } else if (right->is_constant()) {
+      jlong c = right->as_constant_ptr()->as_jlong_bits();
+      Register dreg = as_reg(dest);
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      if (c == 0 && dreg == lreg_lo) {
+        COMMENT("effective nop elided");
+        return;
+      }
+      switch (code) {
+        case lir_add: __ add(dreg, lreg_lo, c); break;
+        case lir_sub: __ sub(dreg, lreg_lo, c); break;
+        default:
+          ShouldNotReachHere();
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (left->is_single_fpu()) {
+    assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register");
+    switch (code) {
+    case lir_add: __ fadds (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_sub: __ fsubs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_mul: __ fmuls (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_div: __ fdivs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    default:
+      ShouldNotReachHere();
+    }
+  } else if (left->is_double_fpu()) {
+    if (right->is_double_fpu()) {
+      // cpu register - cpu register
+      switch (code) {
+      case lir_add: __ faddd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_sub: __ fsubd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_mul: __ fmuld (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_div: __ fdivd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      if (right->is_constant()) {
+        ShouldNotReachHere();
+      }
+      ShouldNotReachHere();
+    }
+  } else if (left->is_single_stack() || left->is_address()) {
+    assert(left == dest, "left and dest must be equal");
+    ShouldNotReachHere();
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); }
+
+
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
+  switch(code) {
+  case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break;
+  case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break;
+  default      : ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+
+  assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register");
+  Register Rleft = left->is_single_cpu() ? left->as_register() :
+                                           left->as_register_lo();
+   if (dst->is_single_cpu()) {
+     Register Rdst = dst->as_register();
+     if (right->is_constant()) {
+       switch (code) {
+         case lir_logic_and: __ andw (Rdst, Rleft, right->as_jint()); break;
+         case lir_logic_or:  __ orrw (Rdst, Rleft, right->as_jint()); break;
+         case lir_logic_xor: __ eorw (Rdst, Rleft, right->as_jint()); break;
+         default: ShouldNotReachHere(); break;
+       }
+     } else {
+       Register Rright = right->is_single_cpu() ? right->as_register() :
+                                                  right->as_register_lo();
+       switch (code) {
+         case lir_logic_and: __ andw (Rdst, Rleft, Rright); break;
+         case lir_logic_or:  __ orrw (Rdst, Rleft, Rright); break;
+         case lir_logic_xor: __ eorw (Rdst, Rleft, Rright); break;
+         default: ShouldNotReachHere(); break;
+       }
+     }
+   } else {
+     Register Rdst = dst->as_register_lo();
+     if (right->is_constant()) {
+       switch (code) {
+         case lir_logic_and: __ andr (Rdst, Rleft, right->as_jlong()); break;
+         case lir_logic_or:  __ orr (Rdst, Rleft, right->as_jlong()); break;
+         case lir_logic_xor: __ eor (Rdst, Rleft, right->as_jlong()); break;
+         default: ShouldNotReachHere(); break;
+       }
+     } else {
+       Register Rright = right->is_single_cpu() ? right->as_register() :
+                                                  right->as_register_lo();
+       switch (code) {
+         case lir_logic_and: __ andr (Rdst, Rleft, Rright); break;
+         case lir_logic_or:  __ orr (Rdst, Rleft, Rright); break;
+         case lir_logic_xor: __ eor (Rdst, Rleft, Rright); break;
+         default: ShouldNotReachHere(); break;
+       }
+     }
+   }
+}
+
+
+
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); }
+
+
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+  if (opr1->is_constant() && opr2->is_single_cpu()) {
+    // tableswitch
+    Register reg = as_reg(opr2);
+    struct tableswitch &table = switches[opr1->as_constant_ptr()->as_jint()];
+    __ tableswitch(reg, table._first_key, table._last_key, table._branches, table._after);
+  } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) {
+    Register reg1 = as_reg(opr1);
+    if (opr2->is_single_cpu()) {
+      // cpu register - cpu register
+      Register reg2 = opr2->as_register();
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+        __ cmp(reg1, reg2);
+      } else {
+        assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
+        __ cmpw(reg1, reg2);
+      }
+      return;
+    }
+    if (opr2->is_double_cpu()) {
+      // cpu register - cpu register
+      Register reg2 = opr2->as_register_lo();
+      __ cmp(reg1, reg2);
+      return;
+    }
+
+    if (opr2->is_constant()) {
+      jlong imm;
+      switch(opr2->type()) {
+      case T_LONG:
+        imm = opr2->as_constant_ptr()->as_jlong();
+        break;
+      case T_INT:
+      case T_ADDRESS:
+        imm = opr2->as_constant_ptr()->as_jint();
+        break;
+      case T_OBJECT:
+      case T_ARRAY:
+        imm = jlong(opr2->as_constant_ptr()->as_jobject());
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+
+      if (Assembler::operand_valid_for_add_sub_immediate(imm)) {
+        if (type2aelembytes(opr1->type()) <= 4)
+          __ cmpw(reg1, imm);
+        else
+          __ cmp(reg1, imm);
+        return;
+      } else {
+        __ mov(rscratch1, imm);
+        if (type2aelembytes(opr1->type()) <= 4)
+          __ cmpw(reg1, rscratch1);
+        else
+          __ cmp(reg1, rscratch1);
+        return;
+      }
+    } else
+      ShouldNotReachHere();
+  } else if (opr1->is_single_fpu()) {
+    FloatRegister reg1 = opr1->as_float_reg();
+    assert(opr2->is_single_fpu(), "expect single float register");
+    FloatRegister reg2 = opr2->as_float_reg();
+    __ fcmps(reg1, reg2);
+  } else if (opr1->is_double_fpu()) {
+    FloatRegister reg1 = opr1->as_double_reg();
+    assert(opr2->is_double_fpu(), "expect double float register");
+    FloatRegister reg2 = opr2->as_double_reg();
+    __ fcmpd(reg1, reg2);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){
+  if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
+    bool is_unordered_less = (code == lir_ucmp_fd2i);
+    if (left->is_single_fpu()) {
+      __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register());
+    } else if (left->is_double_fpu()) {
+      __ float_cmp(false, is_unordered_less ? -1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register());
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (code == lir_cmp_l2i) {
+    Label done;
+    __ cmp(left->as_register_lo(), right->as_register_lo());
+    __ mov(dst->as_register(), (u_int64_t)-1L);
+    __ br(Assembler::LT, done);
+    __ csinc(dst->as_register(), zr, zr, Assembler::EQ);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::align_call(LIR_Code code) {  }
+
+
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  __ trampoline_call(Address(op->addr(), rtype));
+  add_call_info(code_offset(), op->info());
+}
+
+
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  __ ic_call(op->addr());
+  add_call_info(code_offset(), op->info());
+}
+
+
+/* Currently, vtable-dispatch is only enabled for sparc platforms */
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+  ShouldNotReachHere();
+}
+
+
+void LIR_Assembler::emit_static_call_stub() {
+  address call_pc = __ pc();
+  address stub = __ start_a_stub(call_stub_size);
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  __ relocate(static_stub_Relocation::spec(call_pc));
+  __ mov_metadata(rmethod, (Metadata*)NULL);
+  __ movptr(rscratch1, 0);
+  __ br(rscratch1);
+
+  assert(__ offset() - start <= call_stub_size, "stub too big");
+  __ end_a_stub();
+}
+
+
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  assert(exceptionOop->as_register() == r0, "must match");
+  assert(exceptionPC->as_register() == r3, "must match");
+
+  // exception object is not added to oop map by LinearScan
+  // (LinearScan assumes that no oops are in fixed registers)
+  info->add_register_oop(exceptionOop);
+  Runtime1::StubID unwind_id;
+
+  // get current pc information
+  // pc is only needed if the method has an exception handler, the unwind code does not need it.
+  int pc_for_athrow_offset = __ offset();
+  InternalAddress pc_for_athrow(__ pc());
+  __ adr(exceptionPC->as_register(), pc_for_athrow);
+  add_call_info(pc_for_athrow_offset, info); // for exception handler
+
+  __ verify_not_null_oop(r0);
+  // search an exception handler (r0: exception oop, r3: throwing pc)
+  if (compilation()->has_fpu_code()) {
+    unwind_id = Runtime1::handle_exception_id;
+  } else {
+    unwind_id = Runtime1::handle_exception_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
+
+  // FIXME: enough room for two byte trap   ????
+  __ nop();
+}
+
+
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  assert(exceptionOop->as_register() == r0, "must match");
+
+  __ b(_unwind_handler_entry);
+}
+
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+
+  switch (left->type()) {
+    case T_INT: {
+      switch (code) {
+      case lir_shl:  __ lslvw (dreg, lreg, count->as_register()); break;
+      case lir_shr:  __ asrvw (dreg, lreg, count->as_register()); break;
+      case lir_ushr: __ lsrvw (dreg, lreg, count->as_register()); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    case T_LONG:
+    case T_ADDRESS:
+    case T_OBJECT:
+      switch (code) {
+      case lir_shl:  __ lslv (dreg, lreg, count->as_register()); break;
+      case lir_shr:  __ asrv (dreg, lreg, count->as_register()); break;
+      case lir_ushr: __ lsrv (dreg, lreg, count->as_register()); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+}
+
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+  Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+  Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+
+  switch (left->type()) {
+    case T_INT: {
+      switch (code) {
+      case lir_shl:  __ lslw (dreg, lreg, count); break;
+      case lir_shr:  __ asrw (dreg, lreg, count); break;
+      case lir_ushr: __ lsrw (dreg, lreg, count); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    case T_LONG:
+    case T_ADDRESS:
+    case T_OBJECT:
+      switch (code) {
+      case lir_shl:  __ lsl (dreg, lreg, count); break;
+      case lir_shr:  __ asr (dreg, lreg, count); break;
+      case lir_ushr: __ lsr (dreg, lreg, count); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+}
+
+
+void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ str (r, Address(sp, offset_from_rsp_in_bytes));
+}
+
+
+void LIR_Assembler::store_parameter(jint c,     int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ mov (rscratch1, c);
+  __ str (rscratch1, Address(sp, offset_from_rsp_in_bytes));
+}
+
+
+void LIR_Assembler::store_parameter(jobject o,  int offset_from_rsp_in_words) {
+  ShouldNotReachHere();
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ lea(rscratch1, __ constant_oop_address(o));
+  __ str(rscratch1, Address(sp, offset_from_rsp_in_bytes));
+}
+
+
+// This code replaces a call to arraycopy; no exception may
+// be thrown in this code, they must be thrown in the System.arraycopy
+// activation frame; we could save some checks if this would not be the case
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+  ciArrayKlass* default_type = op->expected_type();
+  Register src = op->src()->as_register();
+  Register dst = op->dst()->as_register();
+  Register src_pos = op->src_pos()->as_register();
+  Register dst_pos = op->dst_pos()->as_register();
+  Register length  = op->length()->as_register();
+  Register tmp = op->tmp()->as_register();
+
+  CodeStub* stub = op->stub();
+  int flags = op->flags();
+  BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+  if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+  // if we don't know anything, just go through the generic arraycopy
+  if (default_type == NULL // || basic_type == T_OBJECT
+      ) {
+    Label done;
+    assert(src == r1 && src_pos == r2, "mismatch in calling convention");
+
+    // Save the arguments in case the generic arraycopy fails and we
+    // have to fall back to the JNI stub
+    __ stp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+    __ stp(length,  src_pos, Address(sp, 2*BytesPerWord));
+    __ str(src,              Address(sp, 4*BytesPerWord));
+
+    address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
+    address copyfunc_addr = StubRoutines::generic_arraycopy();
+
+    // The arguments are in java calling convention so we shift them
+    // to C convention
+    assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4);
+    __ mov(c_rarg0, j_rarg0);
+    assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4);
+    __ mov(c_rarg1, j_rarg1);
+    assert_different_registers(c_rarg2, j_rarg3, j_rarg4);
+    __ mov(c_rarg2, j_rarg2);
+    assert_different_registers(c_rarg3, j_rarg4);
+    __ mov(c_rarg3, j_rarg3);
+    __ mov(c_rarg4, j_rarg4);
+    if (copyfunc_addr == NULL) { // Use C version if stub was not generated
+      __ mov(rscratch1, RuntimeAddress(C_entry));
+      __ blrt(rscratch1, 5, 0, 1);
+    } else {
+#ifndef PRODUCT
+      if (PrintC1Statistics) {
+        __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
+      }
+#endif
+      __ far_call(RuntimeAddress(copyfunc_addr));
+    }
+
+    __ cbz(r0, *stub->continuation());
+
+    // Reload values from the stack so they are where the stub
+    // expects them.
+    __ ldp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+    __ ldp(length,  src_pos, Address(sp, 2*BytesPerWord));
+    __ ldr(src,              Address(sp, 4*BytesPerWord));
+
+    if (copyfunc_addr != NULL) {
+      // r0 is -1^K where K == partial copied count
+      __ eonw(rscratch1, r0, 0);
+      // adjust length down and src/end pos up by partial copied count
+      __ subw(length, length, rscratch1);
+      __ addw(src_pos, src_pos, rscratch1);
+      __ addw(dst_pos, dst_pos, rscratch1);
+    }
+    __ b(*stub->entry());
+
+    __ bind(*stub->continuation());
+    return;
+  }
+
+  assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
+
+  int elem_size = type2aelembytes(basic_type);
+  int shift_amount;
+  int scale = exact_log2(elem_size);
+
+  Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
+  Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
+  Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
+  Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
+
+  // test for NULL
+  if (flags & LIR_OpArrayCopy::src_null_check) {
+    __ cbz(src, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_null_check) {
+    __ cbz(dst, *stub->entry());
+  }
+
+  // check if negative
+  if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+    __ cmpw(src_pos, 0);
+    __ br(Assembler::LT, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+    __ cmpw(dst_pos, 0);
+    __ br(Assembler::LT, *stub->entry());
+  }
+
+  if (flags & LIR_OpArrayCopy::length_positive_check) {
+    __ cmpw(length, 0);
+    __ br(Assembler::LT, *stub->entry());
+  }
+
+  if (flags & LIR_OpArrayCopy::src_range_check) {
+    __ addw(tmp, src_pos, length);
+    __ ldrw(rscratch1, src_length_addr);
+    __ cmpw(tmp, rscratch1);
+    __ br(Assembler::HI, *stub->entry());
+  }
+  if (flags & LIR_OpArrayCopy::dst_range_check) {
+    __ addw(tmp, dst_pos, length);
+    __ ldrw(rscratch1, dst_length_addr);
+    __ cmpw(tmp, rscratch1);
+    __ br(Assembler::HI, *stub->entry());
+  }
+
+  // FIXME: The logic in LIRGenerator::arraycopy_helper clears
+  // length_positive_check if the source of our length operand is an
+  // arraylength.  However, that arraylength might be zero, and the
+  // stub that we're about to call contains an assertion that count !=
+  // 0 .  So we make this check purely in order not to trigger an
+  // assertion failure.
+  __ cbzw(length, *stub->continuation());
+
+  if (flags & LIR_OpArrayCopy::type_check) {
+    // We don't know the array types are compatible
+    if (basic_type != T_OBJECT) {
+      // Simple test for basic type arrays
+      if (UseCompressedClassPointers) {
+        __ ldrw(tmp, src_klass_addr);
+        __ ldrw(rscratch1, dst_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(tmp, src_klass_addr);
+        __ ldr(rscratch1, dst_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::NE, *stub->entry());
+    } else {
+      // For object arrays, if src is a sub class of dst then we can
+      // safely do the copy.
+      Label cont, slow;
+
+#define PUSH(r1, r2)                                    \
+      stp(r1, r2, __ pre(sp, -2 * wordSize));
+
+#define POP(r1, r2)                                     \
+      ldp(r1, r2, __ post(sp, 2 * wordSize));
+
+      __ PUSH(src, dst);
+
+      __ load_klass(src, src);
+      __ load_klass(dst, dst);
+
+      __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
+
+      __ PUSH(src, dst);
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      __ POP(src, dst);
+
+      __ cbnz(src, cont);
+
+      __ bind(slow);
+      __ POP(src, dst);
+
+      address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+      if (copyfunc_addr != NULL) { // use stub if available
+        // src is not a sub class of dst so we have to do a
+        // per-element check.
+
+        int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
+        if ((flags & mask) != mask) {
+          // Check that at least both of them object arrays.
+          assert(flags & mask, "one of the two should be known to be an object array");
+
+          if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+            __ load_klass(tmp, src);
+          } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+            __ load_klass(tmp, dst);
+          }
+          int lh_offset = in_bytes(Klass::layout_helper_offset());
+          Address klass_lh_addr(tmp, lh_offset);
+          jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+          __ ldrw(rscratch1, klass_lh_addr);
+          __ mov(rscratch2, objArray_lh);
+          __ eorw(rscratch1, rscratch1, rscratch2);
+          __ cbnzw(rscratch1, *stub->entry());
+        }
+
+       // Spill because stubs can use any register they like and it's
+       // easier to restore just those that we care about.
+        __ stp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+        __ stp(length,  src_pos, Address(sp, 2*BytesPerWord));
+        __ str(src,              Address(sp, 4*BytesPerWord));
+
+        __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale)));
+        __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type));
+        assert_different_registers(c_rarg0, dst, dst_pos, length);
+        __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale)));
+        __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type));
+        assert_different_registers(c_rarg1, dst, length);
+        __ uxtw(c_rarg2, length);
+        assert_different_registers(c_rarg2, dst);
+
+        __ load_klass(c_rarg4, dst);
+        __ ldr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset()));
+        __ ldrw(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
+        __ far_call(RuntimeAddress(copyfunc_addr));
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          Label failed;
+          __ cbnz(r0, failed);
+          __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
+          __ bind(failed);
+        }
+#endif
+
+        __ cbz(r0, *stub->continuation());
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
+        }
+#endif
+        assert_different_registers(dst, dst_pos, length, src_pos, src, r0, rscratch1);
+
+        // Restore previously spilled arguments
+        __ ldp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
+        __ ldp(length,  src_pos, Address(sp, 2*BytesPerWord));
+        __ ldr(src,              Address(sp, 4*BytesPerWord));
+
+        // return value is -1^K where K is partial copied count
+        __ eonw(rscratch1, r0, zr);
+        // adjust length down and src/end pos up by partial copied count
+        __ subw(length, length, rscratch1);
+        __ addw(src_pos, src_pos, rscratch1);
+        __ addw(dst_pos, dst_pos, rscratch1);
+      }
+
+      __ b(*stub->entry());
+
+      __ bind(cont);
+      __ POP(src, dst);
+    }
+  }
+
+#ifdef ASSERT
+  if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+    // Sanity check the known type with the incoming class.  For the
+    // primitive case the types must match exactly with src.klass and
+    // dst.klass each exactly matching the default type.  For the
+    // object array case, if no type check is needed then either the
+    // dst type is exactly the expected type and the src type is a
+    // subtype which we can't check or src is the same array as dst
+    // but not necessarily exactly of type default_type.
+    Label known_ok, halt;
+    __ mov_metadata(tmp, default_type->constant_encoding());
+    if (UseCompressedClassPointers) {
+      __ encode_klass_not_null(tmp);
+    }
+
+    if (basic_type != T_OBJECT) {
+
+      if (UseCompressedClassPointers) {
+        __ ldrw(rscratch1, dst_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(rscratch1, dst_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::NE, halt);
+      if (UseCompressedClassPointers) {
+        __ ldrw(rscratch1, src_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(rscratch1, src_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::EQ, known_ok);
+    } else {
+      if (UseCompressedClassPointers) {
+        __ ldrw(rscratch1, dst_klass_addr);
+        __ cmpw(tmp, rscratch1);
+      } else {
+        __ ldr(rscratch1, dst_klass_addr);
+        __ cmp(tmp, rscratch1);
+      }
+      __ br(Assembler::EQ, known_ok);
+      __ cmp(src, dst);
+      __ br(Assembler::EQ, known_ok);
+    }
+    __ bind(halt);
+    __ stop("incorrect type information in arraycopy");
+    __ bind(known_ok);
+  }
+#endif
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
+  }
+#endif
+
+  __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale)));
+  __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type));
+  assert_different_registers(c_rarg0, dst, dst_pos, length);
+  __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale)));
+  __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type));
+  assert_different_registers(c_rarg1, dst, length);
+  __ uxtw(c_rarg2, length);
+  assert_different_registers(c_rarg2, dst);
+
+  bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+  bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+  const char *name;
+  address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+
+ CodeBlob *cb = CodeCache::find_blob(entry);
+ if (cb) {
+   __ far_call(RuntimeAddress(entry));
+ } else {
+   __ call_VM_leaf(entry, 3);
+ }
+
+  __ bind(*stub->continuation());
+}
+
+
+
+
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+  Register obj = op->obj_opr()->as_register();  // may not be an oop
+  Register hdr = op->hdr_opr()->as_register();
+  Register lock = op->lock_opr()->as_register();
+  if (!UseFastLocking) {
+    __ b(*op->stub()->entry());
+  } else if (op->code() == lir_lock) {
+    Register scratch = noreg;
+    if (UseBiasedLocking) {
+      scratch = op->scratch_opr()->as_register();
+    }
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    // add debug info for NullPointerException only if one is possible
+    int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
+    if (op->info() != NULL) {
+      add_debug_info_for_null_check(null_check_offset, op->info());
+    }
+    // done
+  } else if (op->code() == lir_unlock) {
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    __ unlock_object(hdr, obj, lock, *op->stub()->entry());
+  } else {
+    Unimplemented();
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+  ciMethod* method = op->profiled_method();
+  int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
+
+  // Update counter for all call types
+  ciMethodData* md = method->method_data_or_null();
+  assert(md != NULL, "Sanity");
+  ciProfileData* data = md->bci_to_data(bci);
+  assert(data->is_CounterData(), "need CounterData for calls");
+  assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+  Register mdo  = op->mdo()->as_register();
+  __ mov_metadata(mdo, md->constant_encoding());
+  Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+  Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes
+  if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // required for optimized MH invokes
+      C1ProfileVirtualCalls) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ addptr(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          __ mov_metadata(rscratch1, known_klass->constant_encoding());
+          __ lea(rscratch2, recv_addr);
+          __ str(rscratch1, Address(rscratch2));
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ addptr(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+    } else {
+      __ load_klass(recv, recv);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+      __ addptr(counter_addr, DataLayout::counter_increment);
+
+      __ bind(update_done);
+    }
+  } else {
+    // Static call
+    __ addptr(counter_addr, DataLayout::counter_increment);
+  }
+}
+
+
+void LIR_Assembler::emit_delay(LIR_OpDelay*) {
+  Unimplemented();
+}
+
+
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
+  __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
+}
+
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+  assert(op->crc()->is_single_cpu(),  "crc must be register");
+  assert(op->val()->is_single_cpu(),  "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+  Register crc = op->crc()->as_register();
+  Register val = op->val()->as_register();
+  Register res = op->result_opr()->as_register();
+
+  assert_different_registers(val, crc, res);
+  unsigned long offset;
+  __ adrp(res, ExternalAddress(StubRoutines::crc_table_addr()), offset);
+  if (offset) __ add(res, res, offset);
+
+  __ ornw(crc, zr, crc); // ~crc
+  __ update_byte_crc32(crc, val, res);
+  __ ornw(res, zr, crc); // ~crc
+}
+
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+  COMMENT("emit_profile_type {");
+  Register obj = op->obj()->as_register();
+  Register tmp = op->tmp()->as_pointer_register();
+  Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label update, next, none;
+
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+  assert(mdo_addr.base() != rscratch1, "wrong register");
+
+  __ verify_oop(obj);
+
+  if (tmp != obj) {
+    __ mov(tmp, obj);
+  }
+  if (do_null) {
+    __ cbnz(tmp, update);
+    if (!TypeEntries::was_null_seen(current_klass)) {
+      __ ldr(rscratch2, mdo_addr);
+      __ orr(rscratch2, rscratch2, TypeEntries::null_seen);
+      __ str(rscratch2, mdo_addr);
+    }
+    if (do_update) {
+#ifndef ASSERT
+      __ b(next);
+    }
+#else
+      __ b(next);
+    }
+  } else {
+    __ cbnz(tmp, update);
+    __ stop("unexpected null obj");
+#endif
+  }
+
+  __ bind(update);
+
+  if (do_update) {
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      Label ok;
+      __ load_klass(tmp, tmp);
+      __ mov_metadata(rscratch1, exact_klass->constant_encoding());
+      __ eor(rscratch1, tmp, rscratch1);
+      __ cbz(rscratch1, ok);
+      __ stop("exact klass and actual klass differ");
+      __ bind(ok);
+    }
+#endif
+    if (!no_conflict) {
+      if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+        if (exact_klass != NULL) {
+          __ mov_metadata(tmp, exact_klass->constant_encoding());
+        } else {
+          __ load_klass(tmp, tmp);
+        }
+
+        __ ldr(rscratch2, mdo_addr);
+        __ eor(tmp, tmp, rscratch2);
+        __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+        // klass seen before, nothing to do. The unknown bit may have been
+        // set already but no need to check.
+        __ cbz(rscratch1, next);
+
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          __ cbz(rscratch2, none);
+          __ cmp(rscratch2, TypeEntries::null_seen);
+          __ br(Assembler::EQ, none);
+          // There is a chance that the checks above (re-reading profiling
+          // data from memory) fail if another thread has just set the
+          // profiling to this obj's klass
+          __ dmb(Assembler::ISHLD);
+          __ ldr(rscratch2, mdo_addr);
+          __ eor(tmp, tmp, rscratch2);
+          __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+          __ cbz(rscratch1, next);
+        }
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        __ ldr(tmp, mdo_addr);
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+      }
+
+      // different than before. Cannot keep accurate profile.
+      __ ldr(rscratch2, mdo_addr);
+      __ orr(rscratch2, rscratch2, TypeEntries::type_unknown);
+      __ str(rscratch2, mdo_addr);
+
+      if (TypeEntries::is_type_none(current_klass)) {
+        __ b(next);
+
+        __ bind(none);
+        // first time here. Set profile type.
+        __ str(tmp, mdo_addr);
+      }
+    } else {
+      // There's a single possible klass at this profile point
+      assert(exact_klass != NULL, "should be");
+      if (TypeEntries::is_type_none(current_klass)) {
+        __ mov_metadata(tmp, exact_klass->constant_encoding());
+        __ ldr(rscratch2, mdo_addr);
+        __ eor(tmp, tmp, rscratch2);
+        __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+        __ cbz(rscratch1, next);
+#ifdef ASSERT
+        {
+          Label ok;
+          __ ldr(rscratch1, mdo_addr);
+          __ cbz(rscratch1, ok);
+          __ cmp(rscratch1, TypeEntries::null_seen);
+          __ br(Assembler::EQ, ok);
+          // may have been set by another thread
+          __ dmb(Assembler::ISHLD);
+          __ mov_metadata(rscratch1, exact_klass->constant_encoding());
+          __ ldr(rscratch2, mdo_addr);
+          __ eor(rscratch2, rscratch1, rscratch2);
+          __ andr(rscratch2, rscratch2, TypeEntries::type_mask);
+          __ cbz(rscratch2, ok);
+
+          __ stop("unexpected profiling mismatch");
+          __ bind(ok);
+          __ pop(tmp);
+        }
+#endif
+        // first time here. Set profile type.
+        __ ldr(tmp, mdo_addr);
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        __ ldr(tmp, mdo_addr);
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+
+        __ orr(tmp, tmp, TypeEntries::type_unknown);
+        __ str(tmp, mdo_addr);
+        // FIXME: Write barrier needed here?
+      }
+    }
+
+    __ bind(next);
+  }
+  COMMENT("} emit_profile_type");
+}
+
+
+void LIR_Assembler::align_backward_branch_target() {
+}
+
+
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+  if (left->is_single_cpu()) {
+    assert(dest->is_single_cpu(), "expect single result reg");
+    __ negw(dest->as_register(), left->as_register());
+  } else if (left->is_double_cpu()) {
+    assert(dest->is_double_cpu(), "expect double result reg");
+    __ neg(dest->as_register_lo(), left->as_register_lo());
+  } else if (left->is_single_fpu()) {
+    assert(dest->is_single_fpu(), "expect single float result reg");
+    __ fnegs(dest->as_float_reg(), left->as_float_reg());
+  } else {
+    assert(left->is_double_fpu(), "expect double float operand reg");
+    assert(dest->is_double_fpu(), "expect double float result reg");
+    __ fnegd(dest->as_double_reg(), left->as_double_reg());
+  }
+}
+
+
+void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) {
+  __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr()));
+}
+
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  assert(!tmp->is_valid(), "don't need temporary");
+
+  CodeBlob *cb = CodeCache::find_blob(dest);
+  if (cb) {
+    __ far_call(RuntimeAddress(dest));
+  } else {
+    __ mov(rscratch1, RuntimeAddress(dest));
+    int len = args->length();
+    int type = 0;
+    if (! result->is_illegal()) {
+      switch (result->type()) {
+      case T_VOID:
+        type = 0;
+        break;
+      case T_INT:
+      case T_LONG:
+      case T_OBJECT:
+        type = 1;
+        break;
+      case T_FLOAT:
+        type = 2;
+        break;
+      case T_DOUBLE:
+        type = 3;
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+    int num_gpargs = 0;
+    int num_fpargs = 0;
+    for (int i = 0; i < args->length(); i++) {
+      LIR_Opr arg = args->at(i);
+      if (arg->type() == T_FLOAT || arg->type() == T_DOUBLE) {
+        num_fpargs++;
+      } else {
+        num_gpargs++;
+      }
+    }
+    __ blrt(rscratch1, num_gpargs, num_fpargs, type);
+  }
+
+  if (info != NULL) {
+    add_call_info_here(info);
+  }
+  __ maybe_isb();
+}
+
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+  if (dest->is_address() || src->is_address()) {
+    move_op(src, dest, type, lir_patch_none, info,
+            /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+#ifdef ASSERT
+// emit run-time assertion
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  assert(op->code() == lir_assert, "must be");
+
+  if (op->in_opr1()->is_valid()) {
+    assert(op->in_opr2()->is_valid(), "both operands must be valid");
+    comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
+  } else {
+    assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
+    assert(op->condition() == lir_cond_always, "no other conditions allowed");
+  }
+
+  Label ok;
+  if (op->condition() != lir_cond_always) {
+    Assembler::Condition acond = Assembler::AL;
+    switch (op->condition()) {
+      case lir_cond_equal:        acond = Assembler::EQ;  break;
+      case lir_cond_notEqual:     acond = Assembler::NE;  break;
+      case lir_cond_less:         acond = Assembler::LT;  break;
+      case lir_cond_lessEqual:    acond = Assembler::LE;  break;
+      case lir_cond_greaterEqual: acond = Assembler::GE;  break;
+      case lir_cond_greater:      acond = Assembler::GT;  break;
+      case lir_cond_belowEqual:   acond = Assembler::LS;  break;
+      case lir_cond_aboveEqual:   acond = Assembler::HS;  break;
+      default:                    ShouldNotReachHere();
+    }
+    __ br(acond, ok);
+  }
+  if (op->halt()) {
+    const char* str = __ code_string(op->msg());
+    __ stop(str);
+  } else {
+    breakpoint();
+  }
+  __ bind(ok);
+}
+#endif
+
+#ifndef PRODUCT
+#define COMMENT(x)   do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+void LIR_Assembler::membar() {
+  COMMENT("membar");
+  __ membar(MacroAssembler::AnyAny);
+}
+
+void LIR_Assembler::membar_acquire() {
+  __ membar(Assembler::LoadLoad|Assembler::LoadStore);
+}
+
+void LIR_Assembler::membar_release() {
+  __ membar(Assembler::LoadStore|Assembler::StoreStore);
+}
+
+void LIR_Assembler::membar_loadload() {
+  __ membar(Assembler::LoadLoad);
+}
+
+void LIR_Assembler::membar_storestore() {
+  __ membar(MacroAssembler::StoreStore);
+}
+
+void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
+
+void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+  __ mov(result_reg->as_register(), rthread);
+}
+
+
+void LIR_Assembler::peephole(LIR_List *lir) {
+#if 0
+  if (tableswitch_count >= max_tableswitches)
+    return;
+
+  /*
+    This finite-state automaton recognizes sequences of compare-and-
+    branch instructions.  We will turn them into a tableswitch.  You
+    could argue that C1 really shouldn't be doing this sort of
+    optimization, but without it the code is really horrible.
+  */
+
+  enum { start_s, cmp1_s, beq_s, cmp_s } state;
+  int first_key, last_key = -2147483648;
+  int next_key = 0;
+  int start_insn = -1;
+  int last_insn = -1;
+  Register reg = noreg;
+  LIR_Opr reg_opr;
+  state = start_s;
+
+  LIR_OpList* inst = lir->instructions_list();
+  for (int i = 0; i < inst->length(); i++) {
+    LIR_Op* op = inst->at(i);
+    switch (state) {
+    case start_s:
+      first_key = -1;
+      start_insn = i;
+      switch (op->code()) {
+      case lir_cmp:
+        LIR_Opr opr1 = op->as_Op2()->in_opr1();
+        LIR_Opr opr2 = op->as_Op2()->in_opr2();
+        if (opr1->is_cpu_register() && opr1->is_single_cpu()
+            && opr2->is_constant()
+            && opr2->type() == T_INT) {
+          reg_opr = opr1;
+          reg = opr1->as_register();
+          first_key = opr2->as_constant_ptr()->as_jint();
+          next_key = first_key + 1;
+          state = cmp_s;
+          goto next_state;
+        }
+        break;
+      }
+      break;
+    case cmp_s:
+      switch (op->code()) {
+      case lir_branch:
+        if (op->as_OpBranch()->cond() == lir_cond_equal) {
+          state = beq_s;
+          last_insn = i;
+          goto next_state;
+        }
+      }
+      state = start_s;
+      break;
+    case beq_s:
+      switch (op->code()) {
+      case lir_cmp: {
+        LIR_Opr opr1 = op->as_Op2()->in_opr1();
+        LIR_Opr opr2 = op->as_Op2()->in_opr2();
+        if (opr1->is_cpu_register() && opr1->is_single_cpu()
+            && opr1->as_register() == reg
+            && opr2->is_constant()
+            && opr2->type() == T_INT
+            && opr2->as_constant_ptr()->as_jint() == next_key) {
+          last_key = next_key;
+          next_key++;
+          state = cmp_s;
+          goto next_state;
+        }
+      }
+      }
+      last_key = next_key;
+      state = start_s;
+      break;
+    default:
+      assert(false, "impossible state");
+    }
+    if (state == start_s) {
+      if (first_key < last_key - 5L && reg != noreg) {
+        {
+          // printf("found run register %d starting at insn %d low value %d high value %d\n",
+          //        reg->encoding(),
+          //        start_insn, first_key, last_key);
+          //   for (int i = 0; i < inst->length(); i++) {
+          //     inst->at(i)->print();
+          //     tty->print("\n");
+          //   }
+          //   tty->print("\n");
+        }
+
+        struct tableswitch *sw = &switches[tableswitch_count];
+        sw->_insn_index = start_insn, sw->_first_key = first_key,
+          sw->_last_key = last_key, sw->_reg = reg;
+        inst->insert_before(last_insn + 1, new LIR_OpLabel(&sw->_after));
+        {
+          // Insert the new table of branches
+          int offset = last_insn;
+          for (int n = first_key; n < last_key; n++) {
+            inst->insert_before
+              (last_insn + 1,
+               new LIR_OpBranch(lir_cond_always, T_ILLEGAL,
+                                inst->at(offset)->as_OpBranch()->label()));
+            offset -= 2, i++;
+          }
+        }
+        // Delete all the old compare-and-branch instructions
+        for (int n = first_key; n < last_key; n++) {
+          inst->remove_at(start_insn);
+          inst->remove_at(start_insn);
+        }
+        // Insert the tableswitch instruction
+        inst->insert_before(start_insn,
+                            new LIR_Op2(lir_cmp, lir_cond_always,
+                                        LIR_OprFact::intConst(tableswitch_count),
+                                        reg_opr));
+        inst->insert_before(start_insn + 1, new LIR_OpLabel(&sw->_branches));
+        tableswitch_count++;
+      }
+      reg = noreg;
+      last_key = -2147483648;
+    }
+  next_state:
+    ;
+  }
+#endif
+}
+
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) {
+  Address addr = as_Address(src->as_address_ptr(), noreg);
+  BasicType type = src->type();
+  bool is_oop = type == T_OBJECT || type == T_ARRAY;
+
+  void (MacroAssembler::* lda)(Register Rd, Register Ra);
+  void (MacroAssembler::* add)(Register Rd, Register Rn, RegisterOrConstant increment);
+  void (MacroAssembler::* stl)(Register Rs, Register Rt, Register Rn);
+
+  switch(type) {
+  case T_INT:
+    lda = &MacroAssembler::ldaxrw;
+    add = &MacroAssembler::addw;
+    stl = &MacroAssembler::stlxrw;
+    break;
+  case T_LONG:
+    lda = &MacroAssembler::ldaxr;
+    add = &MacroAssembler::add;
+    stl = &MacroAssembler::stlxr;
+    break;
+  case T_OBJECT:
+  case T_ARRAY:
+    if (UseCompressedOops) {
+      lda = &MacroAssembler::ldaxrw;
+      add = &MacroAssembler::addw;
+      stl = &MacroAssembler::stlxrw;
+    } else {
+      lda = &MacroAssembler::ldaxr;
+      add = &MacroAssembler::add;
+      stl = &MacroAssembler::stlxr;
+    }
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  switch (code) {
+  case lir_xadd:
+    {
+      RegisterOrConstant inc;
+      Register tmp = as_reg(tmp_op);
+      Register dst = as_reg(dest);
+      if (data->is_constant()) {
+        inc = RegisterOrConstant(as_long(data));
+        assert_different_registers(dst, addr.base(), tmp,
+                                   rscratch1, rscratch2);
+      } else {
+        inc = RegisterOrConstant(as_reg(data));
+        assert_different_registers(inc.as_register(), dst, addr.base(), tmp,
+                                   rscratch1, rscratch2);
+      }
+      Label again;
+      __ lea(tmp, addr);
+      __ bind(again);
+      (_masm->*lda)(dst, tmp);
+      (_masm->*add)(rscratch1, dst, inc);
+      (_masm->*stl)(rscratch2, rscratch1, tmp);
+      __ cbnzw(rscratch2, again);
+      break;
+    }
+  case lir_xchg:
+    {
+      Register tmp = tmp_op->as_register();
+      Register obj = as_reg(data);
+      Register dst = as_reg(dest);
+      if (is_oop && UseCompressedOops) {
+        __ encode_heap_oop(obj);
+      }
+      assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
+      Label again;
+      __ lea(tmp, addr);
+      __ bind(again);
+      (_masm->*lda)(dst, tmp);
+      (_masm->*stl)(rscratch2, obj, tmp);
+      __ cbnzw(rscratch2, again);
+      if (is_oop && UseCompressedOops) {
+        __ decode_heap_oop(dst);
+      }
+    }
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  __ membar(__ AnyAny);
+}
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
+#define CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
+
+ private:
+
+  int array_element_size(BasicType type) const;
+
+  void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack);
+
+  // helper functions which checks for overflow and sets bailout if it
+  // occurs.  Always returns a valid embeddable pointer but in the
+  // bailout case the pointer won't be to unique storage.
+  address float_constant(float f);
+  address double_constant(double d);
+
+  address int_constant(jlong n);
+
+  bool is_literal_address(LIR_Address* addr);
+
+  // When we need to use something other than rscratch1 use this
+  // method.
+  Address as_Address(LIR_Address* addr, Register tmp);
+
+  // Record the type of the receiver in ReceiverTypeData
+  void type_profile_helper(Register mdo,
+                           ciMethodData *md, ciProfileData *data,
+                           Register recv, Label* update_done);
+  void add_debug_info_for_branch(address adr, CodeEmitInfo* info);
+
+  void casw(Register addr, Register newval, Register cmpval);
+  void casl(Register addr, Register newval, Register cmpval);
+
+  void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL);
+
+  static const int max_tableswitches = 20;
+  struct tableswitch switches[max_tableswitches];
+  int tableswitch_count;
+
+  void init() { tableswitch_count = 0; }
+
+  void deoptimize_trap(CodeEmitInfo *info);
+
+public:
+
+  void store_parameter(Register r, int offset_from_esp_in_words);
+  void store_parameter(jint c,     int offset_from_esp_in_words);
+  void store_parameter(jobject c,  int offset_from_esp_in_words);
+
+enum { call_stub_size = 12 * NativeInstruction::instruction_size,
+       exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
+       deopt_handler_size = 7 * NativeInstruction::instruction_size };
+
+#endif // CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,1392 @@
+/*
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_aarch64.inline.hpp"
+
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+// Item will be loaded into a byte register; Intel only
+void LIRItem::load_byte_item() {
+  load_item();
+}
+
+
+void LIRItem::load_nonconstant() {
+  LIR_Opr r = value()->operand();
+  if (r->is_constant()) {
+    _result = r;
+  } else {
+    load_item();
+  }
+}
+
+//--------------------------------------------------------------
+//               LIRGenerator
+//--------------------------------------------------------------
+
+
+LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r0_oop_opr; }
+LIR_Opr LIRGenerator::exceptionPcOpr()  { return FrameMap::r3_opr; }
+LIR_Opr LIRGenerator::divInOpr()        { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::divOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::remOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::shiftCountOpr()   { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::r0_opr; }
+LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }
+
+
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
+  LIR_Opr opr;
+  switch (type->tag()) {
+    case intTag:     opr = FrameMap::r0_opr;          break;
+    case objectTag:  opr = FrameMap::r0_oop_opr;      break;
+    case longTag:    opr = FrameMap::long0_opr;        break;
+    case floatTag:   opr = FrameMap::fpu0_float_opr;  break;
+    case doubleTag:  opr = FrameMap::fpu0_double_opr;  break;
+
+    case addressTag:
+    default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
+  }
+
+  assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return opr;
+}
+
+
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  LIR_Opr reg = new_register(T_INT);
+  set_vreg_flag(reg, LIRGenerator::byte_reg);
+  return reg;
+}
+
+
+//--------- loading items into registers --------------------------------
+
+
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  if (v->type()->as_IntConstant() != NULL) {
+    return v->type()->as_IntConstant()->value() == 0L;
+  } else if (v->type()->as_LongConstant() != NULL) {
+    return v->type()->as_LongConstant()->value() == 0L;
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else {
+    return false;
+  }
+}
+
+bool LIRGenerator::can_inline_as_constant(Value v) const {
+  // FIXME: Just a guess
+  if (v->type()->as_IntConstant() != NULL) {
+    return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_IntConstant()->value());
+  } else if (v->type()->as_LongConstant() != NULL) {
+    return v->type()->as_LongConstant()->value() == 0L;
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else {
+    return false;
+  }
+}
+
+
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; }
+
+
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return LIR_OprFact::illegalOpr;
+}
+
+
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+
+  // accumulate fixed displacements
+  if (index->is_constant()) {
+    disp += index->as_constant_ptr()->as_jint() << shift;
+    index = LIR_OprFact::illegalOpr;
+  }
+
+  if (index->is_register()) {
+    // apply the shift and accumulate the displacement
+    if (shift > 0) {
+      LIR_Opr tmp = new_pointer_register();
+      __ shift_left(index, shift, tmp);
+      index = tmp;
+    }
+    if (disp != 0) {
+      LIR_Opr tmp = new_pointer_register();
+      if (Assembler::operand_valid_for_add_sub_immediate(disp)) {
+        __ add(tmp, tmp, LIR_OprFact::intptrConst(disp));
+        index = tmp;
+      } else {
+        __ move(tmp, LIR_OprFact::intptrConst(disp));
+        __ add(tmp, index, tmp);
+        index = tmp;
+      }
+      disp = 0;
+    }
+  } else if (disp != 0 && !Address::offset_ok_for_immed(disp, shift)) {
+    // index is illegal so replace it with the displacement loaded into a register
+    index = new_pointer_register();
+    __ move(LIR_OprFact::intptrConst(disp), index);
+    disp = 0;
+  }
+
+  // at this point we either have base + index or base + displacement
+  if (disp == 0) {
+    return new LIR_Address(base, index, type);
+  } else {
+    assert(Address::offset_ok_for_immed(disp, 0), "must be");
+    return new LIR_Address(base, disp, type);
+  }
+}
+
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type, bool needs_card_mark) {
+  int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+
+  LIR_Address* addr;
+  if (index_opr->is_constant()) {
+    addr = new LIR_Address(array_opr,
+                           offset_in_bytes + index_opr->as_jint() * elem_size, type);
+  } else {
+    if (offset_in_bytes) {
+      LIR_Opr tmp = new_pointer_register();
+      __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp);
+      array_opr = tmp;
+      offset_in_bytes = 0;
+    }
+    addr =  new LIR_Address(array_opr,
+                            index_opr,
+                            LIR_Address::scale(type),
+                            offset_in_bytes, type);
+  }
+  if (needs_card_mark) {
+    // This store will need a precise card mark, so go ahead and
+    // compute the full adddres instead of computing once for the
+    // store and again for the card mark.
+    LIR_Opr tmp = new_pointer_register();
+    __ leal(LIR_OprFact::address(addr), tmp);
+    return new LIR_Address(tmp, type);
+  } else {
+    return addr;
+  }
+}
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  LIR_Opr r;
+  if (type == T_LONG) {
+    r = LIR_OprFact::longConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(false, x)) {
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else if (type == T_INT) {
+    r = LIR_OprFact::intConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(true, x)) {
+      // This is all rather nasty.  We don't know whether our constant
+      // is required for a logical or an arithmetic operation, wo we
+      // don't know what the range of valid values is!!
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+  return r;
+}
+
+
+
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr pointer = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), pointer);
+  LIR_Address* addr = new LIR_Address(pointer, type);
+  increment_counter(addr, step);
+}
+
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr imm = NULL;
+  switch(addr->type()) {
+  case T_INT:
+    imm = LIR_OprFact::intConst(step);
+    break;
+  case T_LONG:
+    imm = LIR_OprFact::longConst(step);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  LIR_Opr reg = new_register(addr->type());
+  __ load(addr, reg);
+  __ add(reg, imm, reg);
+  __ store(reg, addr);
+}
+
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr reg = new_register(T_INT);
+  __ load(generate_address(base, disp, T_INT), reg, info);
+  __ cmp(condition, reg, LIR_OprFact::intConst(c));
+}
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr reg1 = new_register(T_INT);
+  __ load(generate_address(base, disp, type), reg1, info);
+  __ cmp(condition, reg, reg1);
+}
+
+
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+
+  if (is_power_of_2(c - 1)) {
+    __ shift_left(left, exact_log2(c - 1), tmp);
+    __ add(tmp, left, result);
+    return true;
+  } else if (is_power_of_2(c + 1)) {
+    __ shift_left(left, exact_log2(c + 1), tmp);
+    __ sub(tmp, left, result);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
+  BasicType type = item->type();
+  __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type));
+}
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+
+
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
+  assert(x->is_pinned(),"");
+  bool needs_range_check = x->compute_needs_range_check();
+  bool use_length = x->length() != NULL;
+  bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+  bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+                                         !get_jobject_constant(x->value())->is_null_object() ||
+                                         x->should_profile());
+
+  LIRItem array(x->array(), this);
+  LIRItem index(x->index(), this);
+  LIRItem value(x->value(), this);
+  LIRItem length(this);
+
+  array.load_item();
+  index.load_nonconstant();
+
+  if (use_length && needs_range_check) {
+    length.set_instruction(x->length());
+    length.load_item();
+
+  }
+  if (needs_store_check) {
+    value.load_item();
+  } else {
+    value.load_for_store(x->elt_type());
+  }
+
+  set_no_result(x);
+
+  // the CodeEmitInfo must be duplicated for each different
+  // LIR-instruction because spilling can occur anywhere between two
+  // instructions and so the debug information must be different
+  CodeEmitInfo* range_check_info = state_for(x);
+  CodeEmitInfo* null_check_info = NULL;
+  if (x->needs_null_check()) {
+    null_check_info = new CodeEmitInfo(range_check_info);
+  }
+
+  // emit array address setup early so it schedules better
+  // FIXME?  No harm in this on aarch64, and it might help
+  LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+
+  if (GenerateRangeChecks && needs_range_check) {
+    if (use_length) {
+      __ cmp(lir_cond_belowEqual, length.result(), index.result());
+      __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+    } else {
+      array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+      // range_check also does the null check
+      null_check_info = NULL;
+    }
+  }
+
+  if (GenerateArrayStoreCheck && needs_store_check) {
+    LIR_Opr tmp1 = new_register(objectType);
+    LIR_Opr tmp2 = new_register(objectType);
+    LIR_Opr tmp3 = new_register(objectType);
+
+    CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+    __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci());
+  }
+
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+    __ move(value.result(), array_addr, null_check_info);
+    // Seems to be a precise
+    post_barrier(LIR_OprFact::address(array_addr), value.result());
+  } else {
+    __ move(value.result(), array_addr, null_check_info);
+  }
+}
+
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // "lock" stores the address of the monitor stack slot, so this is not an oop
+  LIR_Opr lock = new_register(T_INT);
+  // Need a scratch register for biased locking
+  LIR_Opr scratch = LIR_OprFact::illegalOpr;
+  if (UseBiasedLocking) {
+    scratch = new_register(T_INT);
+  }
+
+  CodeEmitInfo* info_for_exception = NULL;
+  if (x->needs_null_check()) {
+    info_for_exception = state_for(x);
+  }
+  // this CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expect object to be unlocked)
+  CodeEmitInfo* info = state_for(x, x->state(), true);
+  monitor_enter(obj.result(), lock, syncTempOpr(), scratch,
+                        x->monitor_no(), info_for_exception, info);
+}
+
+
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(),"");
+
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+
+  LIR_Opr lock = new_register(T_INT);
+  LIR_Opr obj_temp = new_register(T_INT);
+  set_no_result(x);
+  monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+
+  LIRItem from(x->x(), this);
+  from.load_item();
+  LIR_Opr result = rlock_result(x);
+  __ negate (from.result(), result);
+
+}
+
+// for  _fadd, _fmul, _fsub, _fdiv, _frem
+//      _dadd, _dmul, _dsub, _ddiv, _drem
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+
+  if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) {
+    // float remainder is implemented as a direct call into the runtime
+    LIRItem right(x->x(), this);
+    LIRItem left(x->y(), this);
+
+    BasicTypeList signature(2);
+    if (x->op() == Bytecodes::_frem) {
+      signature.append(T_FLOAT);
+      signature.append(T_FLOAT);
+    } else {
+      signature.append(T_DOUBLE);
+      signature.append(T_DOUBLE);
+    }
+    CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+
+    const LIR_Opr result_reg = result_register_for(x->type());
+    left.load_item_force(cc->at(1));
+    right.load_item();
+
+    __ move(right.result(), cc->at(0));
+
+    address entry;
+    if (x->op() == Bytecodes::_frem) {
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+    } else {
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+    }
+
+    LIR_Opr result = rlock_result(x);
+    __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
+    __ move(result_reg, result);
+
+    return;
+  }
+
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+  LIRItem* left_arg  = &left;
+  LIRItem* right_arg = &right;
+
+  // Always load right hand side.
+  right.load_item();
+
+  if (!left.is_register())
+    left.load_item();
+
+  LIR_Opr reg = rlock(x);
+  LIR_Opr tmp = LIR_OprFact::illegalOpr;
+  if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) {
+    tmp = new_register(T_DOUBLE);
+  }
+
+  arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), NULL);
+
+  set_result(x, round_item(reg));
+}
+
+// for  _ladd, _lmul, _lsub, _ldiv, _lrem
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+
+  // missing test if instr is commutative and if we should swap
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
+
+    // the check for division by zero destroys the right operand
+    right.set_destroys_register();
+
+    // check for division by zero (destroys registers of right operand!)
+    CodeEmitInfo* info = state_for(x);
+
+    left.load_item();
+    right.load_item();
+
+    __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
+    __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+
+    rlock_result(x);
+    switch (x->op()) {
+    case Bytecodes::_lrem:
+      __ rem (left.result(), right.result(), x->operand());
+      break;
+    case Bytecodes::_ldiv:
+      __ div (left.result(), right.result(), x->operand());
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+
+
+  } else {
+    assert (x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub,
+            "expect lmul, ladd or lsub");
+    // add, sub, mul
+    left.load_item();
+    if (! right.is_register()) {
+      if (x->op() == Bytecodes::_lmul
+          || ! right.is_constant()
+          || ! Assembler::operand_valid_for_add_sub_immediate(right.get_jlong_constant())) {
+        right.load_item();
+      } else { // add, sub
+        assert (x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub");
+        // don't load constants to save register
+        right.load_nonconstant();
+      }
+    }
+    rlock_result(x);
+    arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+  }
+}
+
+// for: _iadd, _imul, _isub, _idiv, _irem
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+
+  // Test if instr is commutative and if we should swap
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+  LIRItem* left_arg = &left;
+  LIRItem* right_arg = &right;
+  if (x->is_commutative() && left.is_stack() && right.is_register()) {
+    // swap them if left is real stack (or cached) and right is real register(not cached)
+    left_arg = &right;
+    right_arg = &left;
+  }
+
+  left_arg->load_item();
+
+  // do not need to load right, as we can handle stack and constants
+  if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
+
+    right_arg->load_item();
+    rlock_result(x);
+
+    CodeEmitInfo* info = state_for(x);
+    LIR_Opr tmp = new_register(T_INT);
+    __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
+    __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
+    info = state_for(x);
+
+    if (x->op() == Bytecodes::_irem) {
+      __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+    } else if (x->op() == Bytecodes::_idiv) {
+      __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+    }
+
+  } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) {
+    if (right.is_constant()
+        && Assembler::operand_valid_for_add_sub_immediate(right.get_jint_constant())) {
+      right.load_nonconstant();
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr);
+  } else {
+    assert (x->op() == Bytecodes::_imul, "expect imul");
+    if (right.is_constant()) {
+      int c = right.get_jint_constant();
+      if (! is_power_of_2(c) && ! is_power_of_2(c + 1) && ! is_power_of_2(c - 1)) {
+        // Cannot use constant op.
+        right.load_item();
+      } else {
+        right.dont_load_item();
+      }
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT));
+  }
+}
+
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  // when an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {
+    case floatTag:
+    case doubleTag:  do_ArithmeticOp_FPU(x);  return;
+    case longTag:    do_ArithmeticOp_Long(x); return;
+    case intTag:     do_ArithmeticOp_Int(x);  return;
+  }
+  ShouldNotReachHere();
+}
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+
+  rlock_result(x);
+  if (right.is_constant()) {
+    right.dont_load_item();
+
+    switch (x->op()) {
+    case Bytecodes::_ishl: {
+      int c = right.get_jint_constant() & 0x1f;
+      __ shift_left(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_ishr: {
+      int c = right.get_jint_constant() & 0x1f;
+      __ shift_right(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_iushr: {
+      int c = right.get_jint_constant() & 0x1f;
+      __ unsigned_shift_right(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_lshl: {
+      int c = right.get_jint_constant() & 0x3f;
+      __ shift_left(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_lshr: {
+      int c = right.get_jint_constant() & 0x3f;
+      __ shift_right(left.result(), c, x->operand());
+      break;
+    }
+    case Bytecodes::_lushr: {
+      int c = right.get_jint_constant() & 0x3f;
+      __ unsigned_shift_right(left.result(), c, x->operand());
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+    }
+  } else {
+    right.load_item();
+    LIR_Opr tmp = new_register(T_INT);
+    switch (x->op()) {
+    case Bytecodes::_ishl: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp);
+      __ shift_left(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_ishr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp);
+      __ shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_iushr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp);
+      __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_lshl: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp);
+      __ shift_left(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_lshr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp);
+      __ shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    case Bytecodes::_lushr: {
+      __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp);
+      __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp);
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+    }
+  }
+}
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+
+  rlock_result(x);
+  if (right.is_constant()
+      && ((right.type()->tag() == intTag
+           && Assembler::operand_valid_for_logical_immediate(true, right.get_jint_constant()))
+          || (right.type()->tag() == longTag
+              && Assembler::operand_valid_for_logical_immediate(false, right.get_jlong_constant()))))  {
+    right.dont_load_item();
+  } else {
+    right.load_item();
+  }
+  switch (x->op()) {
+  case Bytecodes::_iand:
+  case Bytecodes::_land:
+    __ logical_and(left.result(), right.result(), x->operand()); break;
+  case Bytecodes::_ior:
+  case Bytecodes::_lor:
+    __ logical_or (left.result(), right.result(), x->operand()); break;
+  case Bytecodes::_ixor:
+  case Bytecodes::_lxor:
+    __ logical_xor(left.result(), right.result(), x->operand()); break;
+  default: Unimplemented();
+  }
+}
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  ValueTag tag = x->x()->type()->tag();
+  if (tag == longTag) {
+    left.set_destroys_register();
+  }
+  left.load_item();
+  right.load_item();
+  LIR_Opr reg = rlock_result(x);
+
+  if (x->x()->type()->is_float_kind()) {
+    Bytecodes::Code code = x->op();
+    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+  } else if (x->x()->type()->tag() == longTag) {
+    __ lcmp2int(left.result(), right.result(), reg);
+  } else {
+    Unimplemented();
+  }
+}
+
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
+  assert(x->number_of_arguments() == 4, "wrong type");
+  LIRItem obj   (x->argument_at(0), this);  // object
+  LIRItem offset(x->argument_at(1), this);  // offset of field
+  LIRItem cmp   (x->argument_at(2), this);  // value to compare with field
+  LIRItem val   (x->argument_at(3), this);  // replace field with val if matches cmp
+
+  assert(obj.type()->tag() == objectTag, "invalid type");
+
+  // In 64bit the type can be long, sparc doesn't have this assert
+  // assert(offset.type()->tag() == intTag, "invalid type");
+
+  assert(cmp.type()->tag() == type->tag(), "invalid type");
+  assert(val.type()->tag() == type->tag(), "invalid type");
+
+  // get address of field
+  obj.load_item();
+  offset.load_nonconstant();
+  val.load_item();
+  cmp.load_item();
+
+  LIR_Address* a;
+  if(offset.result()->is_constant()) {
+    jlong c = offset.result()->as_jlong();
+    if ((jlong)((jint)c) == c) {
+      a = new LIR_Address(obj.result(),
+                          (jint)c,
+                          as_BasicType(type));
+    } else {
+      LIR_Opr tmp = new_register(T_LONG);
+      __ move(offset.result(), tmp);
+      a = new LIR_Address(obj.result(),
+                          tmp,
+                          as_BasicType(type));
+    }
+  } else {
+    a = new LIR_Address(obj.result(),
+                        offset.result(),
+                        LIR_Address::times_1,
+                        0,
+                        as_BasicType(type));
+  }
+  LIR_Opr addr = new_pointer_register();
+  __ leal(LIR_OprFact::address(a), addr);
+
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Do the pre-write barrier, if any.
+    pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+  }
+
+  LIR_Opr result = rlock_result(x);
+
+  LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
+  if (type == objectType)
+    __ cas_obj(addr, cmp.result(), val.result(), new_register(T_INT), new_register(T_INT),
+               result);
+  else if (type == intType)
+    __ cas_int(addr, cmp.result(), val.result(), ill, ill);
+  else if (type == longType)
+    __ cas_long(addr, cmp.result(), val.result(), ill, ill);
+  else {
+    ShouldNotReachHere();
+  }
+
+  __ logical_xor(FrameMap::r8_opr, LIR_OprFact::intConst(1), result);
+
+  if (type == objectType) {   // Write-barrier needed for Object fields.
+    // Seems to be precise
+    post_barrier(addr, val.result());
+  }
+}
+
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+  switch (x->id()) {
+    case vmIntrinsics::_dabs:
+    case vmIntrinsics::_dsqrt: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+      LIRItem value(x->argument_at(0), this);
+      value.load_item();
+      LIR_Opr dst = rlock_result(x);
+
+      switch (x->id()) {
+      case vmIntrinsics::_dsqrt: {
+        __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      }
+      case vmIntrinsics::_dabs: {
+        __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      }
+      }
+      break;
+    }
+    case vmIntrinsics::_dlog10: // fall through
+    case vmIntrinsics::_dlog: // fall through
+    case vmIntrinsics::_dsin: // fall through
+    case vmIntrinsics::_dtan: // fall through
+    case vmIntrinsics::_dcos: // fall through
+    case vmIntrinsics::_dexp: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+
+      address runtime_entry = NULL;
+      switch (x->id()) {
+      case vmIntrinsics::_dsin:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+        break;
+      case vmIntrinsics::_dcos:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+        break;
+      case vmIntrinsics::_dtan:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+        break;
+      case vmIntrinsics::_dlog:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+        break;
+      case vmIntrinsics::_dlog10:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+        break;
+      case vmIntrinsics::_dexp:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+
+      LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+    case vmIntrinsics::_dpow: {
+      assert(x->number_of_arguments() == 2, "wrong type");
+      address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+  }
+}
+
+
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
+  assert(x->number_of_arguments() == 5, "wrong type");
+
+  // Make all state_for calls early since they can emit code
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem src(x->argument_at(0), this);
+  LIRItem src_pos(x->argument_at(1), this);
+  LIRItem dst(x->argument_at(2), this);
+  LIRItem dst_pos(x->argument_at(3), this);
+  LIRItem length(x->argument_at(4), this);
+
+  // operands for arraycopy must use fixed registers, otherwise
+  // LinearScan will fail allocation (because arraycopy always needs a
+  // call)
+
+  // The java calling convention will give us enough registers
+  // so that on the stub side the args will be perfect already.
+  // On the other slow/special case side we call C and the arg
+  // positions are not similar enough to pick one as the best.
+  // Also because the java calling convention is a "shifted" version
+  // of the C convention we can process the java args trivially into C
+  // args without worry of overwriting during the xfer
+
+  src.load_item_force     (FrameMap::as_oop_opr(j_rarg0));
+  src_pos.load_item_force (FrameMap::as_opr(j_rarg1));
+  dst.load_item_force     (FrameMap::as_oop_opr(j_rarg2));
+  dst_pos.load_item_force (FrameMap::as_opr(j_rarg3));
+  length.load_item_force  (FrameMap::as_opr(j_rarg4));
+
+  LIR_Opr tmp =           FrameMap::as_opr(j_rarg5);
+
+  set_no_result(x);
+
+  int flags;
+  ciArrayKlass* expected_type;
+  arraycopy_helper(x, &flags, &expected_type);
+
+  __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint
+}
+
+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+  assert(UseCRC32Intrinsics, "why are we here?");
+  // Make all state_for calls early since they can emit code
+  LIR_Opr result = rlock_result(x);
+  int flags = 0;
+  switch (x->id()) {
+    case vmIntrinsics::_updateCRC32: {
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem val(x->argument_at(1), this);
+      // val is destroyed by update_crc32
+      val.set_destroys_register();
+      crc.load_item();
+      val.load_item();
+      __ update_crc32(crc.result(), val.result(), result);
+      break;
+    }
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32: {
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem len(x->argument_at(3), this);
+      buf.load_item();
+      off.load_nonconstant();
+
+      LIR_Opr index = off.result();
+      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+      if(off.result()->is_constant()) {
+        index = LIR_OprFact::illegalOpr;
+       offset += off.result()->as_jint();
+      }
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+      }
+
+      if (offset) {
+        LIR_Opr tmp = new_pointer_register();
+        __ add(base_op, LIR_OprFact::intConst(offset), tmp);
+        base_op = tmp;
+        offset = 0;
+      }
+
+      LIR_Address* a = new LIR_Address(base_op,
+                                       index,
+                                       LIR_Address::times_1,
+                                       offset,
+                                       T_BYTE);
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);
+
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      len.load_item_force(cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}
+
+// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
+// _i2b, _i2c, _i2s
+void LIRGenerator::do_Convert(Convert* x) {
+  LIRItem value(x->value(), this);
+  value.load_item();
+  LIR_Opr input = value.result();
+  LIR_Opr result = rlock(x);
+
+  // arguments of lir_convert
+  LIR_Opr conv_input = input;
+  LIR_Opr conv_result = result;
+  ConversionStub* stub = NULL;
+
+  __ convert(x->op(), conv_input, conv_result);
+
+  assert(result->is_virtual(), "result must be virtual register");
+  set_result(x, result);
+}
+
+void LIRGenerator::do_NewInstance(NewInstance* x) {
+#ifndef PRODUCT
+  if (PrintNotLoaded && !x->klass()->is_loaded()) {
+    tty->print_cr("   ###class not loaded at new bci %d", x->printable_bci());
+  }
+#endif
+  CodeEmitInfo* info = state_for(x, x->state());
+  LIR_Opr reg = result_register_for(x->type());
+  new_instance(reg, x->klass(), x->is_unresolved(),
+                       FrameMap::r2_oop_opr,
+                       FrameMap::r5_oop_opr,
+                       FrameMap::r4_oop_opr,
+                       LIR_OprFact::illegalOpr,
+                       FrameMap::r3_metadata_opr, info);
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem length(x->length(), this);
+  length.load_item_force(FrameMap::r19_opr);
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr tmp1 = FrameMap::r2_oop_opr;
+  LIR_Opr tmp2 = FrameMap::r4_oop_opr;
+  LIR_Opr tmp3 = FrameMap::r5_oop_opr;
+  LIR_Opr tmp4 = reg;
+  LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
+  LIR_Opr len = length.result();
+  BasicType elem_type = x->elt_type();
+
+  __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
+
+  CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
+  LIRItem length(x->length(), this);
+  // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction
+  // and therefore provide the state before the parameters have been consumed
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info =  state_for(x, x->state_before());
+  }
+
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr tmp1 = FrameMap::r2_oop_opr;
+  LIR_Opr tmp2 = FrameMap::r4_oop_opr;
+  LIR_Opr tmp3 = FrameMap::r5_oop_opr;
+  LIR_Opr tmp4 = reg;
+  LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
+
+  length.load_item_force(FrameMap::r19_opr);
+  LIR_Opr len = length.result();
+
+  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+  ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass());
+  if (obj == ciEnv::unloaded_ciobjarrayklass()) {
+    BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+  }
+  klass2reg_with_patching(klass_reg, obj, patching_info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
+  Values* dims = x->dims();
+  int i = dims->length();
+  LIRItemList* items = new LIRItemList(dims->length(), NULL);
+  while (i-- > 0) {
+    LIRItem* size = new LIRItem(dims->at(i), this);
+    items->at_put(i, size);
+  }
+
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for(x, x->state_before());
+
+    // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+    // clone all handlers (NOTE: Usually this is handled transparently
+    // by the CodeEmitInfo cloning logic in CodeStub constructors but
+    // is done explicitly here because a stub isn't being used).
+    x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+  }
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  i = dims->length();
+  while (i-- > 0) {
+    LIRItem* size = items->at(i);
+    size->load_item();
+
+    store_stack_parameter(size->result(), in_ByteSize(i*4));
+  }
+
+  LIR_Opr klass_reg = FrameMap::r0_metadata_opr;
+  klass2reg_with_patching(klass_reg, x->klass(), patching_info);
+
+  LIR_Opr rank = FrameMap::r19_opr;
+  __ move(LIR_OprFact::intConst(x->rank()), rank);
+  LIR_Opr varargs = FrameMap::r2_opr;
+  __ move(FrameMap::sp_opr, varargs);
+  LIR_OprList* args = new LIR_OprList(3);
+  args->append(klass_reg);
+  args->append(rank);
+  args->append(varargs);
+  LIR_Opr reg = result_register_for(x->type());
+  __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+                  LIR_OprFact::illegalOpr,
+                  reg, args, info);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);
+}
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // nothing to do for now
+}
+
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+  LIRItem obj(x->obj(), this);
+
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) {
+    // must do this before locking the destination register as an oop register,
+    // and before the obj is loaded (the latter is for deoptimization)
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+
+  // info for exceptions
+  CodeEmitInfo* info_for_exception = state_for(x);
+
+  CodeStub* stub;
+  if (x->is_incompatible_class_change_check()) {
+    assert(patching_info == NULL, "can't patch this");
+    stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+  } else {
+    stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+  }
+  LIR_Opr reg = rlock_result(x);
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+  if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
+    tmp3 = new_register(objectType);
+  }
+  __ checkcast(reg, obj.result(), x->klass(),
+               new_register(objectType), new_register(objectType), tmp3,
+               x->direct_compare(), info_for_exception, patching_info, stub,
+               x->profiled_method(), x->profiled_bci());
+}
+
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
+  LIRItem obj(x->obj(), this);
+
+  // result and test object may not be in same register
+  LIR_Opr reg = rlock_result(x);
+  CodeEmitInfo* patching_info = NULL;
+  if ((!x->klass()->is_loaded() || PatchALot)) {
+    // must do this before locking the destination register as an oop register
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
+  if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
+    tmp3 = new_register(objectType);
+  }
+  __ instanceof(reg, obj.result(), x->klass(),
+                new_register(objectType), new_register(objectType), tmp3,
+                x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
+}
+
+void LIRGenerator::do_If(If* x) {
+  assert(x->number_of_sux() == 2, "inconsistency");
+  ValueTag tag = x->x()->type()->tag();
+  bool is_safepoint = x->is_safepoint();
+
+  If::Condition cond = x->cond();
+
+  LIRItem xitem(x->x(), this);
+  LIRItem yitem(x->y(), this);
+  LIRItem* xin = &xitem;
+  LIRItem* yin = &yitem;
+
+  if (tag == longTag) {
+    // for longs, only conditions "eql", "neq", "lss", "geq" are valid;
+    // mirror for other conditions
+    if (cond == If::gtr || cond == If::leq) {
+      cond = Instruction::mirror(cond);
+      xin = &yitem;
+      yin = &xitem;
+    }
+    xin->set_destroys_register();
+  }
+  xin->load_item();
+
+  if (tag == longTag) {
+    if (yin->is_constant()
+        && Assembler::operand_valid_for_add_sub_immediate(yin->get_jlong_constant())) {
+      yin->dont_load_item();
+    } else {
+      yin->load_item();
+    }
+  } else if (tag == intTag) {
+    if (yin->is_constant()
+        && Assembler::operand_valid_for_add_sub_immediate(yin->get_jint_constant()))  {
+      yin->dont_load_item();
+    } else {
+      yin->load_item();
+    }
+  } else {
+    yin->load_item();
+  }
+
+  // add safepoint before generating condition code so it can be recomputed
+  if (x->is_safepoint()) {
+    // increment backedge counter if needed
+    increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
+    __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
+  }
+  set_no_result(x);
+
+  LIR_Opr left = xin->result();
+  LIR_Opr right = yin->result();
+
+  __ cmp(lir_cond(cond), left, right);
+  // Generate branch profiling. Profiling code doesn't kill flags.
+  profile_branch(x, cond);
+  move_to_phi(x->state());
+  if (x->x()->type()->is_float_kind()) {
+    __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());
+  } else {
+    __ branch(lir_cond(cond), right->type(), x->tsux());
+  }
+  assert(x->default_sux() == x->fsux(), "wrong destination above");
+  __ jump(x->default_sux());
+}
+
+LIR_Opr LIRGenerator::getThreadPointer() {
+   return FrameMap::as_pointer_opr(rthread);
+}
+
+void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); }
+
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
+                                        CodeEmitInfo* info) {
+  __ volatile_store_mem_reg(value, address, info);
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
+                                       CodeEmitInfo* info) {
+  __ volatile_load_mem_reg(address, result, info);
+}
+
+void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset,
+                                     BasicType type, bool is_volatile) {
+  LIR_Address* addr = new LIR_Address(src, offset, type);
+  __ load(addr, dst);
+}
+
+
+void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
+                                     BasicType type, bool is_volatile) {
+  LIR_Address* addr = new LIR_Address(src, offset, type);
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+  if (is_obj) {
+    // Do the pre-write barrier, if any.
+    pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+    __ move(data, addr);
+    assert(src->is_register(), "must be register");
+    // Seems to be a precise address
+    post_barrier(LIR_OprFact::address(addr), data);
+  } else {
+    __ move(data, addr);
+  }
+}
+
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  BasicType type = x->basic_type();
+  LIRItem src(x->object(), this);
+  LIRItem off(x->offset(), this);
+  LIRItem value(x->value(), this);
+
+  src.load_item();
+  off.load_nonconstant();
+
+  // We can cope with a constant increment in an xadd
+  if (! (x->is_add()
+         && value.is_constant()
+         && can_inline_as_constant(x->value()))) {
+    value.load_item();
+  }
+
+  LIR_Opr dst = rlock_result(x, type);
+  LIR_Opr data = value.result();
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+  LIR_Opr offset = off.result();
+
+  if (data == dst) {
+    LIR_Opr tmp = new_register(data->type());
+    __ move(data, tmp);
+    data = tmp;
+  }
+
+  LIR_Address* addr;
+  if (offset->is_constant()) {
+    jlong l = offset->as_jlong();
+    assert((jlong)((jint)l) == l, "offset too large for constant");
+    jint c = (jint)l;
+    addr = new LIR_Address(src.result(), c, type);
+  } else {
+    addr = new LIR_Address(src.result(), offset, type);
+  }
+
+  LIR_Opr tmp = new_register(T_INT);
+  LIR_Opr ptr = LIR_OprFact::illegalOpr;
+
+  if (x->is_add()) {
+    __ xadd(LIR_OprFact::address(addr), data, dst, tmp);
+  } else {
+    if (is_obj) {
+      // Do the pre-write barrier, if any.
+      ptr = new_pointer_register();
+      __ add(src.result(), off.result(), ptr);
+      pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */,
+                  true /* do_load */, false /* patch */, NULL);
+    }
+    __ xchg(LIR_OprFact::address(addr), data, dst, tmp);
+    if (is_obj) {
+      post_barrier(ptr, data);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/bitMap.inline.hpp"
+
+void LinearScan::allocate_fpu_stack() {
+  // No FPU stack on AArch64
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_LinearScan_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_LINEARSCAN_HPP
+#define CPU_AARCH64_VM_C1_LINEARSCAN_HPP
+
+inline bool LinearScan::is_processed_reg_num(int reg_num) {
+  return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) {
+  return 1;
+}
+
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
+  return false;
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {
+  assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+  if (assigned_reg < pd_first_callee_saved_reg)
+    return true;
+  if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg)
+    return true;
+  if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg)
+    return true;
+  return false;
+}
+
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+  // FIXME ??
+}
+
+
+// Implementation of LinearScanWalker
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
+  if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
+    assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
+    _first_reg = pd_first_callee_saved_reg;
+    _last_reg = pd_last_callee_saved_reg;
+    return true;
+  } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) {
+    _first_reg = pd_first_cpu_reg;
+    _last_reg = pd_last_allocatable_cpu_reg;
+    return true;
+  }
+  return false;
+}
+
+
+#endif // CPU_AARCH64_VM_C1_LINEARSCAN_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc_interface/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/stubRoutines.hpp"
+
+void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result,
+                                  FloatRegister f0, FloatRegister f1,
+                                  Register result)
+{
+  Label done;
+  if (is_float) {
+    fcmps(f0, f1);
+  } else {
+    fcmpd(f0, f1);
+  }
+  if (unordered_result < 0) {
+    // we want -1 for unordered or less than, 0 for equal and 1 for
+    // greater than.
+    cset(result, NE);  // Not equal or unordered
+    cneg(result, result, LT);  // Less than or unordered
+  } else {
+    // we want -1 for less than, 0 for equal and 1 for unordered or
+    // greater than.
+    cset(result, NE);  // Not equal or unordered
+    cneg(result, result, LO);  // Less than
+  }
+}
+
+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) {
+  const int aligned_mask = BytesPerWord -1;
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
+  Label done, fail;
+  int null_check_offset = -1;
+
+  verify_oop(obj);
+
+  // save object being locked into the BasicObjectLock
+  str(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+
+  if (UseBiasedLocking) {
+    assert(scratch != noreg, "should have scratch register at this point");
+    null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case);
+  } else {
+    null_check_offset = offset();
+  }
+
+  // Load object header
+  ldr(hdr, Address(obj, hdr_offset));
+  // and mark it as unlocked
+  orr(hdr, hdr, markOopDesc::unlocked_value);
+  // save unlocked object header into the displaced header location on the stack
+  str(hdr, Address(disp_hdr, 0));
+  // test if object header is still the same (i.e. unlocked), and if so, store the
+  // displaced header address in the object header - if it is not the same, get the
+  // object header instead
+  lea(rscratch2, Address(obj, hdr_offset));
+  cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL);
+  // if the object header was the same, we're done
+  // if the object header was not the same, it is now in the hdr register
+  // => test if it is a stack pointer into the same stack (recursive locking), i.e.:
+  //
+  // 1) (hdr & aligned_mask) == 0
+  // 2) sp <= hdr
+  // 3) hdr <= sp + page_size
+  //
+  // these 3 tests can be done by evaluating the following expression:
+  //
+  // (hdr - sp) & (aligned_mask - page_size)
+  //
+  // assuming both the stack pointer and page_size have their least
+  // significant 2 bits cleared and page_size is a power of 2
+  mov(rscratch1, sp);
+  sub(hdr, hdr, rscratch1);
+  ands(hdr, hdr, aligned_mask - os::vm_page_size());
+  // for recursive locking, the result is zero => save it in the displaced header
+  // location (NULL in the displaced hdr location indicates recursive locking)
+  str(hdr, Address(disp_hdr, 0));
+  // otherwise we don't care about the result and handle locking via runtime call
+  cbnz(hdr, slow_case);
+  // done
+  bind(done);
+  if (PrintBiasedLockingStatistics) {
+    lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
+    addmw(Address(rscratch2, 0), 1, rscratch1);
+  }
+  return null_check_offset;
+}
+
+
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  const int aligned_mask = BytesPerWord -1;
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
+  Label done;
+
+  if (UseBiasedLocking) {
+    // load object
+    ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+    biased_locking_exit(obj, hdr, done);
+  }
+
+  // load displaced header
+  ldr(hdr, Address(disp_hdr, 0));
+  // if the loaded hdr is NULL we had recursive locking
+  // if we had recursive locking, we are done
+  cbz(hdr, done);
+  if (!UseBiasedLocking) {
+    // load object
+    ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+  }
+  verify_oop(obj);
+  // test if object header is pointing to the displaced header, and if so, restore
+  // the displaced header in the object - if the object header is not pointing to
+  // the displaced header, get the object header instead
+  // if the object header was not pointing to the displaced header,
+  // we do unlocking via runtime call
+  if (hdr_offset) {
+    lea(rscratch1, Address(obj, hdr_offset));
+    cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case);
+  } else {
+    cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case);
+  }
+  // done
+  bind(done);
+}
+
+
+// Defines obj, preserves var_size_in_bytes
+void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) {
+  if (UseTLAB) {
+    tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
+  } else {
+    eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
+    incr_allocated_bytes(noreg, var_size_in_bytes, con_size_in_bytes, t1);
+  }
+}
+
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
+  assert_different_registers(obj, klass, len);
+  if (UseBiasedLocking && !len->is_valid()) {
+    assert_different_registers(obj, klass, len, t1, t2);
+    ldr(t1, Address(klass, Klass::prototype_header_offset()));
+  } else {
+    // This assumes that all prototype bits fit in an int32_t
+    mov(t1, (int32_t)(intptr_t)markOopDesc::prototype());
+  }
+  str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+  if (UseCompressedClassPointers) { // Take care not to kill klass
+    encode_klass_not_null(t1, klass);
+    strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
+  } else {
+    str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
+  }
+
+  if (len->is_valid()) {
+    strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
+  } else if (UseCompressedClassPointers) {
+    store_klass_gap(obj, zr);
+  }
+}
+
+// Zero words; len is in bytes
+// Destroys all registers except addr
+// len must be a nonzero multiple of wordSize
+void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
+  assert_different_registers(addr, len, t1, rscratch1, rscratch2);
+
+#ifdef ASSERT
+  { Label L;
+    tst(len, BytesPerWord - 1);
+    br(Assembler::EQ, L);
+    stop("len is not a multiple of BytesPerWord");
+    bind(L);
+  }
+#endif
+
+#ifndef PRODUCT
+  block_comment("zero memory");
+#endif
+
+  Label loop;
+  Label entry;
+
+//  Algorithm:
+//
+//    scratch1 = cnt & 7;
+//    cnt -= scratch1;
+//    p += scratch1;
+//    switch (scratch1) {
+//      do {
+//        cnt -= 8;
+//          p[-8] = 0;
+//        case 7:
+//          p[-7] = 0;
+//        case 6:
+//          p[-6] = 0;
+//          // ...
+//        case 1:
+//          p[-1] = 0;
+//        case 0:
+//          p += 8;
+//      } while (cnt);
+//    }
+
+  const int unroll = 8; // Number of str(zr) instructions we'll unroll
+
+  lsr(len, len, LogBytesPerWord);
+  andr(rscratch1, len, unroll - 1);  // tmp1 = cnt % unroll
+  sub(len, len, rscratch1);      // cnt -= unroll
+  // t1 always points to the end of the region we're about to zero
+  add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
+  adr(rscratch2, entry);
+  sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
+  br(rscratch2);
+  bind(loop);
+  sub(len, len, unroll);
+  for (int i = -unroll; i < 0; i++)
+    str(zr, Address(t1, i * wordSize));
+  bind(entry);
+  add(t1, t1, unroll * wordSize);
+  cbnz(len, loop);
+}
+
+// preserves obj, destroys len_in_bytes
+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+  Label done;
+  assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
+  assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
+  Register index = len_in_bytes;
+  // index is positive and ptr sized
+  subs(index, index, hdr_size_in_bytes);
+  br(Assembler::EQ, done);
+  // note: for the remaining code to work, index must be a multiple of BytesPerWord
+#ifdef ASSERT
+  { Label L;
+    tst(index, BytesPerWord - 1);
+    br(Assembler::EQ, L);
+    stop("index is not a multiple of BytesPerWord");
+    bind(L);
+  }
+#endif
+
+  // Preserve obj
+  if (hdr_size_in_bytes)
+    add(obj, obj, hdr_size_in_bytes);
+  zero_memory(obj, index, t1);
+  if (hdr_size_in_bytes)
+    sub(obj, obj, hdr_size_in_bytes);
+
+  // done
+  bind(done);
+}
+
+
+void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
+  assert_different_registers(obj, t1, t2); // XXX really?
+  assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
+
+  try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
+
+  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+}
+
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
+         "con_size_in_bytes is not multiple of alignment");
+  const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
+
+  initialize_header(obj, klass, noreg, t1, t2);
+
+  // clear rest of allocated space
+  const Register index = t2;
+  const int threshold = 16 * BytesPerWord;   // approximate break even point for code size (see comments below)
+  if (var_size_in_bytes != noreg) {
+    mov(index, var_size_in_bytes);
+    initialize_body(obj, index, hdr_size_in_bytes, t1);
+  } else if (con_size_in_bytes <= threshold) {
+    // use explicit null stores
+    int i = hdr_size_in_bytes;
+    if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
+      str(zr, Address(obj, i));
+      i += BytesPerWord;
+    }
+    for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
+      stp(zr, zr, Address(obj, i));
+  } else if (con_size_in_bytes > hdr_size_in_bytes) {
+    block_comment("zero memory");
+    // use loop to null out the fields
+
+    int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
+    mov(index,  words / 8);
+
+    const int unroll = 8; // Number of str(zr) instructions we'll unroll
+    int remainder = words % unroll;
+    lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
+
+    Label entry_point, loop;
+    b(entry_point);
+
+    bind(loop);
+    sub(index, index, 1);
+    for (int i = -unroll; i < 0; i++) {
+      if (-i == remainder)
+        bind(entry_point);
+      str(zr, Address(rscratch1, i * wordSize));
+    }
+    if (remainder == 0)
+      bind(entry_point);
+    add(rscratch1, rscratch1, unroll * wordSize);
+    cbnz(index, loop);
+
+  }
+
+  membar(StoreStore);
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == r0, "must be");
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+
+  verify_oop(obj);
+}
+void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) {
+  assert_different_registers(obj, len, t1, t2, klass);
+
+  // determine alignment mask
+  assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
+
+  // check for negative or excessive length
+  mov(rscratch1, (int32_t)max_array_allocation_length);
+  cmp(len, rscratch1);
+  br(Assembler::HS, slow_case);
+
+  const Register arr_size = t2; // okay to be the same
+  // align object end
+  mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask);
+  add(arr_size, arr_size, len, ext::uxtw, f);
+  andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
+
+  try_allocate(obj, arr_size, 0, t1, t2, slow_case);
+
+  initialize_header(obj, klass, len, t1, t2);
+
+  // clear rest of allocated space
+  const Register len_zero = len;
+  initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
+
+  membar(StoreStore);
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == r0, "must be");
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+
+  verify_oop(obj);
+}
+
+
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
+  verify_oop(receiver);
+  // explicit NULL check not needed since load from [klass_offset] causes a trap
+  // check against inline cache
+  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
+
+  cmp_klass(receiver, iCache, rscratch1);
+}
+
+
+void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
+  // If we have to make this method not-entrant we'll overwrite its
+  // first instruction with a jump.  For this action to be legal we
+  // must ensure that this first instruction is a B, BL, NOP, BKPT,
+  // SVC, HVC, or SMC.  Make it a NOP.
+  nop();
+  assert(bang_size_in_bytes >= framesize, "stack bang size incorrect");
+  // Make sure there is enough stack space for this method's activation.
+  // Note that we do this before doing an enter().
+  generate_stack_overflow_check(bang_size_in_bytes);
+  MacroAssembler::build_frame(framesize + 2 * wordSize);
+  if (NotifySimulator) {
+    notify(Assembler::method_entry);
+  }
+}
+
+void C1_MacroAssembler::remove_frame(int framesize) {
+  MacroAssembler::remove_frame(framesize + 2 * wordSize);
+  if (NotifySimulator) {
+    notify(Assembler::method_reentry);
+  }
+}
+
+
+void C1_MacroAssembler::verified_entry() {
+}
+
+#ifndef PRODUCT
+
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+  if (!VerifyOops) return;
+  verify_oop_addr(Address(sp, stack_offset), "oop");
+}
+
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+  if (!VerifyOops) return;
+  Label not_null;
+  cbnz(r, not_null);
+  stop("non-null oop required");
+  bind(not_null);
+  verify_oop(r);
+}
+
+void C1_MacroAssembler::invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) {
+#ifdef ASSERT
+  static int nn;
+  if (inv_r0) mov(r0, 0xDEAD);
+  if (inv_r19) mov(r19, 0xDEAD);
+  if (inv_r2) mov(r2, nn++);
+  if (inv_r3) mov(r3, 0xDEAD);
+  if (inv_r4) mov(r4, 0xDEAD);
+  if (inv_r5) mov(r5, 0xDEAD);
+#endif
+}
+#endif // ifndef PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
+
+using MacroAssembler::build_frame;
+
+// C1_MacroAssembler contains high-level macros for C1
+
+ private:
+  int _rsp_offset;    // track rsp changes
+  // initialization
+  void pd_init() { _rsp_offset = 0; }
+
+void zero_memory(Register addr, Register len, Register t1);
+
+ public:
+  void try_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+
+  void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);
+  void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1);
+
+  void float_cmp(bool is_float, int unordered_result,
+                 FloatRegister f0, FloatRegister f1,
+                 Register result);
+
+  // locking
+  // hdr     : must be r0, contents destroyed
+  // obj     : must point to the object to lock, contents preserved
+  // disp_hdr: must point to the displaced header location, contents preserved
+  // scratch : scratch register, contents destroyed
+  // returns code offset at which to add null check debug information
+  int lock_object  (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case);
+
+  // unlocking
+  // hdr     : contents destroyed
+  // obj     : must point to the object to lock, contents preserved
+  // disp_hdr: must be r0 & must point to the displaced header location, contents destroyed
+  void unlock_object(Register swap, Register obj, Register lock, Label& slow_case);
+
+  void initialize_object(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register klass,                    // object klass
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2                        // temp register
+  );
+
+  // allocation of fixed-size objects
+  // (can also be used to allocate fixed-size arrays, by setting
+  // hdr_size correctly and storing the array length afterwards)
+  // obj        : will contain pointer to allocated object
+  // t1, t2     : scratch registers - contents destroyed
+  // header_size: size of object header in words
+  // object_size: total size of object in words
+  // slow_case  : exit to slow case implementation if fast allocation fails
+  void allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case);
+
+  enum {
+    max_array_allocation_length = 0x00FFFFFF
+  };
+
+  // allocation of arrays
+  // obj        : will contain pointer to allocated object
+  // len        : array length in number of elements
+  // t          : scratch register - contents destroyed
+  // header_size: size of object header in words
+  // f          : element scale factor
+  // slow_case  : exit to slow case implementation if fast allocation fails
+  void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, int f, Register klass, Label& slow_case);
+
+  int  rsp_offset() const { return _rsp_offset; }
+  void set_rsp_offset(int n) { _rsp_offset = n; }
+
+  void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN;
+
+#endif // CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "compiler/disassembler.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_aarch64.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/vframe.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+
+// Implementation of StubAssembler
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) {
+  // setup registers
+  assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different");
+  assert(oop_result1 != rthread && metadata_result != rthread, "registers must be different");
+  assert(args_size >= 0, "illegal args_size");
+  bool align_stack = false;
+
+  mov(c_rarg0, rthread);
+  set_num_rt_args(0); // Nothing on stack
+
+  Label retaddr;
+  set_last_Java_frame(sp, rfp, retaddr, rscratch1);
+
+  // do the call
+  lea(rscratch1, RuntimeAddress(entry));
+  blrt(rscratch1, args_size + 1, 8, 1);
+  bind(retaddr);
+  int call_offset = offset();
+  // verify callee-saved register
+#ifdef ASSERT
+  push(r0, sp);
+  { Label L;
+    get_thread(r0);
+    cmp(rthread, r0);
+    br(Assembler::EQ, L);
+    stop("StubAssembler::call_RT: rthread not callee saved?");
+    bind(L);
+  }
+  pop(r0, sp);
+#endif
+  reset_last_Java_frame(true, true);
+  maybe_isb();
+
+  // check for pending exceptions
+  { Label L;
+    // check for pending exceptions (java_thread is set upon return)
+    ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    cbz(rscratch1, L);
+    // exception pending => remove activation and forward to exception handler
+    // make sure that the vm_results are cleared
+    if (oop_result1->is_valid()) {
+      str(zr, Address(rthread, JavaThread::vm_result_offset()));
+    }
+    if (metadata_result->is_valid()) {
+      str(zr, Address(rthread, JavaThread::vm_result_2_offset()));
+    }
+    if (frame_size() == no_frame_size) {
+      leave();
+      far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+    } else if (_stub_id == Runtime1::forward_exception_id) {
+      should_not_reach_here();
+    } else {
+      far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+    }
+    bind(L);
+  }
+  // get oop results if there are any and reset the values in the thread
+  if (oop_result1->is_valid()) {
+    get_vm_result(oop_result1, rthread);
+  }
+  if (metadata_result->is_valid()) {
+    get_vm_result_2(metadata_result, rthread);
+  }
+  return call_offset;
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) {
+  mov(c_rarg1, arg1);
+  return call_RT(oop_result1, metadata_result, entry, 1);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) {
+  if (c_rarg1 == arg2) {
+    if (c_rarg2 == arg1) {
+      mov(rscratch1, arg1);
+      mov(arg1, arg2);
+      mov(arg2, rscratch1);
+    } else {
+      mov(c_rarg2, arg2);
+      mov(c_rarg1, arg1);
+    }
+  } else {
+    mov(c_rarg1, arg1);
+    mov(c_rarg2, arg2);
+  }
+  return call_RT(oop_result1, metadata_result, entry, 2);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
+  // if there is any conflict use the stack
+  if (arg1 == c_rarg2 || arg1 == c_rarg3 ||
+      arg2 == c_rarg1 || arg1 == c_rarg3 ||
+      arg3 == c_rarg1 || arg1 == c_rarg2) {
+    stp(arg3, arg2, Address(pre(sp, 2 * wordSize)));
+    stp(arg1, zr, Address(pre(sp, -2 * wordSize)));
+    ldp(c_rarg1, zr, Address(post(sp, 2 * wordSize)));
+    ldp(c_rarg3, c_rarg2, Address(post(sp, 2 * wordSize)));
+  } else {
+    mov(c_rarg1, arg1);
+    mov(c_rarg2, arg2);
+    mov(c_rarg3, arg3);
+  }
+  return call_RT(oop_result1, metadata_result, entry, 3);
+}
+
+// Implementation of StubFrame
+
+class StubFrame: public StackObj {
+ private:
+  StubAssembler* _sasm;
+
+ public:
+  StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
+  void load_argument(int offset_in_words, Register reg);
+
+  ~StubFrame();
+};;
+
+
+#define __ _sasm->
+
+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
+  _sasm = sasm;
+  __ set_info(name, must_gc_arguments);
+  __ enter();
+}
+
+// load parameters that were stored with LIR_Assembler::store_parameter
+// Note: offsets for store_parameter and load_argument must match
+void StubFrame::load_argument(int offset_in_words, Register reg) {
+  // rbp, + 0: link
+  //     + 1: return address
+  //     + 2: argument with offset 0
+  //     + 3: argument with offset 1
+  //     + 4: ...
+
+  __ ldr(reg, Address(rfp, (offset_in_words + 2) * BytesPerWord));
+}
+
+
+StubFrame::~StubFrame() {
+  __ leave();
+  __ ret(lr);
+}
+
+#undef __
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
+
+// Stack layout for saving/restoring  all the registers needed during a runtime
+// call (this includes deoptimization)
+// Note: note that users of this frame may well have arguments to some runtime
+// while these values are on the stack. These positions neglect those arguments
+// but the code in save_live_registers will take the argument count into
+// account.
+//
+
+enum reg_save_layout {
+  reg_save_frame_size = 32 /* float */ + 32 /* integer */
+};
+
+// Save off registers which might be killed by calls into the runtime.
+// Tries to smart of about FP registers.  In particular we separate
+// saving and describing the FPU registers for deoptimization since we
+// have to save the FPU registers twice if we describe them.  The
+// deopt blob is the only thing which needs to describe FPU registers.
+// In all other cases it should be sufficient to simply save their
+// current value.
+
+static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs];
+static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs];
+static int reg_save_size_in_words;
+static int frame_size_in_bytes = -1;
+
+static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
+  int frame_size_in_bytes = reg_save_frame_size * BytesPerWord;
+  sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+  int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
+  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
+
+  for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    if (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding()) {
+      int sp_offset = cpu_reg_save_offsets[i];
+      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                r->as_VMReg());
+    }
+  }
+
+  if (save_fpu_registers) {
+    for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+      FloatRegister r = as_FloatRegister(i);
+      {
+        int sp_offset = fpu_reg_save_offsets[i];
+        oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                  r->as_VMReg());
+      }
+    }
+  }
+  return oop_map;
+}
+
+static OopMap* save_live_registers(StubAssembler* sasm,
+                                   bool save_fpu_registers = true) {
+  __ block_comment("save_live_registers");
+
+  __ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+
+  if (save_fpu_registers) {
+    for (int i = 30; i >= 0; i -= 2)
+      __ stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ pre(sp, -2 * wordSize)));
+  } else {
+    __ add(sp, sp, -32 * wordSize);
+  }
+
+  return generate_oop_map(sasm, save_fpu_registers);
+}
+
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  if (restore_fpu_registers) {
+    for (int i = 0; i < 32; i += 2)
+      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ post(sp, 2 * wordSize)));
+  } else {
+    __ add(sp, sp, 32 * wordSize);
+  }
+
+  __ pop(RegSet::range(r0, r29), sp);
+}
+
+static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true)  {
+
+  if (restore_fpu_registers) {
+    for (int i = 0; i < 32; i += 2)
+      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ post(sp, 2 * wordSize)));
+  } else {
+    __ add(sp, sp, 32 * wordSize);
+  }
+
+  __ ldp(zr, r1, Address(__ post(sp, 16)));
+  __ pop(RegSet::range(r2, r29), sp);
+}
+
+
+
+void Runtime1::initialize_pd() {
+  int i;
+  int sp_offset = 0;
+
+  // all float registers are saved explicitly
+  assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here");
+  for (i = 0; i < FrameMap::nof_fpu_regs; i++) {
+    fpu_reg_save_offsets[i] = sp_offset;
+    sp_offset += 2;   // SP offsets are in halfwords
+  }
+
+  for (i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    cpu_reg_save_offsets[i] = sp_offset;
+    sp_offset += 2;   // SP offsets are in halfwords
+  }
+}
+
+
+// target: the entry point of the method that creates and posts the exception oop
+// has_argument: true if the exception needs an argument (passed in rscratch1)
+
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+  // make a frame and preserve the caller's caller-save registers
+  OopMap* oop_map = save_live_registers(sasm);
+  int call_offset;
+  if (!has_argument) {
+    call_offset = __ call_RT(noreg, noreg, target);
+  } else {
+    call_offset = __ call_RT(noreg, noreg, target, rscratch1);
+  }
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  __ should_not_reach_here();
+  return oop_maps;
+}
+
+
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+  __ block_comment("generate_handle_exception");
+
+  // incoming parameters
+  const Register exception_oop = r0;
+  const Register exception_pc  = r3;
+  // other registers used in this stub
+
+  // Save registers, if required.
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* oop_map = NULL;
+  switch (id) {
+  case forward_exception_id:
+    // We're handling an exception in the context of a compiled frame.
+    // The registers have been saved in the standard places.  Perform
+    // an exception lookup in the caller and dispatch to the handler
+    // if found.  Otherwise unwind and dispatch to the callers
+    // exception handler.
+    oop_map = generate_oop_map(sasm, 1 /*thread*/);
+
+    // load and clear pending exception oop into r0
+    __ ldr(exception_oop, Address(rthread, Thread::pending_exception_offset()));
+    __ str(zr, Address(rthread, Thread::pending_exception_offset()));
+
+    // load issuing PC (the return address for this stub) into r3
+    __ ldr(exception_pc, Address(rfp, 1*BytesPerWord));
+
+    // make sure that the vm_results are cleared (may be unnecessary)
+    __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
+    __ str(zr, Address(rthread, JavaThread::vm_result_2_offset()));
+    break;
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    // At this point all registers MAY be live.
+    oop_map = save_live_registers(sasm, id == handle_exception_nofpu_id);
+    break;
+  case handle_exception_from_callee_id: {
+    // At this point all registers except exception oop (r0) and
+    // exception pc (lr) are dead.
+    const int frame_size = 2 /*fp, return address*/;
+    oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
+    sasm->set_frame_size(frame_size);
+    break;
+  }
+  default:
+    __ should_not_reach_here();
+    break;
+  }
+
+  // verify that only r0 and r3 are valid at this time
+  __ invalidate_registers(false, true, true, false, true, true);
+  // verify that r0 contains a valid exception
+  __ verify_not_null_oop(exception_oop);
+
+#ifdef ASSERT
+  // check that fields in JavaThread for exception oop and issuing pc are
+  // empty before writing to them
+  Label oop_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset()));
+  __ cbz(rscratch1, oop_empty);
+  __ stop("exception oop already set");
+  __ bind(oop_empty);
+
+  Label pc_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
+  __ cbz(rscratch1, pc_empty);
+  __ stop("exception pc already set");
+  __ bind(pc_empty);
+#endif
+
+  // save exception oop and issuing pc into JavaThread
+  // (exception handler will load it from here)
+  __ str(exception_oop, Address(rthread, JavaThread::exception_oop_offset()));
+  __ str(exception_pc, Address(rthread, JavaThread::exception_pc_offset()));
+
+  // patch throwing pc into return address (has bci & oop map)
+  __ str(exception_pc, Address(rfp, 1*BytesPerWord));
+
+  // compute the exception handler.
+  // the exception oop and the throwing pc are read from the fields in JavaThread
+  int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  // r0: handler address
+  //      will be the deopt blob if nmethod was deoptimized while we looked up
+  //      handler regardless of whether handler existed in the nmethod.
+
+  // only r0 is valid at this time, all other registers have been destroyed by the runtime call
+  __ invalidate_registers(false, true, true, true, true, true);
+
+  // patch the return address, this stub will directly return to the exception handler
+  __ str(r0, Address(rfp, 1*BytesPerWord));
+
+  switch (id) {
+  case forward_exception_id:
+  case handle_exception_nofpu_id:
+  case handle_exception_id:
+    // Restore the registers that were saved at the beginning.
+    restore_live_registers(sasm, id == handle_exception_nofpu_id);
+    break;
+  case handle_exception_from_callee_id:
+    // Pop the return address since we are possibly changing SP (restoring from BP).
+    __ leave();
+
+    // Restore SP from FP if the exception PC is a method handle call site.
+    {
+      Label nope;
+      __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
+      __ cbzw(rscratch1, nope);
+      __ mov(sp, rfp);
+      __ bind(nope);
+    }
+
+    __ ret(lr);  // jump to exception handler
+    break;
+  default:  ShouldNotReachHere();
+  }
+
+  return oop_maps;
+}
+
+
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
+  // incoming parameters
+  const Register exception_oop = r0;
+  // callee-saved copy of exception_oop during runtime call
+  const Register exception_oop_callee_saved = r19;
+  // other registers used in this stub
+  const Register exception_pc = r3;
+  const Register handler_addr = r1;
+
+  // verify that only r0, is valid at this time
+  __ invalidate_registers(false, true, true, true, true, true);
+
+#ifdef ASSERT
+  // check that fields in JavaThread for exception oop and issuing pc are empty
+  Label oop_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset()));
+  __ cbz(rscratch1, oop_empty);
+  __ stop("exception oop must be empty");
+  __ bind(oop_empty);
+
+  Label pc_empty;
+  __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
+  __ cbz(rscratch1, pc_empty);
+  __ stop("exception pc must be empty");
+  __ bind(pc_empty);
+#endif
+
+  // Save our return address because
+  // exception_handler_for_return_address will destroy it.  We also
+  // save exception_oop
+  __ stp(lr, exception_oop, Address(__ pre(sp, -2 * wordSize)));
+
+  // search the exception handler address of the caller (using the return address)
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, lr);
+  // r0: exception handler address of the caller
+
+  // Only R0 is valid at this time; all other registers have been
+  // destroyed by the call.
+  __ invalidate_registers(false, true, true, true, false, true);
+
+  // move result of call into correct register
+  __ mov(handler_addr, r0);
+
+  // get throwing pc (= return address).
+  // lr has been destroyed by the call
+  __ ldp(lr, exception_oop, Address(__ post(sp, 2 * wordSize)));
+  __ mov(r3, lr);
+
+  __ verify_not_null_oop(exception_oop);
+
+  {
+    Label foo;
+    __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
+    __ cbzw(rscratch1, foo);
+    __ mov(sp, rfp);
+    __ bind(foo);
+  }
+
+  // continue at exception handler (return address removed)
+  // note: do *not* remove arguments when unwinding the
+  //       activation since the caller assumes having
+  //       all arguments on the stack when entering the
+  //       runtime to determine the exception handler
+  //       (GC happens at call site with arguments!)
+  // r0: exception oop
+  // r3: throwing pc
+  // r1: exception handler
+  __ br(handler_addr);
+}
+
+
+
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+  // use the maximum number of runtime-arguments here because it is difficult to
+  // distinguish each RT-Call.
+  // Note: This number affects also the RT-Call in generate_handle_exception because
+  //       the oop-map is shared for all calls.
+  DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+  assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+  OopMap* oop_map = save_live_registers(sasm);
+
+  __ mov(c_rarg0, rthread);
+  Label retaddr;
+  __ set_last_Java_frame(sp, rfp, retaddr, rscratch1);
+  // do the call
+  __ lea(rscratch1, RuntimeAddress(target));
+  __ blrt(rscratch1, 1, 0, 1);
+  __ bind(retaddr);
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(__ offset(), oop_map);
+  // verify callee-saved register
+#ifdef ASSERT
+  { Label L;
+    __ get_thread(rscratch1);
+    __ cmp(rthread, rscratch1);
+    __ br(Assembler::EQ, L);
+    __ stop("StubAssembler::call_RT: rthread not callee saved?");
+    __ bind(L);
+  }
+#endif
+  __ reset_last_Java_frame(true, false);
+  __ maybe_isb();
+
+  // check for pending exceptions
+  { Label L;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbz(rscratch1, L);
+    // exception pending => remove activation and forward to exception handler
+
+    { Label L1;
+      __ cbnz(r0, L1);                                  // have we deoptimized?
+      __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+      __ bind(L1);
+    }
+
+    // the deopt blob expects exceptions in the special fields of
+    // JavaThread, so copy and clear pending exception.
+
+    // load and clear pending exception
+    __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
+    __ str(zr, Address(rthread, Thread::pending_exception_offset()));
+
+    // check that there is really a valid exception
+    __ verify_not_null_oop(r0);
+
+    // load throwing pc: this is the return address of the stub
+    __ mov(r3, lr);
+
+#ifdef ASSERT
+    // check that fields in JavaThread for exception oop and issuing pc are empty
+    Label oop_empty;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbz(rscratch1, oop_empty);
+    __ stop("exception oop must be empty");
+    __ bind(oop_empty);
+
+    Label pc_empty;
+    __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
+    __ cbz(rscratch1, pc_empty);
+    __ stop("exception pc must be empty");
+    __ bind(pc_empty);
+#endif
+
+    // store exception oop and throwing pc to JavaThread
+    __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
+    __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
+
+    restore_live_registers(sasm);
+
+    __ leave();
+
+    // Forward the exception directly to deopt blob. We can blow no
+    // registers and must leave throwing pc on the stack.  A patch may
+    // have values live in registers so the entry point with the
+    // exception in tls.
+    __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
+
+    __ bind(L);
+  }
+
+
+  // Runtime will return true if the nmethod has been deoptimized during
+  // the patching process. In that case we must do a deopt reexecute instead.
+
+  Label reexecuteEntry, cont;
+
+  __ cbz(r0, cont);                                 // have we deoptimized?
+
+  // Will reexecute. Proper return address is already on the stack we just restore
+  // registers, pop all of our frame but the return address and jump to the deopt blob
+  restore_live_registers(sasm);
+  __ leave();
+  __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+
+  __ bind(cont);
+  restore_live_registers(sasm);
+  __ leave();
+  __ ret(lr);
+
+  return oop_maps;
+}
+
+
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+
+  const Register exception_oop = r0;
+  const Register exception_pc  = r3;
+
+  // for better readability
+  const bool must_gc_arguments = true;
+  const bool dont_gc_arguments = false;
+
+  // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
+  bool save_fpu_registers = true;
+
+  // stub code & info for the different stubs
+  OopMapSet* oop_maps = NULL;
+  OopMap* oop_map = NULL;
+  switch (id) {
+    {
+    case forward_exception_id:
+      {
+        oop_maps = generate_handle_exception(id, sasm);
+        __ leave();
+        __ ret(lr);
+      }
+      break;
+
+    case throw_div0_exception_id:
+      { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
+      }
+      break;
+
+    case throw_null_pointer_exception_id:
+      { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
+      }
+      break;
+
+    case new_instance_id:
+    case fast_new_instance_id:
+    case fast_new_instance_init_check_id:
+      {
+        Register klass = r3; // Incoming
+        Register obj   = r0; // Result
+
+        if (id == new_instance_id) {
+          __ set_info("new_instance", dont_gc_arguments);
+        } else if (id == fast_new_instance_id) {
+          __ set_info("fast new_instance", dont_gc_arguments);
+        } else {
+          assert(id == fast_new_instance_init_check_id, "bad StubID");
+          __ set_info("fast new_instance init check", dont_gc_arguments);
+        }
+
+        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
+            UseTLAB && FastTLABRefill) {
+          Label slow_path;
+          Register obj_size = r2;
+          Register t1       = r19;
+          Register t2       = r4;
+          assert_different_registers(klass, obj, obj_size, t1, t2);
+
+          __ stp(r5, r19, Address(__ pre(sp, -2 * wordSize)));
+
+          if (id == fast_new_instance_init_check_id) {
+            // make sure the klass is initialized
+            __ ldrb(rscratch1, Address(klass, InstanceKlass::init_state_offset()));
+            __ cmpw(rscratch1, InstanceKlass::fully_initialized);
+            __ br(Assembler::NE, slow_path);
+          }
+
+#ifdef ASSERT
+          // assert object can be fast path allocated
+          {
+            Label ok, not_ok;
+            __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
+            __ cmp(obj_size, 0u);
+            __ br(Assembler::LE, not_ok);  // make sure it's an instance (LH > 0)
+            __ tstw(obj_size, Klass::_lh_instance_slow_path_bit);
+            __ br(Assembler::EQ, ok);
+            __ bind(not_ok);
+            __ stop("assert(can be fast path allocated)");
+            __ should_not_reach_here();
+            __ bind(ok);
+          }
+#endif // ASSERT
+
+          // if we got here then the TLAB allocation failed, so try
+          // refilling the TLAB or allocating directly from eden.
+          Label retry_tlab, try_eden;
+          __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy r3 (klass), returns r5
+
+          __ bind(retry_tlab);
+
+          // get the instance size (size is postive so movl is fine for 64bit)
+          __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
+
+          __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
+
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ verify_oop(obj);
+          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+          __ ret(lr);
+
+          __ bind(try_eden);
+          // get the instance size (size is postive so movl is fine for 64bit)
+          __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset()));
+
+          __ eden_allocate(obj, obj_size, 0, t1, slow_path);
+          __ incr_allocated_bytes(rthread, obj_size, 0, rscratch1);
+
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ verify_oop(obj);
+          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+          __ ret(lr);
+
+          __ bind(slow_path);
+          __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize)));
+        }
+
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+        __ verify_oop(obj);
+        __ leave();
+        __ ret(lr);
+
+        // r0,: new instance
+      }
+
+      break;
+
+    case counter_overflow_id:
+      {
+        Register bci = r0, method = r1;
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        // Retrieve bci
+        __ ldrw(bci, Address(rfp, 2*BytesPerWord));
+        // And a pointer to the Method*
+        __ ldr(method, Address(rfp, 3*BytesPerWord));
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+        __ leave();
+        __ ret(lr);
+      }
+      break;
+
+    case new_type_array_id:
+    case new_object_array_id:
+      {
+        Register length   = r19; // Incoming
+        Register klass    = r3; // Incoming
+        Register obj      = r0; // Result
+
+        if (id == new_type_array_id) {
+          __ set_info("new_type_array", dont_gc_arguments);
+        } else {
+          __ set_info("new_object_array", dont_gc_arguments);
+        }
+
+#ifdef ASSERT
+        // assert object type is really an array of the proper kind
+        {
+          Label ok;
+          Register t0 = obj;
+          __ ldrw(t0, Address(klass, Klass::layout_helper_offset()));
+          __ asrw(t0, t0, Klass::_lh_array_tag_shift);
+          int tag = ((id == new_type_array_id)
+                     ? Klass::_lh_array_tag_type_value
+                     : Klass::_lh_array_tag_obj_value);
+          __ mov(rscratch1, tag);
+          __ cmpw(t0, rscratch1);
+          __ br(Assembler::EQ, ok);
+          __ stop("assert(is an array klass)");
+          __ should_not_reach_here();
+          __ bind(ok);
+        }
+#endif // ASSERT
+
+        if (UseTLAB && FastTLABRefill) {
+          Register arr_size = r4;
+          Register t1       = r2;
+          Register t2       = r5;
+          Label slow_path;
+          assert_different_registers(length, klass, obj, arr_size, t1, t2);
+
+          // check that array length is small enough for fast path.
+          __ mov(rscratch1, C1_MacroAssembler::max_array_allocation_length);
+          __ cmpw(length, rscratch1);
+          __ br(Assembler::HI, slow_path);
+
+          // if we got here then the TLAB allocation failed, so try
+          // refilling the TLAB or allocating directly from eden.
+          Label retry_tlab, try_eden;
+          const Register thread =
+            __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves r19 & r3, returns rthread
+
+          __ bind(retry_tlab);
+
+          // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
+          // since size is positive ldrw does right thing on 64bit
+          __ ldrw(t1, Address(klass, Klass::layout_helper_offset()));
+          __ lslvw(arr_size, length, t1);
+          __ ubfx(t1, t1, Klass::_lh_header_size_shift,
+                  exact_log2(Klass::_lh_header_size_mask + 1));
+          __ add(arr_size, arr_size, t1);
+          __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
+          __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
+
+          __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
+
+          __ initialize_header(obj, klass, length, t1, t2);
+          __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
+          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
+          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
+          __ andr(t1, t1, Klass::_lh_header_size_mask);
+          __ sub(arr_size, arr_size, t1);  // body length
+          __ add(t1, t1, obj);       // body start
+          __ initialize_body(t1, arr_size, 0, t2);
+          __ verify_oop(obj);
+
+          __ ret(lr);
+
+          __ bind(try_eden);
+          // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
+          // since size is positive ldrw does right thing on 64bit
+          __ ldrw(t1, Address(klass, Klass::layout_helper_offset()));
+          // since size is postive movw does right thing on 64bit
+          __ movw(arr_size, length);
+          __ lslvw(arr_size, length, t1);
+          __ ubfx(t1, t1, Klass::_lh_header_size_shift,
+                  exact_log2(Klass::_lh_header_size_mask + 1));
+          __ add(arr_size, arr_size, t1);
+          __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
+          __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
+
+          __ eden_allocate(obj, arr_size, 0, t1, slow_path);  // preserves arr_size
+          __ incr_allocated_bytes(thread, arr_size, 0, rscratch1);
+
+          __ initialize_header(obj, klass, length, t1, t2);
+          __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
+          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
+          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
+          __ andr(t1, t1, Klass::_lh_header_size_mask);
+          __ sub(arr_size, arr_size, t1);  // body length
+          __ add(t1, t1, obj);       // body start
+          __ initialize_body(t1, arr_size, 0, t2);
+          __ verify_oop(obj);
+
+          __ ret(lr);
+
+          __ bind(slow_path);
+        }
+
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        int call_offset;
+        if (id == new_type_array_id) {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+        } else {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+        }
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+
+        __ verify_oop(obj);
+        __ leave();
+        __ ret(lr);
+
+        // r0: new array
+      }
+      break;
+
+    case new_multi_array_id:
+      { StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
+        // r0,: klass
+        // r19,: rank
+        // r2: address of 1st dimension
+        OopMap* map = save_live_registers(sasm);
+        __ mov(c_rarg1, r0);
+        __ mov(c_rarg3, r2);
+        __ mov(c_rarg2, r19);
+        int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), r1, r2, r3);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r0(sasm);
+
+        // r0,: new multi array
+        __ verify_oop(r0);
+      }
+      break;
+
+    case register_finalizer_id:
+      {
+        __ set_info("register_finalizer", dont_gc_arguments);
+
+        // This is called via call_runtime so the arguments
+        // will be place in C abi locations
+
+        __ verify_oop(c_rarg0);
+
+        // load the klass and check the has finalizer flag
+        Label register_finalizer;
+        Register t = r5;
+        __ load_klass(t, r0);
+        __ ldrw(t, Address(t, Klass::access_flags_offset()));
+        __ tst(t, JVM_ACC_HAS_FINALIZER);
+        __ br(Assembler::NE, register_finalizer);
+        __ ret(lr);
+
+        __ bind(register_finalizer);
+        __ enter();
+        OopMap* oop_map = save_live_registers(sasm);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), r0);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        // Now restore all the live registers
+        restore_live_registers(sasm);
+
+        __ leave();
+        __ ret(lr);
+      }
+      break;
+
+    case throw_class_cast_exception_id:
+      { StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
+      }
+      break;
+
+    case throw_incompatible_class_change_error_id:
+      { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+      }
+      break;
+
+    case slow_subtype_check_id:
+      {
+        // Typical calling sequence:
+        // __ push(klass_RInfo);  // object klass or other subclass
+        // __ push(sup_k_RInfo);  // array element klass or other superclass
+        // __ bl(slow_subtype_check);
+        // Note that the subclass is pushed first, and is therefore deepest.
+        enum layout {
+          r0_off, r0_off_hi,
+          r2_off, r2_off_hi,
+          r4_off, r4_off_hi,
+          r5_off, r5_off_hi,
+          sup_k_off, sup_k_off_hi,
+          klass_off, klass_off_hi,
+          framesize,
+          result_off = sup_k_off
+        };
+
+        __ set_info("slow_subtype_check", dont_gc_arguments);
+        __ push(RegSet::of(r0, r2, r4, r5), sp);
+
+        // This is called by pushing args and not with C abi
+        // __ ldr(r4, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
+        // __ ldr(r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
+
+        __ ldp(r4, r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size));
+
+        Label miss;
+        __ check_klass_subtype_slow_path(r4, r0, r2, r5, NULL, &miss);
+
+        // fallthrough on success:
+        __ mov(rscratch1, 1);
+        __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
+        __ pop(RegSet::of(r0, r2, r4, r5), sp);
+        __ ret(lr);
+
+        __ bind(miss);
+        __ str(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
+        __ pop(RegSet::of(r0, r2, r4, r5), sp);
+        __ ret(lr);
+      }
+      break;
+
+    case monitorenter_nofpu_id:
+      save_fpu_registers = false;
+      // fall through
+    case monitorenter_id:
+      {
+        StubFrame f(sasm, "monitorenter", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, save_fpu_registers);
+
+        // Called with store_parameter and not C abi
+
+        f.load_argument(1, r0); // r0,: object
+        f.load_argument(0, r1); // r1,: lock address
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), r0, r1);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm, save_fpu_registers);
+      }
+      break;
+
+    case monitorexit_nofpu_id:
+      save_fpu_registers = false;
+      // fall through
+    case monitorexit_id:
+      {
+        StubFrame f(sasm, "monitorexit", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, save_fpu_registers);
+
+        // Called with store_parameter and not C abi
+
+        f.load_argument(0, r0); // r0,: lock address
+
+        // note: really a leaf routine but must setup last java sp
+        //       => use call_RT for now (speed can be improved by
+        //       doing last java sp setup manually)
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), r0);
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm, save_fpu_registers);
+      }
+      break;
+
+    case deoptimize_id:
+      {
+        StubFrame f(sasm, "deoptimize", dont_gc_arguments);
+        OopMap* oop_map = save_live_registers(sasm);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize));
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm);
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        __ leave();
+        __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+      }
+      break;
+
+    case throw_range_check_failed_id:
+      { StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
+      }
+      break;
+
+    case unwind_exception_id:
+      { __ set_info("unwind_exception", dont_gc_arguments);
+        // note: no stubframe since we are about to leave the current
+        //       activation and we are calling a leaf VM function only.
+        generate_unwind_exception(sasm);
+      }
+      break;
+
+    case access_field_patching_id:
+      { StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+      }
+      break;
+
+    case load_klass_patching_id:
+      { StubFrame f(sasm, "load_klass_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
+      }
+      break;
+
+    case load_mirror_patching_id:
+      { StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
+      }
+      break;
+
+    case load_appendix_patching_id:
+      { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
+      }
+      break;
+
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      { StubFrame f(sasm, "handle_exception", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case handle_exception_from_callee_id:
+      { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case throw_index_exception_id:
+      { StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
+      }
+      break;
+
+    case throw_array_store_exception_id:
+      { StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments);
+        // tos + 0: link
+        //     + 1: return address
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
+      }
+      break;
+
+#if INCLUDE_ALL_GCS
+
+// Registers to be saved around calls to g1_wb_pre or g1_wb_post
+#define G1_SAVE_REGS (RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2))
+
+    case g1_pre_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
+        // arg0 : previous value of memory
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+          __ mov(r0, (int)id);
+          __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
+          __ should_not_reach_here();
+          break;
+        }
+
+        const Register pre_val = r0;
+        const Register thread = rthread;
+        const Register tmp = rscratch1;
+
+        Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_active()));
+
+        Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        Label done;
+        Label runtime;
+
+        // Can we store original value in the thread's buffer?
+        __ ldr(tmp, queue_index);
+        __ cbz(tmp, runtime);
+
+        __ sub(tmp, tmp, wordSize);
+        __ str(tmp, queue_index);
+        __ ldr(rscratch2, buffer);
+        __ add(tmp, tmp, rscratch2);
+        f.load_argument(0, rscratch2);
+        __ str(rscratch2, Address(tmp, 0));
+        __ b(done);
+
+        __ bind(runtime);
+        __ push(G1_SAVE_REGS, sp);
+        f.load_argument(0, pre_val);
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+        __ pop(G1_SAVE_REGS, sp);
+        __ bind(done);
+      }
+      break;
+    case g1_post_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
+
+        // arg0: store_address
+        Address store_addr(rfp, 2*BytesPerWord);
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+        assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+        Label done;
+        Label runtime;
+
+        // At this point we know new_value is non-NULL and the new_value crosses regions.
+        // Must check to see if card is already dirty
+
+        const Register thread = rthread;
+
+        Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        const Register card_addr = rscratch2;
+        ExternalAddress cardtable((address) ct->byte_map_base);
+
+        f.load_argument(0, card_addr);
+        __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift);
+        unsigned long offset;
+        __ adrp(rscratch1, cardtable, offset);
+        __ add(card_addr, card_addr, rscratch1);
+        __ ldrb(rscratch1, Address(card_addr, offset));
+        __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+        __ br(Assembler::EQ, done);
+
+        assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
+
+        __ membar(Assembler::StoreLoad);
+        __ ldrb(rscratch1, Address(card_addr, offset));
+        __ cbzw(rscratch1, done);
+
+        // storing region crossing non-NULL, card is clean.
+        // dirty card and log.
+        __ strb(zr, Address(card_addr, offset));
+
+        __ ldr(rscratch1, queue_index);
+        __ cbz(rscratch1, runtime);
+        __ sub(rscratch1, rscratch1, wordSize);
+        __ str(rscratch1, queue_index);
+
+        const Register buffer_addr = r0;
+
+        __ push(RegSet::of(r0, r1), sp);
+        __ ldr(buffer_addr, buffer);
+        __ str(card_addr, Address(buffer_addr, rscratch1));
+        __ pop(RegSet::of(r0, r1), sp);
+        __ b(done);
+
+        __ bind(runtime);
+        __ push(G1_SAVE_REGS, sp);
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+        __ pop(G1_SAVE_REGS, sp);
+        __ bind(done);
+
+      }
+      break;
+#endif
+
+    case predicate_failed_trap_id:
+      {
+        StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments);
+
+        OopMap* map = save_live_registers(sasm);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+        __ leave();
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+        __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+      }
+      break;
+
+
+    default:
+      { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
+        __ mov(r0, (int)id);
+        __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
+        __ should_not_reach_here();
+      }
+      break;
+    }
+  }
+  return oop_maps;
+}
+
+#undef __
+
+const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c1_globals_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+
+#ifndef TIERED
+define_pd_global(bool, BackgroundCompilation,        true );
+define_pd_global(bool, UseTLAB,                      true );
+define_pd_global(bool, ResizeTLAB,                   true );
+define_pd_global(bool, InlineIntrinsics,             true );
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 false);
+define_pd_global(bool, UseOnStackReplacement,        true );
+define_pd_global(bool, TieredCompilation,            false);
+#ifdef BUILTIN_SIM
+// We compile very aggressively with the builtin simulator because
+// doing so greatly reduces run times and tests more code.
+define_pd_global(intx, CompileThreshold,             150 );
+define_pd_global(intx, BackEdgeThreshold,            500);
+#else
+define_pd_global(intx, CompileThreshold,             1500 );
+define_pd_global(intx, BackEdgeThreshold,            100000);
+#endif
+
+define_pd_global(intx, OnStackReplacePercentage,     933  );
+define_pd_global(intx, FreqInlineSize,               325  );
+define_pd_global(intx, NewSizeThreadIncrease,        4*K  );
+define_pd_global(intx, InitialCodeCacheSize,         160*K);
+define_pd_global(intx, ReservedCodeCacheSize,        32*M );
+define_pd_global(intx, NonProfiledCodeHeapSize,      13*M );
+define_pd_global(intx, ProfiledCodeHeapSize,         14*M );
+define_pd_global(intx, NonNMethodCodeHeapSize,       5*M  );
+define_pd_global(bool, ProfileInterpreter,           false);
+define_pd_global(intx, CodeCacheExpansionSize,       32*K );
+define_pd_global(uintx, CodeCacheMinBlockLength,     1);
+define_pd_global(uintx, CodeCacheMinimumUseSpace,    400*K);
+define_pd_global(uintx, MetaspaceSize,               12*M );
+define_pd_global(bool, NeverActAsServerClassMachine, true );
+define_pd_global(uint64_t,MaxRAM,                    1ULL*G);
+define_pd_global(bool, CICompileOSR,                 true );
+#endif // !TIERED
+define_pd_global(bool, UseTypeProfile,               false);
+define_pd_global(bool, RoundFPResults,               true );
+
+define_pd_global(bool, LIRFillDelaySlots,            false);
+define_pd_global(bool, OptimizeSinglePrecision,      true );
+define_pd_global(bool, CSEArrayLength,               false);
+define_pd_global(bool, TwoOperandLIRForm,            false );
+
+define_pd_global(intx, SafepointPollOffset,          0  );
+
+#endif // CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP
+#define CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the server compiler.
+// (see c2_globals.hpp).  Alpha-sorted.
+
+define_pd_global(bool, BackgroundCompilation,        true);
+define_pd_global(bool, UseTLAB,                      true);
+define_pd_global(bool, ResizeTLAB,                   true);
+define_pd_global(bool, CICompileOSR,                 true);
+define_pd_global(bool, InlineIntrinsics,             true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 true);
+define_pd_global(bool, UseOnStackReplacement,        true);
+#ifdef CC_INTERP
+define_pd_global(bool, ProfileInterpreter,           false);
+#else
+define_pd_global(bool, ProfileInterpreter,           true);
+#endif // CC_INTERP
+define_pd_global(bool, TieredCompilation,            trueInTiered);
+define_pd_global(intx, CompileThreshold,             10000);
+define_pd_global(intx, BackEdgeThreshold,            100000);
+
+define_pd_global(intx, OnStackReplacePercentage,     140);
+define_pd_global(intx, ConditionalMoveLimit,         3);
+define_pd_global(intx, FLOATPRESSURE,                64);
+define_pd_global(intx, FreqInlineSize,               325);
+define_pd_global(intx, MinJumpTableSize,             10);
+define_pd_global(intx, INTPRESSURE,                  25);
+define_pd_global(intx, InteriorEntryAlignment,       16);
+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+define_pd_global(intx, LoopUnrollLimit,              60);
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(intx, InitialCodeCacheSize,         2496*K); // Integral multiple of CodeCacheExpansionSize
+define_pd_global(intx, CodeCacheExpansionSize,       64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t,MaxRAM,                    128ULL*G);
+define_pd_global(intx, RegisterCostAreaRatio,        16000);
+
+// Peephole and CISC spilling both break the graph, and so makes the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole,                 true);
+define_pd_global(bool, UseCISCSpill,                 true);
+define_pd_global(bool, OptoScheduling,               false);
+define_pd_global(bool, OptoBundling,                 false);
+
+define_pd_global(intx, ReservedCodeCacheSize,        48*M);
+define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
+define_pd_global(intx, ProfiledCodeHeapSize,         22*M);
+define_pd_global(intx, NonNMethodCodeHeapSize,       5*M );
+define_pd_global(uintx, CodeCacheMinBlockLength,     4);
+define_pd_global(uintx, CodeCacheMinimumUseSpace,    400*K);
+
+// Heap related flags
+define_pd_global(uintx,MetaspaceSize,    ScaleForWordSize(16*M));
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+define_pd_global(bool,  TrapBasedRangeChecks,        false); // Not needed.
+
+#endif // CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c2_init_aarch64.cpp	Tue Jan 20 12:47:43 2015 -0800
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+
+// processor dependent initialization for i486
+
+void Compile::pd_compiler2_init() {
+  guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
+  // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could
+  // simply be left out.
+}